# Parse the locally saved product page.
# The file is read as bytes so that BeautifulSoup detects the document's
# encoding itself instead of relying on the platform's default text encoding.
with open('index.html', 'rb') as f:
    Soup = BeautifulSoup(f.read(), 'lxml')

# Selector parts containing a colon have to be removed (presumably the
# pseudo-class pieces of a selector copied from the browser dev tools).
# Every selection below shares this path down to one product element.
item_selector = 'body > div > div > div.col-md-9 > div > div > div'

images = Soup.select(item_selector + ' > img')
titles = Soup.select(item_selector + ' > div.caption > h4 > a')
prices = Soup.select(item_selector + ' > div.caption > h4.pull-right')
# The star icons and the count are selected together here; they are later
# pulled apart by index (i*2 for the count text, i*2+1 for the stars).
stars = Soup.select(item_selector + ' > div.ratings > p')

# ---------------------------------------------------------------------------
# Study notes: a model homework answer, kept for reference only.
#
#   reviews = soup.select(
#       'body > div > div > div.col-md-9 > div > div > div > div.ratings > p.pull-right')
#   grades_crawler = soup.select(
#       'body > div > div > div.col-md-9 > div > div > div > div.ratings > p:nth-of-type(2) > span ')
#   # the line above grabs every star description
#   grades = []                        # start with an empty list
#   while len(grades_crawler) != 0:    # loop until nothing is left
#       e = grades_crawler[0:5]        # first five elements = one product's rating
#       grades.insert(1, e)            # insert that five-star list as one element of grades
#       del grades_crawler[0:5]        # drop the five elements just consumed
#
# After that the lists can be read together with zip.
#
# Also:
#   c = b.replace('<span class="glyphicon glyphicon-star"></span>', '★')        # filled star -> symbol
#   d = c.replace('<span class="glyphicon glyphicon-star-empty"></span>', '☆')  # empty star -> symbol
#   star.append(d)                     # collect the converted results in the list star
#   'grade': ''.join(star).replace('[', '').replace(']', '').replace(',', '').replace(' ', '')
# This one is quite neat.
#
# str.join usage example:
#   >>> li = ['my', 'name', 'is', 'bob']
#   >>> ' '.join(li)
#   'my name is bob'
#   >>> '_'.join(li)
#   'my_name_is_bob'
# ---------------------------------------------------------------------------
# Build one record per product from the parallel selection lists.
# `stars` is consumed as two entries per product: even positions supply the
# text stored under "starNum", odd positions hold the star <span> icons that
# are counted for "star".
# zip() stops at the shortest input, so a page with mismatched element counts
# yields fewer records rather than raising IndexError.
info = [
    dict(
        image=image_tag.get('src'),
        title=title_tag.get_text(),
        price=price_tag.get_text(),
        starNum=count_tag.get_text(),
        # The class is given as one exact string, so only filled stars are
        # counted ("glyphicon glyphicon-star-empty" is a different value).
        star=len(rating_tag.find_all("span", 'glyphicon glyphicon-star')),
    )
    for image_tag, title_tag, price_tag, count_tag, rating_tag in zip(
        images, titles, prices, stars[0::2], stars[1::2]
    )
]
# Dump the scraped records to a text file, one dict repr per line.
# UTF-8 is requested explicitly so that non-ASCII text in a record cannot
# fail to encode under a narrower platform default encoding.
with open("1-2.txt", "w", encoding="utf-8") as out:
    for record in info:
        print(record, file=out)