#!/usr/bin/env python
# Version = 3.5.2
# __auth__ = '无名小妖'
import requests
from bs4 import BeautifulSoup
import uuid

response = requests.get(url='http://www.autohome.com.cn/news/')
response.encoding = response.apparent_encoding  # parse with the page's own encoding
# response.status_code holds the HTTP status code
# Turn the page string into a BeautifulSoup object. 'features' selects the parser:
# html.parser is used here; in production lxml is usually preferred for better performance.
soup = BeautifulSoup(response.text, features='html.parser')
# Get the tag whose id is 'auto-channel-lazyload-article'
target = soup.find(id='auto-channel-lazyload-article')
# Find all li tags under that tag
li = target.find_all('li')
# Get the a tag under each li tag
for i in li:
    a = i.find('a')
    if a:
        # print(a.attrs.get('href'))
        txt = a.find('h3').text  # article title (not used further below)
        # Get the image address
        img_url = a.find('img').attrs.get('src')
        if not img_url.startswith("http:"):
            img_url = "http:" + img_url
        # Download the image
        img_response = requests.get(url=img_url)
        h = img_url.split('.')
        jpg_name = '{}.{}'.format(uuid.uuid4(), h[-1])
        with open(jpg_name, 'wb') as f:
            f.write(img_response.content)
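
# --- Optional sketch (not part of the original script) ---------------------
# The comment above notes that the lxml parser is usually preferred in
# production because it is faster than html.parser. A minimal, hedged
# illustration: re-parse the already-fetched page with lxml, falling back to
# html.parser when the lxml package is not installed.
try:
    fast_soup = BeautifulSoup(response.text, features='lxml')
except Exception:
    # bs4 raises FeatureNotFound when the requested parser is unavailable.
    fast_soup = BeautifulSoup(response.text, features='html.parser')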