Python corrupted image downloads: images scraped by the crawler download incorrectly and the image files come out broken
import requests
import os, stat
from lxml import etree
import time

class HuangMan():
    def __init__(self):
        # Request headers used for every request
        self.headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'}
        self.url_list = []
        self.Hman_url_list = []
        self.Hman_name_list = []

    def get_url_list(self):
        # Only pages 2 and 3 are crawled here; change the range for more pages
        url = "http://www.93qoqo.com/artlist/27-%d.html"
        for i in range(2, 4):
            # Build the URLs for pages 2 and 3
            self.url_list.append(url % i)

    # The first page has a different URL, so it gets its own method
    def start_1(self):
        url_1 = "http://www.93qoqo.com/artlist/27.html"
        response = requests.get(url_1, headers=self.headers).content.decode('utf-8')
        html = etree.HTML(response)
        result = html.xpath('//ul/li[@class="name"]/a')
        for i in range(len(result)):
            # Read the attributes of each <a> element
            shuxing = result[i].attrib
            # Collect the detail-page URL
            self.Hman_url_list.append("http://www.93qoqo.com" + shuxing.get('href'))
            # Collect the title
            self.Hman_name_list.append(shuxing.get('title'))
        print("Crawling page 1 of the listing")
        time.sleep(2)

    # Crawl the listing pages starting from page 2
    def start(self):
        self.start_1()
        self.get_url_list()
        for url in self.url_list:
            response = requests.get(url, headers=self.headers).content.decode('utf-8')
            html = etree.HTML(response)
            result = html.xpath('//ul/li[@class="name"]/a')
            for i in range(len(result)):
                # Read the attributes of each <a> element
                shuxing = result[i].attrib
                # Collect the detail-page URL
                self.Hman_url_list.append("http://www.93qoqo.com" + shuxing.get('href'))
                # Collect the title
                self.Hman_name_list.append(result[i].text)
            print("Crawling page %d of the listing" % (self.url_list.index(url) + 2))
            time.sleep(2)
        self.wenjian()
        self.main()

    def wenjian(self):
        # Create the top-level download folder and switch into it
        if not os.path.exists("photo"):
            os.makedirs("photo")
            os.chmod("photo", stat.S_IWRITE)
        else:
            print("Folder already exists")
        os.chdir("photo")

    def main(self):
        for url in self.Hman_url_list:
            response = requests.get(url, headers=self.headers).content.decode('utf-8')
            html = etree.HTML(response)
            result = html.xpath('//center/div[@class="t_msgfont"]/img')
            # The sub-folder name belongs to the current detail page
            dir_name = self.Hman_name_list[self.Hman_url_list.index(url)]
            print(dir_name)
            if not os.path.exists(dir_name):
                print("Creating folder")
                os.makedirs(dir_name)
                os.chmod(dir_name, stat.S_IWRITE)
            for e in range(len(result)):
                print("Downloading image ... please wait")
                name = "%d.jpg" % e
                # Request the image itself via its src attribute; requesting the
                # page URL here wrote HTML into the .jpg files and corrupted them
                img_url = result[e].attrib.get('src')
                response = requests.get(img_url, headers=self.headers)
                # Write the binary content to disk
                with open(os.path.join(dir_name, name), mode='wb') as f:
                    f.write(response.content)

if __name__ == "__main__":
    h = HuangMan()
    h.start()
Summary
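The bug that corrupts the files sits in main(): the original code called requests.get(url, ...) with the detail page's URL for every image, so each .jpg written to disk actually contained HTML rather than image bytes. The corrected code above requests the img element's src instead. As an extra safeguard, a small helper can resolve relative src values and verify the response really is an image before writing it; the sketch below is a minimal illustration of that idea, and download_image is a hypothetical helper that is not part of the original script.

from urllib.parse import urljoin
import requests

def download_image(page_url, src, dest_path, headers):
    # Resolve a possibly relative src against the page it came from
    img_url = urljoin(page_url, src)
    resp = requests.get(img_url, headers=headers, timeout=10)
    resp.raise_for_status()
    # An HTML error page saved as .jpg is exactly what shows up as a "corrupted" image
    if 'image' not in resp.headers.get('Content-Type', ''):
        raise ValueError("Not an image: %s" % img_url)
    # Write the raw image bytes to disk
    with open(dest_path, 'wb') as f:
        f.write(resp.content)

Inside main(), the call would look like download_image(url, result[e].attrib.get('src'), os.path.join(dir_name, name), self.headers).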