Scrapy爬取妹子图保存到不同目录下
生活随笔收集整理的這篇文章主要介紹了《Scrapy爬取妹子图保存到不同目录下》,小編覺得挺不錯的,現在分享給大家,幫大家做個參考。
?
首先在 settings 中進行設置:
# Scrapy project settings for the image crawler.

# Enable the custom image pipeline (lower number = runs earlier).
ITEM_PIPELINES = {
    'mztu.pipelines.ImagesPipelinse': 300,
}

# Default download directory. This MUST be set, otherwise images cannot be
# downloaded. A raw string keeps the Windows backslashes literal instead of
# relying on unrecognised escape sequences such as "\s" and "\P".
IMAGES_STORE = r"E:\study\Python\scrapy\mztu\imges"

# Expiration time for the image pipeline (days, per the Scrapy docs).
IMAGES_EXPIRES = 90

# Thumbnail generation (disabled; uncomment to enable).
# IMAGES_THUMBS = {
#     'small': (50, 50),
#     'big': (270, 270),
# }

# --- spider directory ---
# -*- coding: utf-8 -*-
import scrapy

from mztu.items import MztuItem


class ZimdgSpider(scrapy.Spider):
    """Crawl the mzitu.com "xinggan" listing and yield one item per image page.

    Flow: listing page -> gallery page -> every numbered page of the
    gallery -> one ``MztuItem`` carrying the image URL, its alt text
    (used as the directory title) and the page URL (used as Referer).
    """

    name = 'zimdg'
    allowed_domains = ['mzitu.com']
    # Build the list of listing-page URLs.
    # NOTE(review): range(118) starts at 0, so /page/0/ is requested as
    # well -- confirm that this is intended.
    start_urls = [
        'http://www.mzitu.com/xinggan/page/{}/'.format(x) for x in range(118)
    ]

    def parse(self, response):
        """Extract every gallery link from a listing page and follow it."""
        for entry in response.xpath("//div[@class='postlist']/ul/li"):
            href = entry.xpath('./a/@href').extract_first()
            if not href:
                # An <li> without a link would otherwise raise IndexError
                # and abort the remaining entries on this page.
                continue
            # Second-level parsing of the gallery itself.
            yield scrapy.Request(href, callback=self.parse_item)

    def parse_item(self, response):
        """Request every numbered page of one gallery."""
        # The fifth matching pager <span> holds the page count.
        # NOTE(review): this depends on the pager layout; it raises
        # IndexError if fewer than five spans match.
        offset = int(
            response.xpath('//div[@class="pagenavi"]/a/span/text()')[4].extract()
        )
        for page in range(1, offset + 1):
            # One item per request: requests complete asynchronously, so a
            # single shared item would be overwritten by later iterations
            # and by concurrently running callbacks.
            item = MztuItem()
            item['Referer'] = response.url + "/{}".format(page)
            # Hand the item to the next callback through meta.
            yield scrapy.Request(
                item['Referer'],
                meta={'meta_1': item},
                callback=self.parse_ponse,
            )

    def parse_ponse(self, response):
        """Fill in the image URL and title for one gallery page and yield the item."""
        # Retrieve the item passed along by parse_item.
        item = response.meta['meta_1']
        # Image address.
        item["imge_url"] = response.xpath(
            '//div[@class="main-image"]/p/a/img/@src'
        )[0].extract()
        # Image directory name (the <img> alt text).
        item["title"] = response.xpath(
            '//div[@class="main-image"]/p/a/img/@alt'
        )[0].extract()
        # Alternative file-name schemes (disabled):
        # itme["nickname"] = itme["Referer"][itme["Referer"].rfind("/"):]+itme["imge_url"][itme["imge_url"].rfind('/')+1:itme["imge_url"].rfind('.')]
        # itme["nickname"] = itme["imge_url"][itme["imge_url"].rfind('/')+1:itme["imge_url"].rfind('.')]
        yield item


# --- items ---
import scrapy


class MztuItem(scrapy.Item):
    """Container for one scraped image and the data needed to store it."""

    # Directory name the image is filed under.
    title = scrapy.Field()
    # Image address (URL).
    imge_url = scrapy.Field()
    # Request header value sent when downloading the image.
    Referer = scrapy.Field()
    # NOTE(review): presumably the final on-disk path, filled in by the
    # pipeline -- confirm against pipelines.py.
    image_Path = scrapy.Field()
    # Image name (disabled).
    # nickname = scrapy.Field()


# --- pipelines ---
# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html

import os
# Used to move the downloaded files.
import shutil

import scrapy
from scrapy.exceptions import DropItem
# Scrapy's built-in image download pipeline.
from scrapy.pipelines.images import ImagesPipeline
# Access to the project settings.
from scrapy.utils.project import get_project_settings


class ImagesPipelinse(ImagesPipeline):
    """Download each item's image, then file it under a per-title directory."""

    # Value of IMAGES_STORE as configured in the project settings.
    IMAGES_STORE = get_project_settings().get("IMAGES_STORE")

    def get_media_requests(self, item, info):
        """Issue the download request for the item's image.

        The Referer header is sent with the request as an anti-anti-crawler
        measure.
        """
        yield scrapy.Request(
            item["imge_url"],
            headers={'Referer': item['Referer']},
        )

    def item_completed(self, result, item, info):
        """Move the downloaded file into ``<IMAGES_STORE>/<title>/``.

        Sets ``item['image_Path']`` to the final location and returns the
        item.

        Raises:
            DropItem: if no download for this item succeeded.
        """
        image_paths = [x["path"] for ok, x in result if ok]
        if not image_paths:
            # Without this guard a failed download would surface as an
            # opaque IndexError further down.
            raise DropItem("Item contains no downloaded image: %s" % item['imge_url'])
        relative_path = image_paths[0]

        # Directory used for classified storage; create it when missing.
        # NOTE(review): the title is used verbatim as a directory name --
        # confirm it never contains characters the filesystem rejects.
        target_dir = os.path.join(self.IMAGES_STORE, item['title'])
        os.makedirs(target_dir, exist_ok=True)

        # Keep only the file name. The previous find("full\\") + 6 slice
        # only worked because the marker was never found in the
        # "full/<name>" paths Scrapy produces.
        target_path = os.path.join(target_dir, os.path.basename(relative_path))

        # Move the file from the default download location to the target.
        shutil.move(os.path.join(self.IMAGES_STORE, relative_path), target_path)
        item['image_Path'] = target_path
        return item


# This is where images get saved into different directories: the key
# function is shutil.move(), which moves each image from the original
# default path into the designated directory.
轉載于:https://www.cnblogs.com/contiune/p/9384973.html
總結
以上是生活随笔為你收集整理的Scrapy爬取妹子图保存到不同目录下的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: SpringBoot | 第十五章:基于
- 下一篇: 【NLP_Stanford课堂】正则表达