Scraping movie download links from 66ys (66影视) with Python, with a search feature
This collected post introduces a Python scraper that grabs movie download links from 66ys and supports keyword search; it is shared here for reference.
The highlight of this script is BeautifulSoup's select() method, which retrieves elements directly by their hierarchical tag path in the HTML page.
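For readers who have not used it, select() takes a CSS selector string and returns the matching tags. Below is a minimal, self-contained sketch (Python 3-style print; the HTML fragment and class names are invented for illustration and are not the real 66ys markup):

# Minimal illustration of BeautifulSoup's select(); the HTML below is made up.
from bs4 import BeautifulSoup

html = """
<div class="wrap">
  <ul>
    <li><a href="/film/1.html">Film One</a></li>
    <li><a href="/film/2.html">Film Two</a></li>
  </ul>
</div>
"""

soup = BeautifulSoup(html, "html.parser")
# '>' selects a direct child, a plain space selects any descendant.
for link in soup.select("div.wrap > ul > li > a"):
    print(link["href"], link.get_text(strip=True))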
# -*- coding=gb18030 -*-
__author__ = 'vincent'

import sys
import urllib
import urllib2
import cookielib
from bs4 import BeautifulSoup


class Spider66ys:
    headers = None
    home_url = None

    def __init__(self):
        self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 5.1; rv:50.0) Gecko/20100101 Firefox/50.0'}
        self.home_url = "http://www.66ys.tv"

    # Fetch the raw HTML of a page
    def get_html(self, url):
        print "Fetching page [", url, "]..."
        if len(url) == 0:
            print "Input url is null!"
            sys.exit(0)
        request = urllib2.Request(url, headers=self.headers)
        response = urllib2.urlopen(request)
        html = response.read()
        return html

    # Extract the download links from a film's detail page and append them to `film`
    def get_download_url(self, film):
        print "Getting download links for film [", film[1], "] from page [", film[0], "]..."
        html = self.get_html(film[0])
        # The site is served in gb18030, so tell BeautifulSoup explicitly.
        soup = BeautifulSoup(html, "lxml", from_encoding="gb18030")
        results = soup.select("html > body > div.wrap > div.mainleft > div.contentinfo "
                              "> div#text > table > tbody > tr > td > a")
        for result in results:
            film.append(result['href'])

    # Fetch the latest film updates from the home page
    def get_new_update(self):
        new_film_list = []
        print "Fetching latest film updates from [", self.home_url, "]..."
        html = self.get_html(self.home_url)
        soup = BeautifulSoup(html, "lxml", from_encoding="gb18030")
        results = soup.select("html > body > div.wrap > div.tnlist > ul > li > a")
        for result in results:
            # Each entry is [detail_url, title, download_link_1, download_link_2, ...]
            film = [result['href'], result.getText().encode('gb18030').strip()]
            self.get_download_url(film)
            new_film_list.append(film)
        return new_film_list

    # Search 66ys for films matching a keyword
    def search_film(self, content):
        search_film_list = []
        search_url = self.home_url + "/e/search/index.php"
        print "Searching for film [", content, "]..."
        postDict = {
            "keyboard": content,
            "show": "title,smalltext",
            "submit": "",
            "tbname": "Article",
            "tempid": "1"
        }
        postData = urllib.urlencode(postDict)
        # Install a cookie-aware opener so urlopen() keeps the session cookies.
        cookie_jar = cookielib.LWPCookieJar()
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
        urllib2.install_opener(opener)
        request = urllib2.Request(search_url, postData, headers=self.headers)
        response = urllib2.urlopen(request)
        html = response.read()
        soup = BeautifulSoup(html, "lxml", from_encoding="gb18030")
        # The "no results" page renders a single bold message inside table.tableborder.
        results = soup.select("html > body > table.tableborder > tr > td > div > b")
        if len(results) == 1:
            print "No matching results found."
            return search_film_list
        results = soup.select("html > body > div > div.wrap > div.mainleft > div.channellist "
                              "> div.listBox > ul > li div.listInfo > h3 > a")
        for result in results:
            film = [result['href'], result.getText().encode('gb18030').strip()]
            self.get_download_url(film)
            search_film_list.append(film)
        print "Found [", len(results), "] films in total."
        return search_film_list


if __name__ == "__main__":
    spider = Spider66ys()
    # new_film_list = spider.get_new_update()
    # for film in new_film_list:
    #     for info in film:
    #         print info, "\t"
    #     print ""
    content = "冰與火之歌"  # search keyword: "A Song of Ice and Fire"
    search_film_list = spider.search_film(content)
    for film in search_film_list:
        print film[1], ":"
        for info in film[2:]:
            print info
        print "-" * 200
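The script above is written for Python 2 (urllib2, cookielib, print statements). As a rough sketch only, the same search POST could be issued under Python 3 with the third-party requests package; this assumes the endpoint /e/search/index.php and the form fields (keyboard, show, submit, tbname, tempid) from the original code are still what the site expects. requests URL-encodes the form body and handles cookies itself, so the cookielib/install_opener plumbing disappears.

# Python 3 sketch of the search request; requires `pip install requests beautifulsoup4 lxml`.
# Endpoint, form fields, and selector are taken from the original script and may have changed.
import requests
from bs4 import BeautifulSoup

home_url = "http://www.66ys.tv"
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 5.1; rv:50.0) Gecko/20100101 Firefox/50.0"}

def search_film(keyword):
    data = {
        "keyboard": keyword,
        "show": "title,smalltext",
        "submit": "",
        "tbname": "Article",
        "tempid": "1",
    }
    resp = requests.post(home_url + "/e/search/index.php", data=data, headers=headers)
    soup = BeautifulSoup(resp.content, "lxml", from_encoding="gb18030")
    # Tail of the result-list selector used in the original script.
    links = soup.select("div.listBox > ul > li div.listInfo > h3 > a")
    return [(a.get_text(strip=True), a["href"]) for a in links]

if __name__ == "__main__":
    for title, href in search_film("冰與火之歌"):
        print(title, href)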
Reposted from: https://www.cnblogs.com/stupid-vincent/p/6279794.html
Summary
The above is the full content of this post on scraping 66ys movie download links with Python; hopefully it helps you solve the problem you ran into.