bs4抓起大众点评的用户评论
生活随笔
收集整理的這篇文章主要介紹了
bs4抓起大众点评的用户评论
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
為什么80%的碼農都做不了架構師?>>>
抓起大眾點評的用戶評論
# -*- coding: utf-8 -*-
"""Scrape restaurant listings, shop details and user reviews from dianping.com.

Pages are downloaded with ``urllib.request`` and parsed with BeautifulSoup's
built-in ``html.parser``.
"""
__author__ = 'Administrator'

import sys
import time
import urllib.request

sys.path.append('./')  # lets the ``sql`` module in the working directory be imported
import sql

from bs4 import BeautifulSoup

# Header pair sent with ranking-page and shop-page requests.
_USER_AGENT = (
    'User-Agent',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/45.0.2454.93 Safari/537.36',
)


def Mysqls():
    """Return ``sql.Mysql('127.0.0.1', 'root', '123456', 'test_msccms')``.

    Currently unused: the persistence code in ``dianping.run`` is disabled.
    """
    # NOTE(review): credentials are hard-coded; presumably host/user/password/
    # database -- confirm against sql.Mysql and move them to configuration.
    return sql.Mysql('127.0.0.1', 'root', '123456', 'test_msccms')


class dianping:
    """Scraper for dianping.com ranking, review and shop-detail pages.

    ``get_ct_url`` and ``get_ct_pinlun`` append their results to the
    instance's lists, so calling them repeatedly on one instance accumulates
    results across pages.
    """

    def __init__(self):
        self.names = ''    # text of the first <span> of the last parsed page
        self.cturl = []    # shop URLs collected by get_ct_url
        self.ctname = []   # shop names collected by get_ct_url
        self.ctaddr = []   # shop addresses collected by get_ct_url
        self.users = []    # reviewer names collected by get_ct_pinlun
        self.datas = []    # review texts collected by get_ct_pinlun
        self.tms = []      # review timestamps collected by get_ct_pinlun

    @staticmethod
    def _fetch_as_browser(url):
        """Download *url* with the browser User-Agent and return the body bytes."""
        opener = urllib.request.build_opener()
        opener.addheaders = [_USER_AGENT]
        # The context manager closes the HTTP response once the body is read.
        with opener.open(url) as page:
            return page.read()

    def get_ct_url(self, htmlurl):
        """Parse a ranking page and collect shop URLs, names and addresses.

        Args:
            htmlurl: URL of the ranking page to download.

        Returns:
            The tuple ``(self.cturl, self.ctname, self.ctaddr)``.

        Raises:
            AttributeError: if the page has no ``<span>``, or an element with
                class ``field-addr`` contains no ``<div>``.
        """
        self.htmlurl = htmlurl
        soup = BeautifulSoup(self._fetch_as_browser(htmlurl), "html.parser")
        self.names = soup.span.string
        print('\n店名:', self.names)

        # Restaurant names.
        for node in soup.find_all(attrs={"class": "field-name"}):
            try:
                # The original author notes this print must stay; it is kept
                # so console output is unchanged.  Any failure here (a missing
                # <div>, or presumably a console encoding error) stores an
                # empty name so the list keeps one entry per element.
                print(node)
                name = BeautifulSoup(str(node), "html.parser").div.string
            except Exception:
                name = ''
            self.ctname.append(name)

        # Restaurant addresses.
        for node in soup.find_all(attrs={"class": "field-addr"}):
            self.ctaddr.append(BeautifulSoup(str(node), "html.parser").div.string)

        # Restaurant URLs: only target="_blank" elements whose <a> has no
        # single text string are kept.
        for node in soup.find_all(attrs={"target": "_blank"}):
            anchor = BeautifulSoup(str(node), "html.parser").a
            if anchor.string is None:
                self.cturl.append(anchor.attrs['href'])

        return self.cturl, self.ctname, self.ctaddr

    def get_ct_pinlun(self, htmlurl):
        """Parse a review page and collect reviewer names, texts and times.

        Args:
            htmlurl: URL of the review page to download.

        Returns:
            The tuple ``(self.names, self.htmlurl, self.users, self.datas,
            self.tms)``.  Reviews whose text cannot be extracted are skipped,
            so ``datas`` may be shorter than ``users`` and ``tms``.
        """
        self.htmlurl = htmlurl
        # This request goes through plain urlopen, without the browser header.
        with urllib.request.urlopen(self.htmlurl) as page:
            htmlline = page.read()
        soup = BeautifulSoup(htmlline, "html.parser")
        self.names = soup.span.string
        print('\n店名:', self.names)

        for node in soup.find_all(attrs={"class": "name", "rel": "nofollow"}):
            self.users.append(BeautifulSoup(str(node), "html.parser").a.string)

        for node in soup.find_all("span", {"class": "time"}):
            # The markup is already str, so no encoding hint is passed
            # (encoding options only apply to byte input).
            stamp = BeautifulSoup(str(node), "html.parser").span.string
            # Drop the doubled non-breaking spaces found inside the timestamp.
            self.tms.append(str(stamp).replace('\xa0\xa0', ''))

        for node in soup.find_all("p", {"class": "desc"}):
            tokens = str(node).split()
            try:
                # Second whitespace-separated token of the serialized <p>,
                # cut to the text between its first '>' and the next '<'.
                # NOTE(review): this truncates a review at its first
                # whitespace -- confirm whether that is intended.
                text = tokens[1].split('>')[1].split('<')[0]
            except IndexError:
                continue
            self.datas.append(text)

        return self.names, self.htmlurl, self.users, self.datas, self.tms

    def get_ct_info(self, htmlurl):
        """Parse a shop page and return its name, phone and address.

        Args:
            htmlurl: URL of the shop page to download.

        Returns:
            The tuple ``(names, phones, addrs)`` -- note phone comes before
            address.

        Raises:
            AttributeError: if the title, address or phone element is missing.
            IndexError: if the address or phone text is empty.
        """
        self.htmlurl = htmlurl
        soup = BeautifulSoup(self._fetch_as_browser(htmlurl), "html.parser")

        # Restaurant name: the part of the page title before the phone label.
        # NOTE(review): the literal is Traditional Chinese; if the live page
        # title uses the Simplified form the split never matches -- confirm.
        names = soup.title.string.split('電話')[0]

        # Address: the stringified result list is re-parsed and the first
        # whitespace-separated token of its first <span> is used.
        addr_nodes = soup.find_all(attrs={"class": "item", "itemprop": "street-address"})
        addrs = BeautifulSoup(str(addr_nodes), "html.parser").span.string.split()[0]

        # Phone: same extraction as the address.
        phone_nodes = soup.find_all(attrs={"class": "item", "itemprop": "tel"})
        phones = BeautifulSoup(str(phone_nodes), "html.parser").span.string.split()[0]

        return names, phones, addrs

    def run(self, htmlurl):
        """Scrape one ranking page and print the details of each listed shop.

        Shops whose details cannot be fetched or parsed are reported with an
        ``F`` line and skipped.  One second is slept between shops.

        Args:
            htmlurl: URL of the ranking page to process.
        """
        # A fresh instance is used so results of earlier pages are not mixed in.
        cturl, ctname, ctaddr = dianping().get_ct_url(htmlurl)

        # Entry 0 is skipped -- presumably a header row; confirm against the page.
        for n, u in enumerate(ctname[1:], start=1):
            try:
                print(htmlurl, cturl[n], u, ctaddr[n])
                # get_ct_info returns (name, phone, address) in that order.
                names, phones, addrs = dianping().get_ct_info(cturl[n])
                print(names, phones, addrs)
                # TODO: persistence via Mysqls() is disabled.  When it is
                # re-enabled, use parameterized queries rather than
                # %-formatting values into the INSERT statement.
            except Exception:
                # Best effort per shop; Ctrl-C still propagates because
                # KeyboardInterrupt is not an Exception subclass.
                print('F', u)
            time.sleep(1)


if __name__ == "__main__":
    url = 'http://dpindex.dianping.com/dpindex?type=rank&p='
    for i in range(1, 51):
        page_url = url + str(i)
        print(page_url)
        # Scrape the page that was just printed rather than always page 1.
        dianping().run(page_url)

# Reposted from: https://my.oschina.net/jk409/blog/659108
《新程序員》:云原生和全面數字化實踐50位技術專家共同創作,文字、視頻、音頻交互閱讀總結
以上是生活随笔為你收集整理的bs4抓起大众点评的用户评论的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 【oracle】TNS-03505: 无
- 下一篇: LVS总结