一.selenium爬取好友QQ空间日志说说
生活随笔
收集整理的這篇文章主要介紹了
一.selenium爬取好友QQ空间日志说说
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
#coding:utf-8
from selenium import webdriver
import time,os
import xlrd,xlwt
from xlutils.copy import copy
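# Uses selenium.
# Originally drove selenium's headless PhantomJS browser: log in to the account, then read the page content.
# Note: the page is split across frame/iframe elements, so the driver has to switch between them.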
#driver = webdriver.PhantomJS(executable_path="E:\\mac\\id\\phantomjs-2.1.1-windows\\phantomjs-2.1.1-windows\\bin\\phantomjs.exe")
driver=webdriver.Chrome()
#driver.set_preference('network.proxy.type', 1)
#driver.set_preference('network.proxy.http', '127.0.0.1')
#driver.set_preference('network.proxy.http_port', 17890)
driver.maximize_window()


def get_shuoshuo(qq, path):
    """Scrape the posts of one Qzone user into the workbook at ``path``.

    The workbook is (re)created empty first, then one row per post is
    appended: column 0 holds the text of the post's time element, column 1
    the text of its content element.

    The module-level ``driver`` is always shut down before this function
    returns, whether scraping succeeded, was refused, or raised.

    :param qq: QQ number of the space to open (interpolated into the URL).
    :param path: file path of the workbook to write.
    """
    testexist(path)
    try:
        _scrape_shuoshuo(qq, path)
    finally:
        # Single shutdown point: the original called quit() in each failure
        # branch and then carried on using the dead driver.
        driver.quit()


def _scrape_shuoshuo(qq, path):
    """Open the space, log in, and append every visible post to ``path``.

    Returns early (after printing a message) when the page fails to load,
    when no login box is present, or when the space does not load after
    logging in. Does not quit the driver; the caller does.
    """
    try:
        driver.set_page_load_timeout(10)
        # NOTE(review): '/311' presumably selects the shuoshuo app page -
        # confirm against the site.
        driver.get('http://user.qzone.qq.com/{}/311'.format(qq))
        time.sleep(3)
    except Exception:
        print(u'網頁啟動異常,請重新打開')
        time.sleep(2)
        return

    try:
        driver.find_element_by_id('login_div')
    except Exception:
        # No login box on the page is treated as "access denied".
        print(u"非好友無法進入空間無權限抓取內容")
        return

    # Log in to Qzone; the login form lives inside its own frame.
    driver.switch_to.frame('login_frame')
    driver.find_element_by_id('switcher_plogin').click()
    driver.find_element_by_id('u').clear()  # user name box
    driver.find_element_by_id('u').send_keys('#####')  # your login account
    driver.find_element_by_id('p').clear()
    driver.find_element_by_id('p').send_keys('#####')  # your login password
    driver.find_element_by_id('login_button').click()
    time.sleep(3)
    driver.implicitly_wait(3)

    try:
        driver.find_element_by_id('QM_OwnerInfo_Icon')
    except Exception:
        print(u'空間加載異常,請重新打開')
        time.sleep(2)
        return

    # The post list is rendered inside this frame.
    driver.switch_to.frame('app_canvas_frame')
    page = 1
    try:
        # The loop has no exit condition of its own: it ends when looking up
        # or clicking the "next page" link (or anything else in it) raises.
        while True:
            contents = driver.find_elements_by_css_selector('.content')
            stamps = driver.find_elements_by_css_selector(
                '.c_tx.c_tx3.goDetail')
            for con, sti in zip(contents, stamps):
                write_data(sti.text, con.text, path)
            # The literal in the published listing was corrupted by a text
            # conversion and could never match a link.
            # NOTE(review): restored to the Simplified Chinese "next page"
            # label - confirm against the live page.
            next_page = driver.find_element_by_link_text(u'下一页')
            page = page + 1
            print(u'正在抓取第%d頁面內容······' % page)
            next_page.click()
            time.sleep(3)
            driver.implicitly_wait(3)
    except Exception:
        # Best effort, as in the original: any failure ends the crawl.
        print(u'抓取到%d頁面結束' % page)


def testexist(path):
    """Create a fresh workbook with one empty sheet 'Sheet1' at ``path``.

    An existing file at ``path`` is deleted first, so earlier contents are
    always discarded.
    """
    if os.path.exists(path):
        os.remove(path)
    w = xlwt.Workbook()
    w.add_sheet('Sheet1')
    w.save(path)


def write_data(data1, data2, path):
    """Append one row (``data1``, ``data2``) to 'Sheet1' of the workbook.

    The workbook is re-read, copied and saved in full on every call; the row
    index used is the sheet's current row count.

    :param data1: value written to column 0.
    :param data2: value written to column 1.
    :param path: path of a workbook previously created by ``testexist``.
    """
    f = xlrd.open_workbook(path)
    sheet = f.sheet_by_name('Sheet1')
    src = copy(f)
    row = sheet.nrows  # first unused row
    out_sheet = src.get_sheet(0)
    out_sheet.write(row, 0, data1)
    out_sheet.write(row, 1, data2)
    src.save(path)


if __name__ == '__main__':
    # The output path is hard-coded; the original listing also carried a
    # commented-out raw_input() prompt asking for it.
    # NOTE(review): the file is written by xlwt, so it is an Excel workbook
    # despite the .csv suffix - confirm the suffix is intended.
    work_path = 'E:\\0930\\WWWW.csv'
    get_shuoshuo('######', work_path)  # friend's QQ number
總結
以上是生活随笔為你收集整理的一.selenium爬取好友QQ空间日志说说的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 为什么火线魔盒显示服务器繁忙,火线魔盒使
- 下一篇: Device eth0 does not