當前位置：首頁 > 编程语言 > python >内容正文

python

Python微博图

發布時間：2023/12/20 python 24 豆豆

生活随笔收集整理的這篇文章主要介紹了《Python微博图》，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

"""Scrape a Weibo user's profile, posts and post images from m.weibo.cn.

Run as a script: it prompts for a numeric Weibo uid, prints the user's
profile, then walks the user's timeline page by page, saving images under
./111programmer/ and appending post metadata to ./111/<uid>.txt.
"""
import json
import os
import random
import re
import time
import urllib.request

import requests

# Placeholder for the user's screen name. Nothing below reads it; it is kept
# only so the module still exposes the same global names as before.
na = 'a'

# Pool of HTTP proxies ("host:port") to choose from at random.
iplist = [
    '112.228.161.57:8118',
    '125.126.164.21:34592',
    '122.72.18.35:80',
    '163.125.151.124:9999',
    '114.250.25.19:80',
]

# Default proxy address (one of the entries in the pool above).
proxy_addr = "163.125.151.124:9999"

# Base endpoint; the uid is appended directly to this string.
_API_URL = 'https://m.weibo.cn/api/container/getIndex?type=uid&value='

# Seconds to wait for any single HTTP request before giving up.
_REQUEST_TIMEOUT = 30

# Consecutive failures tolerated on one page before the crawl stops.
_MAX_PAGE_RETRIES = 3


def use_proxy(url, proxy_addr):
    """Fetch *url* with a browser-like User-Agent and return the body as text.

    Args:
        url: Address to request.
        proxy_addr: "host:port" of the HTTP proxy to register. When falsy, a
            random entry of ``iplist`` is used instead.

    Returns:
        The response body decoded as UTF-8, with undecodable bytes dropped.

    Raises:
        urllib.error.URLError: If the request fails or times out.
    """
    req = urllib.request.Request(url)
    req.add_header(
        "User-Agent",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0",
    )
    # NOTE: the mapping only registers a proxy for the "http" scheme, so
    # https:// URLs are not routed through it.
    proxy = urllib.request.ProxyHandler(
        {'http': proxy_addr or random.choice(iplist)}
    )
    # Use a private opener instead of install_opener() so that each call does
    # not overwrite the process-wide default opener.
    opener = urllib.request.build_opener(proxy, urllib.request.HTTPHandler)
    with opener.open(req, timeout=_REQUEST_TIMEOUT) as resp:
        return resp.read().decode('utf-8', 'ignore')


def get_containerid(url):
    """Return the containerid of the user's 'weibo' tab.

    Args:
        url: Profile API URL for the user.

    Returns:
        The ``containerid`` of the last tab whose ``tab_type`` is 'weibo'.

    Raises:
        ValueError: If the response contains no tab of type 'weibo'.
    """
    data = use_proxy(url, random.choice(iplist))
    content = json.loads(data).get('data') or {}
    tabs = (content.get('tabsInfo') or {}).get('tabs') or []
    containerid = None
    for tab in tabs:
        if tab.get('tab_type') == 'weibo':
            containerid = tab.get('containerid')
    if containerid is None:
        raise ValueError("no tab with tab_type 'weibo' in response for " + url)
    return containerid


def get_userInfo(id):
    """Fetch the profile of user *id* and print a summary to stdout.

    Args:
        id: The Weibo uid as a string.
    """
    url = _API_URL + id
    data = use_proxy(url, random.choice(iplist))
    content = json.loads(data).get('data')
    user = content.get('userInfo')

    profile_image_url = user.get('profile_image_url')
    description = user.get('description')
    profile_url = user.get('profile_url')
    verified = user.get('verified')
    guanzhu = user.get('follow_count')
    name = user.get('screen_name')
    fensi = user.get('followers_count')
    urank = user.get('urank')
    # Any value other than "f" is reported as male, as in the original logic.
    gender = "女" if user.get('gender') == "f" else "男"

    # str() around every field so a missing (None) value prints instead of
    # raising TypeError during concatenation.
    print("微博昵稱：" + str(name) + "\n"
          + "微博主頁地址：" + str(profile_url) + "\n"
          + "微博頭像地址：" + str(profile_image_url) + "\n"
          + "是否認證：" + str(verified) + "\n"
          + "微博說明：" + str(description) + "\n"
          + "關注人數：" + str(guanzhu) + "\n"
          + "粉絲數：" + str(fensi) + "\n"
          + "性別：" + gender + "\n"
          + "微博等級：" + str(urank) + "\n")


def _download_images(mblog, directory):
    """Save every image URL found in *mblog* into *directory*."""
    blob = str(mblog)
    # Posts without an 'original_pic' entry carry no pictures.
    if 'original_pic' not in blob:
        return
    img_urls = re.findall(r"'url': '(.+?)'", blob)
    # Filename stem: the current timestamp without its dot, minus the first
    # seven characters.
    stamp = str(time.time()).replace('.', '')[7:]
    for n, img in enumerate(img_urls, start=1):
        print('第' + str(n) + ' 張', end='')
        # Download before opening the file so a failed request does not leave
        # an empty file behind.
        payload = requests.get(img, timeout=_REQUEST_TIMEOUT).content
        # The index is part of the name: the last five URL characters alone do
        # not keep images of the same post from overwriting each other.
        target = directory + stamp + '_' + str(n) + '_' + img[-5:]
        with open(target, 'wb') as f:
            f.write(payload)
        print('...OK!')
        # Pause periodically to avoid being throttled.
        if (n + 1) % 3 == 0:
            time.sleep(3)


def _write_post_record(file, page, index, card, mblog):
    """Append the metadata of one post to the text file *file*."""
    attitudes_count = mblog.get('attitudes_count')   # likes
    comments_count = mblog.get('comments_count')     # comments
    created_at = mblog.get('created_at')             # publication time
    reposts_count = mblog.get('reposts_count')       # reposts
    scheme = card.get('scheme')                      # URL of the post
    text = mblog.get('text')                         # post body
    with open(file, 'a', encoding='utf-8') as fh:
        fh.write("----第" + str(page) + "頁，第" + str(index) + "條微博----" + "\n")
        fh.write("微博地址：" + str(scheme) + "\n"
                 + "發布時間：" + str(created_at) + "\n"
                 + "微博內容：" + str(text) + "\n"
                 + "點贊數：" + str(attitudes_count) + "\n"
                 + "評論數：" + str(comments_count) + "\n"
                 + "轉發數：" + str(reposts_count) + "\n")


def get_weibo(id, file):
    """Crawl all timeline pages of user *id*, saving images and post metadata.

    Images go to ./111programmer/; post metadata is appended to *file*. The
    crawl ends at the first page without cards, or after
    ``_MAX_PAGE_RETRIES`` consecutive failures on the same page.

    Args:
        id: The Weibo uid as a string.
        file: Path of the UTF-8 text file that post records are appended to.

    Raises:
        ValueError: If the user's 'weibo' tab cannot be located.
    """
    directory = './111programmer/'
    os.makedirs(directory, exist_ok=True)
    parent = os.path.dirname(file)
    if parent:
        os.makedirs(parent, exist_ok=True)

    url = _API_URL + id
    # The containerid does not depend on the page, so look it up only once.
    containerid = get_containerid(url)

    page = 1
    failures = 0
    while True:
        weibo_url = url + '&containerid=' + containerid + '&page=' + str(page)
        try:
            data = use_proxy(weibo_url, random.choice(iplist))
            content = json.loads(data).get('data') or {}
            cards = content.get('cards')
            if not cards:
                # No (more) cards: the timeline is exhausted.
                break
            for j, card in enumerate(cards):
                print("-----正在爬取第" + str(page) + "頁，第" + str(j) + "條微博------")
                # NOTE(review): card_type 9 appears to mark an ordinary post
                # card; other card types are skipped — confirm against the API.
                if card.get('card_type') == 9:
                    mblog = card.get('mblog')
                    _download_images(mblog, directory)
                    _write_post_record(file, page, j, card, mblog)
            page += 1
            failures = 0
        except Exception as e:
            # Best-effort crawl: report the problem and retry the same page,
            # but give up after repeated failures instead of looping forever.
            print(e)
            failures += 1
            if failures >= _MAX_PAGE_RETRIES:
                break


if __name__ == "__main__":
    uid = input("請輸入要抓的微博uid:").strip()
    out_file = './111/' + uid + ".txt"
    get_userInfo(uid)
    get_weibo(uid, out_file)

總結

以上是生活随笔為你收集整理的Python微博图的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇：计算机显示找不到gpedit,Win7系
下一篇： ubuntu 18.04 英伟达显卡驱动

3atv精品不卡视频,97人人超碰国产精品最新,中文字幕av一区二区三区人妻少妇,久久久精品波多野结衣,日韩一区二区三区精品

python

Python微博图

總結