Python3--批量爬取数据之调用有道api进行翻译
生活随笔
收集整理的這篇文章主要介紹了
Python3--批量爬取数据之调用有道api进行翻译
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
上代碼:
# coding=utf-8
import csv
import hashlib
import json
import os
import random
import re
import time
import urllib
import urllib.parse
import urllib.request

import requests
from fake_useragent import UserAgent


class YouDaoFanyi:
    """Small client for the Youdao translation open API (signed GET requests)."""

    def __init__(self, appKey, appSecret):
        """Store the API credentials and the default request settings.

        :param appKey: application id issued by Youdao
        :param appSecret: application secret issued by Youdao
        """
        self.url = 'https://openapi.youdao.com/api/'
        # A random User-Agent string is picked once per client instance.
        self.headers = {
            'User-Agent': str(UserAgent().random)}
        self.appKey = appKey  # application id
        self.appSecret = appSecret  # application secret
        self.langFrom = 'EN'  # source language; 'auto' lets the API detect it
        self.langTo = 'zh-CHS'  # target language; 'auto' lets the API choose

    def getUrlEncodedData(self, queryText):
        """Build the signed, URL-encoded query string for one request.

        The signature is the MD5 hex digest of
        ``appKey + queryText + salt + appSecret`` encoded as UTF-8.

        :param queryText: text to translate
        :return: URL-encoded query string (without the leading '?')
        """
        # The salt is meant to be a random number, but a fixed value such as
        # "2" is accepted as well.
        salt = '2'
        sign_str = self.appKey + queryText + salt + self.appSecret
        sign_str = sign_str.encode('utf-8')
        sign = hashlib.md5(sign_str).hexdigest()

        payload = {
            'q': queryText,
            'from': self.langFrom,
            'to': self.langTo,
            'appKey': self.appKey,
            'salt': salt,
            'sign': sign
        }

        # The parameters are sent in the URL of a GET request, not as a
        # request body.
        data = urllib.parse.urlencode(payload)
        return data

    def parseHtml(self, html):
        """Extract the translation from the JSON response and print it.

        :param html: response body returned by the API (JSON text)
        :return: the first translation string, or None when the response
            carries no translation (for example an API error response)
        :raises ValueError: if ``html`` is not valid JSON (from json.loads)
        """
        data = json.loads(html)
        print('-------------------------')
        translationResult = data.get('translation')
        if not translationResult:
            # Error responses have no 'translation' entry; report the error
            # code instead of aborting the whole batch with a KeyError.
            print('translation failed, errorCode=' + str(data.get('errorCode')))
            return None
        if isinstance(translationResult, list):
            translationResult = translationResult[0]
        print(translationResult)
        return translationResult

    def translate(self, queryText):
        """Translate ``queryText`` and return the result.

        :param queryText: text to translate
        :return: translated text, or None when the API returned no translation
        """
        data = self.getUrlEncodedData(queryText)  # URL-encoded parameters
        target_url = self.url + '?' + data  # final request URL
        # The proxy list is re-read from disk on every call and one entry is
        # chosen at random.
        ip_list = get_ip_list()
        proxies = get_random_ip(ip_list)
        print('隨機ip為:' + str(proxies))
        req = requests.get(target_url, proxies=proxies, headers=self.headers)
        req.encoding = 'utf-8'
        html = req.text
        translationResult = self.parseHtml(html)  # parse and print the result
        return translationResult


def read_file(filepath):
    """Read a '|'-delimited CSV file and return its non-empty rows.

    :param filepath: path of the UTF-8 encoded input file
    :return: list of rows, each row being a list of strings
    """
    with open(filepath, 'r', encoding='utf-8') as csvfile:
        spanreader = csv.reader(csvfile, delimiter='|', quoting=csv.QUOTE_MINIMAL)
        return [row for row in spanreader if row]


def write_file(filepath, row):
    """Append one row to a '|'-delimited CSV file.

    :param filepath: path of the UTF-8 encoded output file
    :param row: sequence of field values to write
    """
    # newline='' is required here; otherwise the csv writer output ends up
    # with an extra blank line after every row.
    with open(filepath, 'a+', encoding='utf-8', newline='') as csvfile:
        spanwriter = csv.writer(csvfile, delimiter='|', quoting=csv.QUOTE_MINIMAL)
        spanwriter.writerow(row)


def get_ip_list():
    """Return the non-blank lines of ``IP.txt`` (one proxy address per line).

    No validation of the addresses is performed. Lines keep their trailing
    newline; blank lines are skipped so they can never be picked as a proxy.
    """
    with open('IP.txt', 'r') as f:
        return [line for line in f if line.strip()]


def get_random_ip(ip_list):
    """Pick a random entry of ``ip_list`` and wrap it as a requests proxy map.

    :param ip_list: non-empty list of proxy addresses
    :return: dict of the form ``{'http': address}``
    :raises IndexError: if ``ip_list`` is empty (from random.choice)
    """
    proxy_ip = random.choice(ip_list)
    # Strip all surrounding whitespace so that '\r\n' line endings do not
    # leave a stray '\r' inside the proxy address.
    proxy_ip = proxy_ip.strip()
    # NOTE(review): only the 'http' scheme is mapped while the API endpoint
    # used by YouDaoFanyi is https, so requests will most likely not route
    # the call through this proxy -- confirm whether an 'https' entry is
    # wanted before adding one.
    proxies = {'http': proxy_ip}
    return proxies


def main():
    """Translate the first column of every input row and write the rows out."""
    print('程序開始運行!')
    appKey = '應用id'  # application id
    appSecret = '應用密鑰'  # application secret
    fanyi = YouDaoFanyi(appKey, appSecret)
    reader = read_file('E_baiduBaike_notHaveChinese.csv')

    output_path = '經有道翻譯處理后的文件/E_baiduBaike_youdaoChinese.csv'
    # Make sure the output directory exists before the first append.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # Compiled once: matches any run of CJK unified ideographs.
    zhPattern = re.compile(u'[\u4e00-\u9fa5]+')

    for row in reader:
        print('現在翻譯的人名是:' + row[0])
        translationResult = fanyi.translate(row[0])
        print('翻譯結果為:' + str(translationResult))
        # Only accept the result when a translation came back and it
        # actually contains Chinese characters.
        if translationResult and zhPattern.search(translationResult):
            row[6] = translationResult
        # NOTE(review): the original layout was lost; every row is written
        # (translated or not) so that no input row is dropped -- confirm.
        write_file(output_path, row)
    print('爬取完成')


if __name__ == "__main__":
    main()
以上是生活随笔為你收集整理的Python3--批量爬取数据之调用有道api进行翻译的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: Python--strip()学习记录
- 下一篇: Python3--批量爬取数据之调用百度