python 多线程下载_Python3 多线程下载代码
'''Multi-threaded HTTP downloader with resume support (Python 3).

Created on 2014-10-24
@author: Maple
'''
import getopt
import os
import shutil
import sys
import time
import urllib.parse
import urllib.request
from threading import Thread

# ===============================================================================
# download(url, target=None, blocks=6, proxies=local_proxies)
#   target:  output directory, defaults to the current directory
#   blocks:  number of download threads
#   proxies: proxy address
# ===============================================================================
local_proxies = {}  # default proxy setting; empty means "no proxy"

# Sample URL that main() downloads when no -u/--url option is given.
_DEMO_URL = 'http://www.pygtk.org/dist/pygtk2-tut.pdf'

_USAGE = (
    'Usage: [-u URL] [-t DIR] [-n THREADS] [-p PROXY]\n'
    '  -h, --help        show this message and exit\n'
    '  -u, --url=URL     address of the file to download\n'
    '  -t, --target=DIR  directory to save the file in (default: current directory)\n'
    '  -n, --num=N       number of download threads (default: 2)\n'
    '  -p, --proxy=ADDR  proxy, e.g. user/passwd@http://127.0.0.1:8087'
)


class Maple(Thread):
    '''Worker thread that downloads one byte range of a URL into a part file.

    Attributes read by the caller:
        downloaded: number of bytes of this part that are on disk so far.
    '''

    version = "Mozilla/5.0"  # value sent as the User-agent request header

    def __init__(self, threadname, url, filename, ranges=0, proxies=None):
        '''Store the download parameters.

        threadname: name given to the thread.
        url:        address to download from.
        filename:   path of the part file this thread appends to.
        ranges:     inclusive (first_byte, last_byte) pair, or a falsy value
                    when the total size is unknown and the whole remainder
                    must be fetched.
        proxies:    proxy description passed on to GetUrlOpener.
        '''
        Thread.__init__(self, name=threadname)
        self.name = threadname
        self.url = url
        self.proxies = proxies if proxies is not None else {}
        self.filename = filename
        self.ranges = ranges
        self.downloaded = 0

    def run(self):
        '''Download the assigned range, resuming after any bytes already on disk.'''
        try:
            # Size of an existing part file = bytes already fetched (resume support).
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            self.downloaded = 0

        if self.ranges:
            first, last = self.ranges
            # Continue right after the bytes that are already on disk.
            self.startpoint = first + self.downloaded
            # The range is inclusive, so the part is complete only once the
            # next byte to fetch lies strictly beyond `last`.
            if self.startpoint > last:
                self.downloaded = last - first + 1
                print('Part %s has been downloaded over.' % self.filename)
                return
            range_header = 'bytes={}-{}'.format(self.startpoint, last)
            print('task %s will download from %d to %d'
                  % (self.name, self.startpoint + 1, last + 1))
        else:
            # Size unknown: fetch everything after the bytes already on disk.
            self.startpoint = self.downloaded
            range_header = 'bytes={}-'.format(self.startpoint)

        opener = GetUrlOpener(self.proxies)
        # Request only the needed bytes and identify as a browser.
        opener.addheaders = [('Range', range_header),
                             ('User-agent', self.version)]

        self.fetchsize = 16384  # bytes read per iteration
        self.urlhandle = opener.open(self.url)
        try:
            with open(self.filename, 'ab') as part:
                data = self.urlhandle.read(self.fetchsize)
                while data:
                    part.write(data)
                    # Keep the on-disk size in step with the counter so an
                    # interrupted download can resume from the right offset.
                    part.flush()
                    self.downloaded += len(data)
                    data = self.urlhandle.read(self.fetchsize)
        finally:
            self.urlhandle.close()


def Sec2Time(second):
    '''Format a duration in seconds as [DDDD:][HHH:][MMM:]S.SSS text.

    Leading units that are zero are omitted. Day, hour and minute are
    converted to int so they are zero-padded correctly for float input.
    '''
    day, second = divmod(second, 3600 * 24)
    hour, second = divmod(second, 3600)
    minute, second = divmod(second, 60)
    day, hour, minute = int(day), int(hour), int(minute)
    if day:
        return '{:03}D:{:02}H:{:02}M:{:0.2f}S'.format(day, hour, minute, second)
    if hour:
        return '{:02}H:{:02}M:{:0.2f}S'.format(hour, minute, second)
    if minute:
        return '{:02}M:{:0.2f}S'.format(minute, second)
    return '{:0.2f}S.'.format(second)


def GetUrlOpener(proxies=None):
    '''Build a urllib opener for the given proxy description.

    proxies may be:
      * falsy (None, '', {}): no explicit proxy handler is installed;
      * a dict in urllib's own {scheme: proxy_url} form;
      * a string "user/passwd@http://127.0.0.1:8087", where the
        "user/passwd@" part and the "http://" scheme are optional.

    An unusable value is reported on stdout and a plain opener is returned.
    '''
    if not proxies:
        return urllib.request.build_opener()
    if isinstance(proxies, dict):
        return urllib.request.build_opener(urllib.request.ProxyHandler(proxies))

    try:
        # Split on the LAST '@' so a password may itself contain '@'.
        auth, _, addr = proxies.rpartition('@')
        if '://' in addr:
            ptype, _, phost = addr.partition('://')
        else:
            ptype, phost = 'http', addr
        proxy_handler = urllib.request.ProxyHandler({ptype: ptype + '://' + phost})
    except (AttributeError, TypeError) as ex:
        print(ex)
        return urllib.request.build_opener()

    # Split on the FIRST '/' so a password may itself contain '/'.
    user, has_password, passwd = auth.partition('/')
    if not has_password:
        return urllib.request.build_opener(proxy_handler)

    # A default-realm password manager with realm=None answers whatever
    # realm the proxy announces; a fixed realm string would only match a
    # proxy using exactly that realm.
    password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    password_mgr.add_password(None, phost, user, passwd)
    proxy_auth_handler = urllib.request.ProxyBasicAuthHandler(password_mgr)
    return urllib.request.build_opener(proxy_handler, proxy_auth_handler)


def GetUrlFileInfo(url, proxies=None):
    '''Open url and describe the resource from its response headers.

    Returns [(name, ext), (maintype, subtype), length]. The file name comes
    from Content-Disposition when present, otherwise from the URL path;
    length is -1 when Content-Length is missing or not a number.
    '''
    path = urllib.parse.urlsplit(url).path
    # Decode percent-escapes so non-ASCII file names come out readable.
    filename = urllib.parse.unquote(path).split('/')[-1]

    opener = GetUrlOpener(proxies)
    response = opener.open(url)
    try:
        headers = response.info()
    finally:
        # Only the headers are needed; do not keep the connection open.
        response.close()

    # The server-provided name takes precedence over the one in the URL.
    disposition = headers.get('Content-Disposition')
    if disposition and 'filename=' in disposition:
        value = disposition.split('filename=', 1)[1].strip()
        if value[:1] in ('"', "'"):
            closing = value.find(value[0], 1)
            value = value[1:closing] if closing != -1 else value[1:]
        else:
            value = value.split(';', 1)[0].strip()
        filename = urllib.parse.unquote(value)

    # The name is remote input: drop any directory part so it cannot point
    # outside the target directory.
    filename = os.path.basename(filename.replace('\\', '/'))

    if filename:
        (name, ext) = os.path.splitext(filename)
    else:
        (name, ext) = ('Unknown', '')

    # Without a usable length the caller can only download single-threaded.
    try:
        length = int(headers.get('Content-Length', -1))
    except (TypeError, ValueError):
        length = -1

    content_type = headers.get('Content-Type') or 'application/octet-stream'
    maintype, _, kind = content_type.partition('/')
    return [(name, ext), (maintype, kind), length]


def SpliteBlocks(totalsize, blocknumber):
    '''Split totalsize bytes into inclusive (first, last) byte ranges.

    At most blocknumber ranges are produced; the count is clamped to the
    range 1..totalsize so no range is empty. The last range absorbs the
    remainder of the division.
    '''
    blocknumber = max(1, min(blocknumber, totalsize))
    blocksize = totalsize // blocknumber
    ranges = [(i * blocksize, (i + 1) * blocksize - 1)
              for i in range(blocknumber - 1)]
    ranges.append((blocksize * (blocknumber - 1), totalsize - 1))
    return ranges


def islive(tasks):
    '''Return True while at least one thread in tasks is still running.'''
    return any(task.is_alive() for task in tasks)


def _part_size(path):
    '''Return the size of a part file, or 0 when it does not exist.'''
    try:
        return os.path.getsize(path)
    except OSError:
        return 0


def _unique_path(path):
    '''Return path, or path with a "(n)" suffix before the extension if taken.'''
    base, ext = os.path.splitext(path)
    candidate = path
    num = 1
    while os.path.exists(candidate):
        candidate = '{}({}){}'.format(base, num, ext)
        num += 1
    return candidate


def _merge_parts(parts, output):
    '''Concatenate parts into output in order; return True if all were copied.

    Each part is deleted once copied. Parts are streamed so that large
    downloads are never held in memory as a whole.
    '''
    complete = True
    with open(output, 'wb') as merged:
        for part in parts:
            try:
                with open(part, 'rb') as source:
                    shutil.copyfileobj(source, merged)
                os.remove(part)
            except OSError as ex:
                print(ex)
                complete = False
    return complete


def download(url, target=None, blocks=6, proxies=local_proxies):
    '''Download url into the directory target using up to blocks threads.

    target:  output directory (created if missing); None means the current
             working directory at call time.
    blocks:  number of worker threads; only one is used when the server
             does not report the file size.
    proxies: proxy description, see GetUrlOpener.

    Part files are written to the current working directory and merged into
    the target file at the end. Progress is printed on stdout. Exits the
    process via sys.exit when the resource information cannot be retrieved.
    '''
    if target is None:
        target = os.getcwd()

    print('Retrieving resource information...')
    # Percent-encode non-ASCII characters so the URL is valid on the wire.
    url = urllib.parse.quote(url, safe='/%&@=+?$;,:')
    try:
        infos = GetUrlFileInfo(url, proxies)
    except Exception as ex:
        print(ex)
        print('Failed to retrieve resource information!')
        sys.exit(1)

    if not os.path.exists(target):
        os.makedirs(target)
    (name, ext), mime, size = infos
    output = os.path.join(target, name + ext)
    starttime = time.time()
    print('Infomation:')
    print('FileName:{0} FileType:{1} FileLength:{2}'.format(
        name + ext, '/'.join(mime), size if int(size) > 0 else 'Unknown'))

    if size > 0:
        # Known length: the file can be split across several threads.
        print('Starting multithread download...')
        ranges = SpliteBlocks(size, blocks)
        blocks = len(ranges)
    else:
        # Unknown length: a single thread fetches everything.
        print('Starting single thread download...')
        ranges = ()
        blocks = 1

    filename = [name + "_tmpfile_%d" % i for i in range(blocks)]
    tasks = []
    for i in range(blocks):
        task = Maple(name + "_thread_%d" % i, url, filename[i],
                     ranges[i] if ranges else ranges, proxies)
        task.daemon = True  # do not keep the process alive on interrupt
        task.start()
        tasks.append(task)

    # Report progress until every worker has finished.
    while islive(tasks):
        downloaded = sum(task.downloaded for task in tasks)
        if size > 0:
            process = downloaded / float(size) * 100
            show = '\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (
                size, downloaded, process)
        else:
            show = '\rDownloaded:%d' % downloaded
        sys.stdout.write(show)
        sys.stdout.flush()
        time.sleep(0.2)

    consuming = Sec2Time(time.time() - starttime)
    # Re-read the counters: the last progress sample may predate the final writes.
    downloaded = sum(task.downloaded for task in tasks)

    if size > 0:
        downloadsize = sum(_part_size(part) for part in filename)
        flag = downloadsize == size
        if flag:
            show = '\rFilesize:%d Downloaded:%d Completed:%.2f%%\n' % (
                size, downloadsize, 100)
        else:
            show = '\nSize is not mathed!\n'
    else:
        show = '\nTotal Size: %d\n' % downloaded
        # With no expected size, the part file existing is the only check.
        flag = os.path.exists(filename[0])
    sys.stdout.write(show)
    sys.stdout.flush()

    if flag:
        print('Integrating files...')
        output = _unique_path(output)  # never overwrite an existing file
        if len(filename) == 1:
            # shutil.move also works when target is on another filesystem.
            shutil.move(filename[0], output)
            merged = True
        else:
            merged = _merge_parts(filename, output)
        if merged and os.path.exists(output):
            print('Download Complete!')
        else:
            print('Failed to generate target file!')
    else:
        for part in filename:
            try:
                os.remove(part)
            except OSError:
                pass  # the part was never created
        print('Download Failed!')
    print('Consuming: {}\n'.format(consuming))


def main(argv):
    '''Parse command-line options from argv and start the download.

    Options: -h/--help, -u/--url, -t/--target, -n/--num, -p/--proxy.
    -f is accepted as an alias of -t.
    '''
    try:
        options, args = getopt.getopt(
            argv, 'hu:t:f:n:p:', ['help', 'url=', 'target=', 'num=', 'proxy='])
    except getopt.GetoptError as ex:
        print(ex)
        sys.exit(2)

    num = 2
    url = _DEMO_URL
    target = os.getcwd()
    proxies = ''
    for name, value in options:
        if name in ('-h', '--help'):
            print(_USAGE)
            sys.exit()
        elif name in ('-u', '--url'):
            url = value
        elif name in ('-t', '-f', '--target'):
            target = value
        elif name in ('-n', '--num'):
            try:
                num = int(value)
            except ValueError:
                print('Invalid thread count: %s' % value)
                sys.exit(2)
        elif name in ('-p', '--proxy'):
            proxies = value
    download(url, target, num, proxies)


if __name__ == '__main__':
    main(sys.argv[1:])
总结
以上是生活随笔为你收集整理的 python 多线程下载_Python3 多线程下载代码的全部内容,希望文章能够帮你解决所遇到的问题。
- 上一篇: scau 9502 ARDF一个变量的问
- 下一篇: MATLAB求实数绝对值——abs