# Download MODIS data from LAADS DAAC with Python + Selenium
from selenium import webdriver
from time import sleep
import tempfile
import os,sys
import pandas as pd
import geopandas as gpd
import time
# Build the search URL
def GetURL(ProductID,StartTime,EndTime,search_file):
    """Build the LAADS search-page URL for a product, date range and area.

    Args:
        ProductID: Product/collection path segment; the caller passes it
            with a trailing slash (e.g. 'MOD021KM--61/').
        StartTime: Start date string, e.g. '2020-06-01'.
        EndTime: End date string, e.g. '2020-06-03'.
        search_file: Path to a vector file whose first feature defines the
            search area.

    Returns:
        The search URL as a string.
    """
    # Search boundary: bounding box of the first feature.
    data = gpd.GeoDataFrame.from_file(search_file)
    bbox = (data.bounds.values)[0].tolist()
    # Study-area extent as "upper-left, lower-right", rounded to 0.1 degree.
    # NOTE(review): not used by the tile-based URL below; kept because the
    # original computed it as an alternative location string.
    Area = ','.join(str(round(bbox[i], 1)) for i in (0, 3, 2, 1))
    # MODIS tile-grid vector file. A raw string keeps the same path while
    # avoiding invalid-escape warnings for the backslashes.
    modis_grid_file = r'E:\***\modis_WGS84_grid_world.shp'
    modis_grid = gpd.GeoDataFrame.from_file(modis_grid_file)
    # Tiles of the grid that intersect the search geometry.
    modis_intersection = modis_grid[modis_grid.intersects(data.geometry[0])]
    # presumably columns 1 and 2 of the grid file are the H and V tile
    # indices; verify against the shapefile's attribute table.
    tiles = ["H" + str(mv[1]) + "V" + str(mv[2]) for mv in modis_intersection.values]
    # join() never strips the 'Tile:' prefix when no tile intersects.
    path_row = 'Tile:' + ','.join(tiles)
    # NOTE(review): the URL literal was truncated in the original source.
    # It is reconstructed here from the LAADS search URL layout
    # (<product>/<start>..<end>/DB/<location>) -- confirm before relying on it.
    url = ('https://ladsweb.modaps.eosdis.nasa.gov/search/order/4/'
           + ProductID + StartTime + '..' + EndTime + '/DB/' + path_row)
    return url
# Query the imagery list with Selenium
def SearchFileList(url):
    """Open the search page in Chrome and download the result list as CSV.

    Args:
        url: Search-page URL to open.

    Returns:
        Path of the newly created directory that Chrome was configured to
        download the CSV file into.
    """
    # Create a fresh download folder named after the current timestamp.
    csvdir = 'E:\\***\\' + str(time.time()).replace('.','')
    os.mkdir(csvdir)
    # Configure Chrome to download into that folder without pop-ups.
    options = webdriver.ChromeOptions()
    prefs = {'profile.default_content_settings.popups': 0, 'download.default_directory': csvdir}
    options.add_experimental_option('prefs', prefs)
    # Local path of chromedriver.exe.
    chromedriver = r"C:\***\Google\Chrome\Application\chromedriver.exe"
    # options.add_argument('--headless')  # enable for a windowless browser
    try:
        # Newer Selenium 4 releases take the driver path through a Service.
        from selenium.webdriver.chrome.service import Service
        driver = webdriver.Chrome(service=Service(chromedriver), options=options)
    except TypeError:
        # Selenium 3 does not accept `service=`; use the legacy keyword.
        driver = webdriver.Chrome(executable_path=chromedriver, options=options)
    try:
        driver.get(url)
        # Give the server time to run the search; tune to your connection.
        sleep(50)
        # Locate the CSV download link. find_element(by, value) works on both
        # Selenium 3 and 4, unlike the removed find_element_by_xpath helper.
        csvElement = driver.find_element('xpath', '// *[ @ id = "tab4download"] / a[2]')
        csvElement.click()
        # Leave time for the CSV download to finish.
        sleep(20)
    finally:
        # Always close the browser, even when a step above fails.
        driver.quit()
    return csvdir
# Download the imagery
def MODISDown(FileDir):
    """Download every file listed in the CSV found in ``FileDir``.

    The first entry of ``FileDir`` is read as the CSV file list, then the CSV
    and the directory are removed. Files already present locally are skipped.

    Args:
        FileDir: Directory containing the downloaded CSV file list.

    Raises:
        FileNotFoundError: If ``FileDir`` contains no file.
    """
    entries = os.listdir(FileDir)
    if not entries:
        raise FileNotFoundError("No CSV file list found in " + FileDir)
    csvfilepath = os.path.join(FileDir, entries[0])
    csvvalues = pd.read_csv(csvfilepath).values
    # The list is in memory now; clean up the temporary download folder.
    os.remove(csvfilepath)
    os.rmdir(FileDir)

    total = len(csvvalues)
    for file_count, cv in enumerate(csvvalues, start=1):
        # assumes cv[1] is an archive path whose '/'-separated segments 5 and
        # 7 are the year folder and the file name -- TODO confirm against CSV.
        parts = cv[1].split("/")
        # NOTE(review): this literal was truncated in the original source;
        # reconstructed as LAADS host + archive path -- confirm.
        modislink = 'https://ladsweb.modaps.eosdis.nasa.gov' + cv[1]
        outdir = 'E:/***/MODIS/' + parts[5]
        # outdir = 'E:/Temp/' + parts[5]
        # makedirs also creates missing parent folders.
        os.makedirs(outdir, exist_ok=True)
        path = outdir + '/' + parts[7]
        if os.path.exists(path):
            continue
        print("({0}/{1}) Downloading {2}".format(file_count, total, modislink.split("/")[-1]))
        with open(path, 'w+b') as out:
            geturl(modislink, out)
        # A failed request leaves an empty file that would be skipped on the
        # next run; remove it so the download is retried.
        if os.path.getsize(path) == 0:
            os.remove(path)
            print("Download produced no data: " + path, file=sys.stderr)
# Fetch a download link and download the image data
def geturl(url,out=None):
    """Fetch ``url`` with the Earthdata bearer token.

    Args:
        url: Address to request.
        out: Optional writable binary file object. When given, the response
            body is streamed into it; otherwise the body is returned.

    Returns:
        The decoded (UTF-8) response body when ``out`` is None and the
        request succeeds; None otherwise. Errors are reported on stderr.
    """
    USERAGENT = 'tis/download.py_1.0--' + sys.version.replace('\n', '').replace('\r', '')
    headers = { 'user-agent' : USERAGENT }
    token = '******' # your token, shown in your Earthdata profile after login
    headers['Authorization'] = 'Bearer ' + token
    try:
        import ssl
        # The default context verifies the server certificate, so the bearer
        # token is never sent to an unauthenticated host.
        CTX = ssl.create_default_context()
    except (ImportError, AttributeError):
        # No usable ssl module in this interpreter: fall back to curl.
        return _geturl_with_curl(url, headers, out)

    from urllib.error import HTTPError, URLError
    from urllib.request import Request, urlopen
    try:
        with urlopen(Request(url, headers=headers), context=CTX) as response:
            if out is None:
                return response.read().decode('utf-8')
            start = time.time()
            # Stream the body into `out`, reporting progress as it goes.
            downloaded = chunk_read(response, out, report_hook=chunk_report)
            elapsed = max(time.time() - start,1.0)
            # Average rate is based on bytes actually written, which is
            # always known even when Content-Length is missing.
            rate = (downloaded / 1024 ** 2) / elapsed
            print("Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec".format(downloaded, elapsed, rate))
    except HTTPError as e:
        # HTTPError.code is an int attribute; the text lives in .reason.
        print('HTTP GET error code: %d' % e.code, file=sys.stderr)
        print('HTTP GET error message: %s' % e.reason, file=sys.stderr)
    except URLError as e:
        print('Failed to make request: %s' % e.reason, file=sys.stderr)
    return None


def _geturl_with_curl(url, headers, out=None):
    """Fetch ``url`` by shelling out to curl; same contract as geturl."""
    import subprocess
    args = ['curl', '--fail', '-sS', '-L', '--get', url]
    for (k,v) in headers.items():
        args.extend(['-H', ': '.join([k, v])])
    try:
        if out is None:
            # check_output returns stdout as bytes on Python 3.
            result = subprocess.check_output(args)
            return result.decode('utf-8') if isinstance(result, bytes) else result
        subprocess.call(args, stdout=out)
    except subprocess.CalledProcessError as e:
        detail = e.output
        if isinstance(detail, bytes):
            detail = detail.decode('utf-8', 'replace')
        print('curl GET error message: %s' % (detail or e), file=sys.stderr)
    return None
# chunk_read: adapted from an external example (the attribution link is missing from this copy)
def chunk_read( response, local_file, chunk_size=10240, report_hook=None):
    """Copy ``response`` into ``local_file`` in fixed-size chunks.

    Args:
        response: Object with a ``read(size)`` method.
        local_file: Writable file object receiving the data.
        chunk_size: Number of bytes requested per read.
        report_hook: Optional callable ``(bytes_so_far, file_size)`` invoked
            after every written chunk.

    Returns:
        Number of bytes written. A read error stops the copy early after a
        message on stdout, so the count may be short of the full size.
    """
    # Expected total size (None when it cannot be determined).
    file_size = get_total_size(response)
    bytes_so_far = 0
    while True:
        try:
            chunk = response.read(chunk_size)
        except Exception:
            # Best effort: report and stop, but let KeyboardInterrupt and
            # SystemExit propagate instead of being swallowed.
            sys.stdout.write("\n > There was an error reading data. \n")
            break
        if not chunk:
            # End of stream.
            break
        try:
            local_file.write(chunk)
        except TypeError:
            # Text-mode target: decode with the file's own encoding.
            local_file.write(chunk.decode(local_file.encoding))
        bytes_so_far += len(chunk)
        if report_hook:
            report_hook(bytes_so_far, file_size)
    return bytes_so_far
def chunk_report( bytes_so_far, file_size):
    """Write a one-line download progress message to stdout.

    Args:
        bytes_so_far: Number of bytes downloaded so far.
        file_size: Total size in bytes, or None when unknown.
    """
    if file_size is None:
        # We couldn't figure out the size.
        sys.stdout.write(" > Downloaded %d of unknown Size\r" % (bytes_so_far))
        return
    if file_size:
        percent = round(float(bytes_so_far) / file_size * 100, 2)
    else:
        # A zero-byte file is complete by definition; avoids dividing by 0.
        percent = 100.0
    sys.stdout.write(" > Downloaded %d of %d bytes (%0.2f%%)\r" %
                     (bytes_so_far, file_size, percent))
def get_total_size(response):
    """Return the response's Content-Length as an int, or None if unknown.

    Args:
        response: Response object exposing ``info()`` (header mapping) or a
            legacy ``getheader(name)`` method.

    Returns:
        The declared size in bytes, or None (after printing a notice) when
        the header is missing or not a valid integer.
    """
    try:
        raw_length = response.info().get('Content-Length')
    except AttributeError:
        raw_length = None
    if raw_length is None:
        # Fall back to the older per-response accessor when it exists.
        legacy_getter = getattr(response, 'getheader', None)
        if callable(legacy_getter):
            raw_length = legacy_getter('Content-Length')
    try:
        return int(raw_length.strip())
    except (AttributeError, ValueError):
        # Header absent (None has no strip) or not numeric.
        print ("> Problem getting size")
        return None
if __name__ == "__main__":
    # Describe the data to download.
    ProductID = 'MOD021KM--61/'  # product id  # sys.argv[1]
    # Start and end of the date range, as plain date strings.
    StartTime = '2020-06-01'  # start date  # sys.argv[2]
    EndTime = '2020-06-03'  # end date  # sys.argv[3]
    # Search-area shapefile. The stray '?' characters around the file name
    # in the original were encoding artifacts ('?' is not valid in a Windows
    # file name) and have been removed.
    search_file = r'E:\***\北京市.shp'  # search area  # sys.argv[4]
    # Build the search URL.
    url = GetURL(ProductID,StartTime,EndTime,search_file)
    # Fetch the list of matching files.
    csvdir = SearchFileList(url)
    # Download every file in the list.
    MODISDown(csvdir)
總結(jié)
以上是生活随笔为你收集整理的python LAADS+Selenium下载MODIS数据的全部内容,希望文章能够帮你解决所遇到的问题。
- 上一篇: MySQL中的pid与socket是什么
- 下一篇: MySQL备份与恢复——基于Xtraba