使用python抓取美团商家信息
生活随笔收集整理的這篇文章主要介紹了《使用python抓取美团商家信息》,小編覺得挺不錯的,現在分享給大家,幫大家做個參考。
抓取美團商家信息
import json

import requests
from bs4 import BeautifulSoup

# Landing page of the Beijing site; its navigation bar is the crawl's entry point.
url = 'http://bj.meituan.com/'
# Template for a single shop's detail page; formatted with the shop id.
url_shop = 'http://bj.meituan.com/shop/{}'
# Browser-like request headers sent with the category and shop page requests.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Cache-Control': 'max-age=0',
    'DNT': '1',
    'Host': 'bj.meituan.com',
    'Proxy-Connection': 'keep-alive',
    'Referer': 'http://bj.meituan.com/shop/286725?acm=UwunyailsW15518532529028663069.286725.1&mtt=1.index%2Fdefault%2Fpoi.pz.1.j4cijrmg&cks=58899',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
}

# Seconds to wait for each HTTP request; without a timeout, requests.get can
# block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10


def get_start_menu_links():
    """Collect the URLs of all second-level menu entries on the landing page.

    For every ``div.J-nav-item`` the first nested ``div > div > dl > dt > a``
    chain is followed and the anchor's ``href`` is collected. Nav items that
    do not contain that chain (or whose anchor has no ``href``) are skipped
    instead of aborting the whole crawl.

    Returns:
        A list of ``href`` strings, in document order.
    """
    html = requests.get(url, timeout=REQUEST_TIMEOUT).text
    soup = BeautifulSoup(html, 'lxml')

    links = []
    for nav_item in soup.find_all('div', class_='J-nav-item'):
        node = nav_item
        # Walk the same descendant chain the page layout is expected to have,
        # stopping as soon as one level is missing.
        for tag_name in ('div', 'div', 'dl', 'dt', 'a'):
            node = node.find(tag_name)
            if node is None:
                break
        if node is None:
            continue
        href = node.get('href')
        if href is not None:
            links.append(href)
    return links


def get_shop_ids(url, headers=None):
    """Return the shop ids listed on one category page.

    The ids are read from the ``data-async-params`` attribute of the
    ``div`` with class ``J-scrollloader cf J-hub``. That attribute holds a
    JSON object whose ``data`` member is itself a JSON-encoded string
    containing ``poiidList``.

    Args:
        url: Category page URL to fetch.
        headers: Optional request headers passed to ``requests.get``.

    Returns:
        The ``poiidList`` value, or an empty list when the element, the
        attribute, or the expected JSON structure is missing.
    """
    html = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT).text
    soup = BeautifulSoup(html, 'lxml')

    loader = soup.find('div', class_='J-scrollloader cf J-hub')
    if loader is None:
        return []
    raw_params = loader.get('data-async-params')
    if not raw_params:
        return []

    try:
        params = json.loads(raw_params)
        # The 'data' member is a JSON document serialized as a string.
        data = json.loads(params.get('data'))
        shop_ids = data.get('poiidList')
    except (AttributeError, TypeError, ValueError):
        # Malformed JSON, a missing 'data' member, or a non-object payload.
        return []
    return shop_ids or []


def _text_of(node):
    """Return ``node.text``, or None when the node was not found."""
    return None if node is None else node.text


def _extract_shop_fields(shop_detail):
    """Pull (name, address, contact) text out of a shop summary element.

    Each field is None when the corresponding element is absent, so the
    caller can decide what to print without catching exceptions.
    """
    if shop_detail is None:
        return None, None, None
    left = shop_detail.find('div', class_='fs-section__left')
    if left is None:
        return None, None, None

    heading = left.find('h2')
    name = _text_of(heading.find('span')) if heading is not None else None

    under_title = left.find('p', class_='under-title')
    if under_title is None:
        return name, None, None
    address = _text_of(under_title.find('span'))
    contact = _text_of(under_title.find_next_sibling())
    return name, address, contact


def main(pages=range(4, 5)):
    """Crawl every category and print name, address and contact per shop.

    Args:
        pages: Iterable of category page numbers to visit for each menu
            link. Defaults to page 4 only, matching the original script.

    A banner line is printed for every shop id. Shops whose page has no
    name element are skipped after the banner; a missing address or contact
    element only omits that one line.
    """
    # Materialize once so a one-shot iterator can be reused for every link.
    pages = tuple(pages)

    for link in get_start_menu_links():
        for pageNum in pages:
            category_url = link + '/all/page{}'.format(pageNum)
            for shop_id in get_shop_ids(category_url, headers=headers):
                html = requests.get(
                    url_shop.format(shop_id),
                    headers=headers,
                    timeout=REQUEST_TIMEOUT,
                ).text
                soup = BeautifulSoup(html, 'lxml')
                shop_detail = soup.find('div', class_='summary biz-box fs-section cf')
                print("==================================pageNum %d shop_id: %d===================================================" % (pageNum, shop_id))

                name, address, contact = _extract_shop_fields(shop_detail)
                if name is None:
                    # No recognizable shop summary on this page.
                    continue
                print("名稱: " + name)
                if address is not None:
                    print("地址: " + address)
                if contact is not None:
                    print("聯系方式: " + contact)


if __name__ == '__main__':
    main()

總結
以上是生活随笔為你收集整理的使用python抓取美团商家信息的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 【例题收藏】◇例题·I◇ Snuke's
- 下一篇: 计算机cpu占用率高,CPU占用率高怎么