使用python+selenium批量提取群成员QQ
生活随笔
收集整理的這篇文章主要介紹了
使用python+selenium批量提取群成员QQ
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
聲明:此方法禁止用於違法用途,否則後果自負。
1.環境配置
(1)python 3.7
(2)使用pip 安裝selenium
(3)下載Chrome瀏覽器,并下載對應版本chromedriver
2.代碼
import os
import re
import time
import datetime

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

# XPath of the <li> entries inside the "my-all-group" container. It is used
# for the initial lookup and again after every group switch, because the
# previously located elements are replaced when the dialog is reopened.
# NOTE(review): the original comment says the first <ul> holds the groups the
# user has joined -- confirm against the live page.
GROUP_ITEMS_XPATH = './/div[@class="my-all-group"]/ul[1]/li'


def _wait_for(browser, by, value, timeout=10):
    """Block until at least one element matching the locator is present.

    Propagates whatever ``WebDriverWait.until`` raises when the element does
    not appear within *timeout* seconds.
    """
    WebDriverWait(browser, timeout).until(
        EC.presence_of_all_elements_located((by, value)))


def scroll_foot(browser):
    """Scroll the document down by setting ``scrollTop`` to 100000.

    Returns whatever ``browser.execute_script`` returns for the statement.
    """
    # The fixed offset is presumably larger than any page height, so this
    # behaves as "scroll to bottom" -- TODO confirm for very long lists.
    js = 'var q=document.documentElement.scrollTop=100000'
    return browser.execute_script(js)


def get_qq(browser, qq_file):
    """Write one line per member row to *qq_file* and return the row count.

    A member row is any element with class ``mb``. For each row the text of
    its fifth ``<td>`` is written followed by a newline (presumably the
    QQ-number column -- verify against the page layout).

    Args:
        browser: Selenium WebDriver positioned on a member-list page.
        qq_file: Writable text file object.

    Returns:
        The number of rows written (0 when the page has no member rows).
        The original returned the last loop index, i.e. one less than the
        real count, and ``None`` for an empty page.

    Raises:
        IndexError: If a row has fewer than five ``<td>`` cells.
    """
    rows = browser.find_elements(By.CLASS_NAME, 'mb')
    for row in rows:
        cells = row.find_elements(By.TAG_NAME, 'td')
        # Same cell the original picked with ``[2:]`` followed by ``[2]``.
        qq_file.write(cells[4].text + '\n')
    return len(rows)


def extractor(browser, qq_list_path):
    """Load the full member list of the current group and dump it to a file.

    Keeps scrolling to the bottom (with a one second pause after each scroll)
    until the page source stops growing, then writes the rows found by
    ``get_qq`` to *qq_list_path*.

    Returns:
        The number of member rows written.
    """
    current_len = 0
    while current_len < len(browser.page_source):
        current_len = len(browser.page_source)
        scroll_foot(browser)
        # Give the page time to append the next batch of rows.
        time.sleep(1.0)

    # ``with`` guarantees the file is closed even if get_qq raises.
    with open(qq_list_path, 'w', encoding='utf-8') as qq_list_file:
        return get_qq(browser, qq_list_file)


def login_spider(exe_path, url):
    """Start Chrome, open *url* and log in through the quick-login entry.

    Args:
        exe_path: Path to the chromedriver executable.
        url: Landing page to open first.

    Returns:
        The WebDriver instance after the quick-login entry has been clicked.
    """
    # NOTE(review): the driver path is passed positionally, exactly as in the
    # original; recent Selenium releases expect a ``Service`` object instead
    # -- confirm against the installed Selenium version.
    browser = webdriver.Chrome(exe_path)
    browser.get(url)

    # Open the login dialog and wait for its iframe to be attached.
    browser.find_element(By.CSS_SELECTOR, '#headerInfo p a').click()
    _wait_for(browser, By.CSS_SELECTOR, '#loginWin iframe')
    print('登陸框已加載')

    # Navigate straight to the iframe's own URL instead of switching frames.
    iframe_url = browser.find_element(
        By.CSS_SELECTOR, '#loginWin iframe').get_attribute('src')
    browser.get(iframe_url)

    # Wait for the quick-login list, then click its first link.
    _wait_for(browser, By.ID, 'qlogin_list')
    browser.find_element(By.CSS_SELECTOR, '#qlogin_list a').click()
    print('登陸成功')
    return browser


def switch_spider(browser):
    """Navigate from the landing page to the member-management window.

    Clicks the fourth item of the ``headerNav`` list, then the first element
    with class ``color-tit``, and finally switches the driver to the second
    window handle.

    Returns:
        The same *browser*, now focused on the second window.
    """
    nav_item_xpath = './/ul[@id="headerNav"]/li[4]'
    _wait_for(browser, By.XPATH, nav_item_xpath)
    browser.find_element(By.XPATH, nav_item_xpath).click()

    _wait_for(browser, By.CLASS_NAME, 'color-tit')
    browser.find_element(By.CLASS_NAME, 'color-tit').click()

    # Presumably the click above opens the member page in a new window/tab.
    browser.switch_to.window(browser.window_handles[1])
    return browser


def start_spider(browser, dir):
    """Extract the member list of every group shown in the group picker.

    For each group entry: click it, read the ``groupTit`` text (expected to
    look like ``name(id)``), dump the members to ``<dir>/<name>_<id>`` via
    ``extractor``, then reopen the group picker for the next entry.

    A failure on one group is reported on stdout and the loop moves on to the
    next index instead of aborting the whole run.

    Args:
        browser: WebDriver focused on the member-management window.
        dir: Existing directory that receives one file per group.
    """
    _wait_for(browser, By.CLASS_NAME, 'my-all-group')
    lis = browser.find_elements(By.XPATH, GROUP_ITEMS_XPATH)
    group_num = len(lis)

    # Index-based loop on purpose: ``lis`` is re-fetched after every group
    # because the old element references go stale once the page changes.
    for idx in range(group_num):
        try:
            lis[idx].click()
            name_and_id = browser.find_element(By.ID, 'groupTit').text
            parts = name_and_id.split('(')
            name = parts[0]
            group_id = parts[1].split(')')[0]
            qq_list_path = dir + '/' + name + '_' + group_id

            print('開始提取[' + name_and_id + ']')
            member_num = extractor(browser, qq_list_path)
            print('提取[' + name_and_id + ']成功:' + str(member_num) + '人')

            # Reopen the picker and refresh the element list.
            browser.find_element(By.ID, 'changeGroup').click()
            _wait_for(browser, By.CLASS_NAME, 'ui-dialog')
            lis = browser.find_elements(By.XPATH, GROUP_ITEMS_XPATH)
        except Exception as err:
            # Best-effort per group, but no longer silent.
            print('提取第' + str(idx + 1) + '個群失敗,已跳過:' + repr(err))
            continue


def combine(dir, file_name):
    """Merge all per-group files in *dir* into one de-duplicated file.

    The first whitespace-separated token of every non-blank line is treated
    as an entry. Each distinct entry is written once to ``<dir>/<file_name>``
    in first-seen order; repeats are only counted. A summary is printed.

    Args:
        dir: Directory containing the per-group files.
        file_name: Name of the merged output file created inside *dir*.
    """
    dest_path = dir + '/' + file_name
    dest_abs = os.path.abspath(dest_path)

    # Collect the inputs before the output is created, and never treat the
    # output file (or sub-directories) as input. Sorting makes the merged
    # order reproducible, since os.listdir order is arbitrary.
    source_paths = []
    for entry in sorted(os.listdir(dir)):
        path = os.path.join(dir, entry)
        if os.path.abspath(path) == dest_abs or not os.path.isfile(path):
            continue
        source_paths.append(path)

    seen = set()
    re_count = 0
    with open(dest_path, 'w', encoding='utf-8') as dest_file:
        for path in source_paths:
            with open(path, 'r', encoding='utf-8') as src:
                for line in src:
                    fields = line.split()
                    if not fields:
                        # Blank line: nothing to merge.
                        continue
                    qq = fields[0]
                    if qq in seen:
                        re_count += 1
                    else:
                        seen.add(qq)
                        dest_file.write(qq + '\n')

    print('合并成功,共' + str(len(seen)) + '人,去除重復' + str(re_count))


if __name__ == '__main__':
    url = 'https://qun.qq.com/'
    exe_path = 'C:/attachment/chromedriver.exe'

    # Output folder is named dataset_<yyyymmdd> after today's date.
    now = datetime.datetime.today().strftime("%Y%m%d")
    output_dir = 'dataset_' + now
    file_name = 'all_qq.txt'

    # Create the folder if needed; an existing folder is fine.
    os.makedirs(output_dir, exist_ok=True)

    browser = login_spider(exe_path, url)
    try:
        switch_spider(browser)
        start_spider(browser, output_dir)
    finally:
        # Always close the browser, even when scraping fails midway.
        browser.quit()

    combine(output_dir, file_name)

# References (參考):
[1]Python爬蟲使用selenium爬取qq群的成員信息(全自動實現自動登陸)[博客園]
[2] 教你用python爬取自己加入的QQ群成員名單 [知乎]
總結
以上是生活随笔為你收集整理的使用python+selenium批量提取群成员QQ的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 11 父子组件数据关系与状态提升
- 下一篇: mysql 创建查询 删除_MYSQL数