目前藝賽旗RPA已經更新到8.0版本,可以讓所有用戶免費下載試用http://www.i-search.com.cn/index.html?from=line1 (復制鏈接下載)
一,介紹
一些網站會在正常的賬號密碼認證之外加一些驗證碼,以此來明確地區分人/機行為,從一定程度上達到反爬的效果。對于簡單的校驗碼,用Tesserocr就可以搞定,如下。
但一些網站加入了滑動驗證碼,最典型的要屬極驗滑動認證了,極驗官網:http://www.geetest.com/ ,下圖是極驗的登錄界面
現在極驗驗證碼已經更新到了3.0版本,截至2017年7月全球已有十六萬家企業正在使用極驗,每天服務響應超過四億次,廣泛應用于直播視頻,金融服務,電子商務,游戲娛樂,政府企業等各大類型網站
對于這類驗證,如果我們直接模擬表單請求,繁瑣的認證參數與認證流程會讓你蛋碎一地,我們可以用selenium驅動瀏覽器來解決這個問題,大致分為以下幾個步驟
#1,輸入賬號,密碼,然后點擊登陸
#2,點擊按鈕,彈出沒有缺口的圖
#3,針對沒有缺口的圖片進行截圖
#4,點擊滑動按鈕,彈出有缺口的圖
#5,針對有缺口的圖片進行截圖
#6,對比兩張圖片,找出缺口,即滑動的位移
#7,按照人的行為習慣,把總位移切成一段段小的位移
#8,按照位移移動
#9,完成登錄
二,實現 安裝:selenium+chrome/phantomjs 安裝:Pillow
這里用的是Chrome Pillow:基于PIL,處理python 3.x的圖形圖像庫。因為PIL只能處理到python 2.x,而這個模塊能處理Python3.x,目前用它做圖形的很多。 http://www.cnblogs.com/apexchu/p/4231041.html
C:\Users\Administrator>pip3 install pillow
C:\Users\Administrator>python3
Python 3.6.1 (v3.6.1:69c0db5, Mar 21 2017, 18:41:36) [MSC v.1900 64 bit (AMD64)] on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> from PIL import Image
代碼如下(增加部分注釋) from selenium import webdriver from selenium.webdriver import ActionChains from selenium.webdriver.common.by import By from selenium.webdriver.common.keys import Keys from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.wait import WebDriverWait from PIL import Image import time
def get_snap():
    """Take a screenshot of the whole page and return it as a PIL image.

    Uses the module-level ``driver``. The screenshot is written to
    ``snap.png`` in the current working directory and then reopened.

    :return: ``PIL.Image.Image`` of the full-page screenshot
    """
    driver.save_screenshot('snap.png')
    return Image.open('snap.png')
def get_image():
    """Crop the captcha picture out of a full-page screenshot.

    Relies on the module-level ``wait`` object and on ``get_snap()``.

    :return: ``PIL.Image.Image`` containing only the captcha canvas area
    """
    img = wait.until(
        EC.presence_of_element_located((By.CLASS_NAME, 'geetest_canvas_img'))
    )
    time.sleep(2)  # give the captcha picture time to finish rendering
    location = img.location
    size = img.size

    # Bounding box of the captcha element in page coordinates.
    left = location['x']
    top = location['y']
    right = left + size['width']
    bottom = top + size['height']

    page_snap_obj = get_snap()
    return page_snap_obj.crop((left, top, right, bottom))
def get_distance(image1, image2):
    """Return the distance the slider has to travel to reach the gap.

    Both images are expected to have the same size. Columns are scanned
    left to right, starting at x = 57; the first pixel whose R, G or B
    channel differs by at least ``threshold`` marks the gap.

    :param image1: image object without the gap
    :param image2: image object with the gap
    :return: x of the first differing column minus 7. If no pixel differs
        (or the image is narrower than the start column) the fallback is
        ``width - 1 - 7``, clamped at 0.
    """
    threshold = 60  # minimum per-channel colour difference
    left = 57       # first column that takes part in the comparison
    offset = 7      # empirical correction: the detected edge is ~7 px off

    # Load the pixel-access objects once instead of once per pixel.
    pixels1 = image1.load()
    pixels2 = image2.load()
    width, height = image1.size

    for i in range(left, width):
        for j in range(height):
            rgb1 = pixels1[i, j]
            rgb2 = pixels2[i, j]
            if any(abs(rgb1[k] - rgb2[k]) >= threshold for k in range(3)):
                return i - offset
    return max(width - 1 - offset, 0)


def get_tracks(distance):
    """Split ``distance`` into small steps that imitate a human drag.

    The movement accelerates uniformly and then decelerates uniformly.
    Basic formulas of uniformly accelerated motion:
        (1) v = v0 + a*t
        (2) s = v0*t + 0.5*a*t^2
        (3) v^2 - v0^2 = 2*a*s

    :param distance: total distance to move
    :return: list of integer offsets, one per 0.3 s time slice
    """
    v = 0          # current velocity
    t = 0.3        # length of one time slice in seconds
    tracks = []    # one rounded displacement per time slice
    current = 0    # distance covered so far
    mid = distance * 4 / 5  # start decelerating after 4/5 of the way

    while current < distance:
        # A smaller acceleration gives smaller steps, i.e. a finer track.
        a = 2 if current < mid else -3
        v0 = v
        s = v0 * t + 0.5 * a * (t ** 2)  # displacement within this slice
        current += s
        tracks.append(round(s))
        v = v0 + a * t  # final velocity becomes the next initial velocity
    return tracks
import os

# Credentials come from the environment instead of being hard-coded.
LOGIN_EMAIL = os.environ.get('GEETEST_EMAIL', '')
LOGIN_PASSWORD = os.environ.get('GEETEST_PASSWORD', '')

driver = None  # bound before ``try`` so ``finally`` never sees an unbound name
try:
    driver = webdriver.Chrome()
    driver.get('https://account.geetest.com/login')
    wait = WebDriverWait(driver, 10)

    # Step 1: click the button so the picture without a gap pops up
    button = wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'geetest_radar_tip')))
    button.click()

    # Step 2: capture the picture without the gap
    image1 = get_image()

    # Step 3: click the slider button so the picture with the gap appears
    button = wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'geetest_slider_button')))
    button.click()

    # Step 4: capture the picture with the gap
    image2 = get_image()

    # Step 5: compare the RGB pixels of both pictures; the x of the first
    # differing pixel gives the distance to move
    distance = get_distance(image1, image2)

    # Step 6: imitate a human (accelerate, then decelerate) by splitting the
    # total distance into small steps
    tracks = get_tracks(distance)
    print(tracks)
    print(image1.size)
    print(distance, sum(tracks))

    # Step 7: drag along the track to complete the verification
    button = wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'geetest_slider_button')))
    ActionChains(driver).click_and_hold(button).perform()
    for track in tracks:
        ActionChains(driver).move_by_offset(xoffset=track, yoffset=0).perform()
    ActionChains(driver).move_by_offset(xoffset=3, yoffset=0).perform()   # overshoot a little
    ActionChains(driver).move_by_offset(xoffset=-3, yoffset=0).perform()  # then come back, more human-like
    time.sleep(0.5)  # release the mouse after 0.5 s
    ActionChains(driver).release().perform()

    # Step 8: finish the login
    input_email = driver.find_element(By.ID, 'email')
    input_password = driver.find_element(By.ID, 'password')
    button = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'login-btn')))
    input_email.send_keys(LOGIN_EMAIL)
    input_password.send_keys(LOGIN_PASSWORD)
    button.click()

    time.sleep(200)
finally:
    if driver is not None:
        driver.quit()  # quit() also ends the driver process, close() only closes the window


# Examples
# 1. Cracking the cnblogs (博客園) back-office login
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from PIL import Image
import time
def get_snap():
    """Take a screenshot of the whole page and return it as a PIL image.

    Uses the module-level ``driver``; the screenshot is written to
    ``full_snap.png`` in the current working directory and then reopened.
    """
    driver.save_screenshot('full_snap.png')
    return Image.open('full_snap.png')
def get_image():
    """Crop the captcha picture out of a full-page screenshot.

    Uses the module-level ``driver`` and ``get_snap()``.

    :return: ``PIL.Image.Image`` containing only the captcha canvas area
    """
    # find_element(By..., ...) replaces the find_element_by_* helpers,
    # which were removed in Selenium 4.3.
    img = driver.find_element(By.CLASS_NAME, 'geetest_canvas_img')
    time.sleep(2)  # give the captcha picture time to finish rendering
    location = img.location
    size = img.size

    left = location['x']
    top = location['y']
    right = left + size['width']
    bottom = top + size['height']

    page_snap_obj = get_snap()
    return page_snap_obj.crop((left, top, right, bottom))
def get_distance(image1, image2):
    """Return the distance the slider has to travel to reach the gap.

    Columns are scanned left to right from x = 57; the first pixel whose
    R, G or B channel differs by at least the threshold marks the gap.

    :param image1: image object without the gap
    :param image2: image object with the gap (same size as ``image1``)
    :return: x of the first differing column minus 7. If no pixel differs
        (or the image is narrower than the start column) the fallback is
        ``width - 1 - 7``, clamped at 0.
    """
    start = 57
    threshold = 60
    offset = 7  # empirical correction applied to the detected column

    # Load the pixel-access objects once instead of once per pixel.
    pixels1 = image1.load()
    pixels2 = image2.load()
    width, height = image1.size

    for i in range(start, width):
        for j in range(height):
            rgb1 = pixels1[i, j]
            rgb2 = pixels2[i, j]
            if any(abs(rgb1[k] - rgb2[k]) >= threshold for k in range(3)):
                return i - offset
    return max(width - 1 - offset, 0)
def get_tracks(distance):
    """Build a human-like drag track that overshoots and then comes back.

    The forward part accelerates (a = 2) until 3/5 of the way and then
    decelerates (a = -3), aiming 20 px past the target. The backward part
    is a fixed list of small negative steps that undoes the 20 px.

    :param distance: distance between the slider and the gap
    :return: ``{'forward_tracks': [...], 'back_tracks': [...]}`` with
        integer offsets, one per 0.2 s time slice
    """
    distance += 20  # slide past the gap first, then slide back at the end
    v = 0
    t = 0.2
    forward_tracks = []
    current = 0
    mid = distance * 3 / 5

    while current < distance:
        a = 2 if current < mid else -3
        s = v * t + 0.5 * a * (t ** 2)
        if s <= 0:
            # With this profile the deceleration can come to rest right at
            # the target; stop there instead of sliding backwards.
            break
        v = v + a * t
        current += s
        forward_tracks.append(round(s))

    # Slide back to the exact position: the steps sum to -20, matching the
    # 20 px overshoot added above.
    back_tracks = [-3, -3, -3, -2, -2, -2, -2, -1, -1, -1]
    return {'forward_tracks': forward_tracks, 'back_tracks': back_tracks}
driver = None  # bound before ``try`` so ``finally`` never sees an unbound name
try:
    # 1. Enter username and password, then submit
    driver = webdriver.Chrome()
    driver.implicitly_wait(3)
    driver.get('https://passport.cnblogs.com/user/signin')

    # find_element(By..., ...) replaces the find_element_by_* helpers,
    # which were removed in Selenium 4.3.
    username = driver.find_element(By.ID, 'input1')
    pwd = driver.find_element(By.ID, 'input2')
    signin = driver.find_element(By.ID, 'signin')

    username.send_keys('linhaifeng')
    pwd.send_keys('xxxxx')
    signin.click()

    # 2. Click the button to get the picture without a gap
    button = driver.find_element(By.CLASS_NAME, 'geetest_radar_tip')
    button.click()

    # 3. Capture the picture without the gap
    image1 = get_image()

    # 4. Click the slider button to get the picture with the gap
    button = driver.find_element(By.CLASS_NAME, 'geetest_slider_button')
    button.click()

    # 5. Capture the picture with the gap
    image2 = get_image()

    # 6. Compare the pixels of both pictures to find the displacement
    distance = get_distance(image1, image2)

    # 7. Turn the total displacement into a human-like track
    tracks = get_tracks(distance)
    print(tracks)

    # 8. Follow the track: slide forward first, then backward
    button = driver.find_element(By.CLASS_NAME, 'geetest_slider_button')
    ActionChains(driver).click_and_hold(button).perform()

    # A person starts sliding forward confidently, i.e. accelerates hard
    for track in tracks['forward_tracks']:
        ActionChains(driver).move_by_offset(xoffset=track, yoffset=0).perform()

    # ... pauses on noticing the overshoot, then slides back
    time.sleep(0.5)
    for back_track in tracks['back_tracks']:
        ActionChains(driver).move_by_offset(xoffset=back_track, yoffset=0).perform()

    # Small wiggle; according to the author this greatly raises the success rate
    ActionChains(driver).move_by_offset(xoffset=-3, yoffset=0).perform()
    ActionChains(driver).move_by_offset(xoffset=3, yoffset=0).perform()

    # Hold briefly before releasing the mouse
    time.sleep(0.5)
    ActionChains(driver).release().perform()

    time.sleep(10)  # wait long enough to see that the login succeeded
finally:
    if driver is not None:
        driver.quit()  # quit() also ends the driver process, close() only closes the window


# 2. Revised version of the cnblogs (博客園) back-office login
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from PIL import Image
import time
def get_snap(driver):
    """Take a screenshot of the whole page and return it as a PIL image.

    :param driver: Selenium WebDriver whose current page is captured
    :return: ``PIL.Image.Image`` opened from ``full_snap.png``, which is
        written to the current working directory
    """
    driver.save_screenshot('full_snap.png')
    return Image.open('full_snap.png')
def get_image(driver):
    """Crop the captcha picture out of a full-page screenshot.

    :param driver: Selenium WebDriver showing the captcha
    :return: ``PIL.Image.Image`` containing only the captcha canvas area
    """
    # find_element(By..., ...) replaces the find_element_by_* helpers,
    # which were removed in Selenium 4.3.
    img = driver.find_element(By.CLASS_NAME, 'geetest_canvas_img')
    time.sleep(2)  # give the captcha picture time to finish rendering
    location = img.location
    size = img.size

    left = location['x']
    top = location['y']
    right = left + size['width']
    bottom = top + size['height']

    page_snap_obj = get_snap(driver)
    return page_snap_obj.crop((left, top, right, bottom))
def get_distance(image1, image2):
    """Return the distance the slider has to travel to reach the gap.

    Columns are scanned left to right from x = 57; the first pixel whose
    R, G or B channel differs by at least the threshold marks the gap.

    :param image1: image object without the gap
    :param image2: image object with the gap (same size as ``image1``)
    :return: x of the first differing column minus 7. If no pixel differs
        (or the image is narrower than the start column) the fallback is
        ``width - 1 - 7``, clamped at 0.
    """
    start = 57
    threshold = 60
    offset = 7  # empirical correction applied to the detected column

    # Load the pixel-access objects once instead of once per pixel.
    pixels1 = image1.load()
    pixels2 = image2.load()
    width, height = image1.size

    for i in range(start, width):
        for j in range(height):
            rgb1 = pixels1[i, j]
            rgb2 = pixels2[i, j]
            if any(abs(rgb1[k] - rgb2[k]) >= threshold for k in range(3)):
                return i - offset
    return max(width - 1 - offset, 0)
def get_tracks(distance):
    """Build a human-like drag track that overshoots and then comes back.

    The forward part accelerates (a = 2) until 3/5 of the way and then
    decelerates (a = -3), aiming 20 px past the target. The backward part
    is a fixed list of small negative steps that undoes the 20 px.

    :param distance: distance between the slider and the gap
    :return: ``{'forward_tracks': [...], 'back_tracks': [...]}`` with
        integer offsets, one per 0.2 s time slice
    """
    distance += 20  # slide past the gap first, then slide back at the end
    v = 0
    t = 0.2
    forward_tracks = []
    current = 0
    mid = distance * 3 / 5

    while current < distance:
        a = 2 if current < mid else -3
        s = v * t + 0.5 * a * (t ** 2)
        if s <= 0:
            # With this profile the deceleration can come to rest right at
            # the target; stop there instead of sliding backwards.
            break
        v = v + a * t
        current += s
        forward_tracks.append(round(s))

    # Slide back to the exact position: the steps sum to -20, matching the
    # 20 px overshoot added above.
    back_tracks = [-3, -3, -3, -2, -2, -2, -2, -1, -1, -1]
    return {'forward_tracks': forward_tracks, 'back_tracks': back_tracks}
def crack(driver):
    """Solve the slider verification shown on the driver's current page.

    :param driver: Selenium WebDriver on a page that shows the Geetest widget
    """
    # find_element(By..., ...) replaces the find_element_by_* helpers,
    # which were removed in Selenium 4.3.

    # 1. Click the button to get the picture without a gap
    button = driver.find_element(By.CLASS_NAME, 'geetest_radar_tip')
    button.click()

    # 2. Capture the picture without the gap
    image1 = get_image(driver)

    # 3. Click the slider button to get the picture with the gap
    button = driver.find_element(By.CLASS_NAME, 'geetest_slider_button')
    button.click()

    # 4. Capture the picture with the gap
    image2 = get_image(driver)

    # 5. Compare the pixels of both pictures to find the displacement
    distance = get_distance(image1, image2)

    # 6. Turn the total displacement into a human-like track
    tracks = get_tracks(distance)
    print(tracks)

    # 7. Follow the track: slide forward first, then backward
    button = driver.find_element(By.CLASS_NAME, 'geetest_slider_button')
    ActionChains(driver).click_and_hold(button).perform()

    # A person starts sliding forward confidently, i.e. accelerates hard
    for track in tracks['forward_tracks']:
        ActionChains(driver).move_by_offset(xoffset=track, yoffset=0).perform()

    # ... pauses on noticing the overshoot, then slides back
    time.sleep(0.5)
    for back_track in tracks['back_tracks']:
        ActionChains(driver).move_by_offset(xoffset=back_track, yoffset=0).perform()

    # Small wiggle; according to the author this greatly raises the success rate
    ActionChains(driver).move_by_offset(xoffset=-3, yoffset=0).perform()
    ActionChains(driver).move_by_offset(xoffset=3, yoffset=0).perform()

    # Hold briefly before releasing the mouse
    time.sleep(0.5)
    ActionChains(driver).release().perform()
def login_cnblogs(username, password):
    """Log in to cnblogs, solving the slider verification on the way.

    :param username: cnblogs account name
    :param password: cnblogs password
    """
    driver = webdriver.Chrome()
    try:
        # 1. Enter username and password, then submit
        driver.implicitly_wait(3)
        driver.get('https://passport.cnblogs.com/user/signin')

        # find_element(By..., ...) replaces the find_element_by_* helpers,
        # which were removed in Selenium 4.3.
        input_username = driver.find_element(By.ID, 'input1')
        input_pwd = driver.find_element(By.ID, 'input2')
        signin = driver.find_element(By.ID, 'signin')

        input_username.send_keys(username)
        input_pwd.send_keys(password)
        signin.click()

        # 2. Solve the slider verification
        crack(driver)

        time.sleep(10)  # wait long enough to see that the login succeeded
    finally:
        driver.quit()  # quit() also ends the driver process, close() only closes the window


if __name__ == '__main__':
    login_cnblogs(username='linhaifeng', password='xxxx')

# Class-based version (module ``svcr``)
import time import random
from selenium.webdriver import ActionChains from selenium.webdriver.common.by import By from PIL import Image
def simulate_reaction(func):
    """Decorate a method so it waits 0.2-1 s before running.

    The random pause imitates human reaction time.

    :param func: method taking ``self`` as its first argument
    :return: wrapper that sleeps, then returns ``func``'s result
    """
    from functools import wraps

    # wraps must receive the wrapped function; a bare ``@wraps`` would turn
    # ``inner`` into a functools.partial and break every decorated call.
    @wraps(func)
    def inner(self, *args, **kwargs):
        time.sleep(random.uniform(0.2, 1))
        return func(self, *args, **kwargs)

    return inner
class SVCR:
    """Slider-captcha recognizer for the Geetest verification widget."""

    def __init__(self, driver):
        """Store the WebDriver that is showing the page with the captcha."""
        self.driver = driver
        # True until the gap-free picture has been captured once; on a retry
        # the cached picture in ``_img_before`` is reused.
        self.get_full_img = True
        self._img_before = None

    # @simulate_reaction
    def run(self):
        """Run the whole recognition flow and return its result."""
        # 1. Click the button that starts the verification
        self.click_start_btn()
        # 2. Verify according to the captcha type
        return self.judge_and_auth()

    def judge_and_auth(self):
        """Pick the verification method; only the slider type is supported."""
        return self.auth_slide()

    @staticmethod
    def _get_distance(img1, img2):
        """Return the slide distance between the two pictures.

        Scans columns from x = 57 (skipping the movable slider piece) and
        returns the first column with a channel difference >= 60, minus an
        empirical offset of 7. Falls back to ``width - 1 - 7`` (clamped at
        0) when no pixel differs, so the caller always gets a number.
        """
        threshold = 60
        start_x = 57  # ignore the movable slider piece
        offset = 7    # empirical correction
        pixels1 = img1.load()
        pixels2 = img2.load()
        width, height = img1.size
        for i in range(start_x, width):
            for j in range(height):
                rgb1 = pixels1[i, j]
                rgb2 = pixels2[i, j]
                if any(abs(rgb1[k] - rgb2[k]) >= threshold for k in range(3)):
                    return i - offset
        return max(width - 1 - offset, 0)

    @staticmethod
    def _get_tracks(distance):
        """Build the slide track.

        Strategy: accelerate, then decelerate, overshoot by about 10 px,
        slide back about 13 px and finish with a small left/right wiggle.
        """
        v = 0
        current = 0
        t = 0.2
        tracks = []

        # Forward slide. The switch to deceleration is taken relative to the
        # overshoot target; measured against ``distance`` alone the slider
        # stops short of ``distance + 10`` for distances below ~90 px and
        # then starts moving backwards.
        target = distance + 10
        switch = target * 2 / 3
        while current < target:
            a = 2 if current < switch else -3
            s = v * t + 0.5 * a * (t ** 2)
            current += s
            tracks.append(round(s))
            v = v + a * t

        # Backward slide, continuing from the velocity reached above. The
        # original ``current < distance*2/3`` test is always true here for
        # distances >= 20, so the acceleration is simply 2.
        current = 0
        while current < 13:
            a = 2
            s = v * t + 0.5 * a * (t ** 2)
            current += s
            tracks.append(-round(s))
            v = v + a * t

        # Final correction
        tracks.extend([2, 2, -3, 2])
        return tracks

    def auth_slide(self):
        """Solve the slider captcha; retries until the dialog disappears."""
        # 1. Capture the complete picture (only on the first attempt)
        if self.get_full_img:
            time.sleep(2)  # wait until the picture has loaded
            img_before = self.get_img()
        else:
            img_before = self._img_before

        # 2. Click so the picture with the gap appears
        slider_btn = self.driver.find_element(By.CLASS_NAME, "geetest_slider_button")
        slider_btn.click()

        # 3. Capture the picture with the gap
        time.sleep(2)  # wait until the picture has loaded
        img_after = self.get_img()

        # 4. Build the movement track
        tracks = self._get_tracks(self._get_distance(img_before, img_after))

        # 5. Simulate the slide
        slider_btn = self.driver.find_element(By.CLASS_NAME, "geetest_slider_button")
        ActionChains(self.driver).click_and_hold(slider_btn).perform()
        for track in tracks:
            ActionChains(self.driver).move_by_offset(xoffset=track, yoffset=0).perform()

        # 6. Release the mouse
        time.sleep(0.5)  # release after 0.5 s
        ActionChains(self.driver).release().perform()

        # 7. Check whether the verification succeeded
        time.sleep(2)
        div_tag = self.driver.find_element(By.CLASS_NAME, "geetest_fullpage_click")
        if "display: block" in div_tag.get_attribute("style"):
            # The modal dialog is still shown ("display: block"), so the
            # verification failed: keep the gap-free picture and try again.
            self.get_full_img = False
            self._img_before = img_before
            return self.auth_slide()

        # Verified: the dialog is hidden ("display: none").
        # NOTE(review): the 1000 s pause is kept from the original;
        # presumably it holds the session open for inspection - confirm.
        time.sleep(1000)
        return True

    # @simulate_reaction
    def click_start_btn(self, search_style="CLASS_NAME", search_content="geetest_radar_tip"):
        """Find the start button and click it.

        :param search_style: name of a ``By`` locator attribute
        :param search_content: locator value passed to ``find_element``
        """
        btn = self.driver.find_element(getattr(By, search_style), search_content)
        btn.click()

    def get_img(self):
        """Screenshot the page and return the cropped captcha picture."""
        div_tag = self.driver.find_element(By.CLASS_NAME, "geetest_slicebg")
        # Work out the crop box from the element's position and size
        img_pt = div_tag.location  # e.g. {'x': 296, 'y': 15}
        img_size = div_tag.size    # e.g. {'height': 159, 'width': 258}
        img_box = (
            img_pt["x"],
            img_pt["y"],
            img_pt["x"] + img_size["width"],
            img_pt["y"] + img_size["height"],
        )
        # Save the current page, then cut out the target picture
        self.driver.save_screenshot("snap.png")
        img = Image.open("snap.png")
        return img.crop(img_box)
使用類 from selenium import webdriver
from svcr import SVCR
def auth():
    """Open the cnblogs sign-in page, submit the credentials and return the driver.

    :return: the Chrome WebDriver, left on the page that shows the captcha
    """
    # Local import: this script only imports ``webdriver`` at the top.
    from selenium.webdriver.common.by import By

    driver = webdriver.Chrome()
    driver.get("https://passport.cnblogs.com/user/signin")  # request the page
    driver.implicitly_wait(3)

    # Step 1: enter account and password, then click sign-in.
    # find_element(By..., ...) replaces the find_element_by_* helpers,
    # which were removed in Selenium 4.3.
    input_name = driver.find_element(By.ID, 'input1')     # username box
    input_pwd = driver.find_element(By.ID, 'input2')      # password box
    input_button = driver.find_element(By.ID, 'signin')   # sign-in button

    input_name.send_keys("name")  # cnblogs account
    input_pwd.send_keys("pwd")    # cnblogs password
    input_button.click()
    return driver
def main():
    """Sign in and run the slider-captcha recognizer on the resulting page."""
    driver = auth()
    try:
        # Run the verification
        _auth = SVCR(driver)
        _auth.run()
    finally:
        driver.quit()  # end the browser session even if the verification fails


if __name__ == '__main__':
    main()
總結
以上是生活随笔 為你收集整理的艺赛旗RPA验证码处理系列(三):破解极验滑动验证码 的全部內容,希望文章能夠幫你解決所遇到的問題。
如果覺得生活随笔 網站內容還不錯,歡迎將生活随笔 推薦給好友。