我正在编写一个 Python 程序,该程序需要能够解决网站上的验证码。我想使用 2captcha。我已经用 Selenium 编写了一个 Python 脚本,除了解决验证码之外,它能完成我需要的所有操作。当我点击 2captcha.com 的"API"选项卡时,会显示以下内容(以及其他参数):
您可以使用两种可用格式上传验证码:
multipart 和 base64。
Multipart 示例:
<!-- Multipart upload form: POSTs to http://2captcha.com/in.php as multipart/form-data,
     sending a hidden method=post field, the API key (field "key") and the CAPTCHA
     image (field "file"). -->
<form method="post" action="http://2captcha.com/in.php" enctype="multipart/form-data">
<input type="hidden" name="method" value="post">
Your key:
<input type="text" name="key" value="YOUR_APIKEY">
The CAPTCHA file:
<input type="file" name="file">
<input type="submit" value="download and get the ID">
</form>
YOUR_APIKEY - 是长度为 32 个字符的密钥。
Base64 示例:
<!-- Base64 upload form: POSTs to http://2captcha.com/in.php, sending a hidden
     method=base64 field, the API key (field "key") and the base64-encoded image
     in the "body" field. -->
<form method="post" action="http://2captcha.com/in.php">
<input type="hidden" name="method" value="base64">
Your key:
<input type="text" name="key" value="YOUR_APIKEY">
The CAPTCHA file body in base64 format:
<textarea name="body">BASE64_FILE</textarea>
<input type="submit" value="download and get the ID">
</form>
YOUR_APIKEY - 是长度为 32 个字符的密钥。
BASE64_FILE - 是经 base64 编码的图像文件内容。
我非常了解python,以及它的大部分科学和数学模块,但我对与Web相关的编程有点陌生。 上面的代码看起来像 html。 我将如何使python程序执行上述html指令?
我来自 2captcha 团队,我们有 Python 示例。您可以使用以下代码:
"""
这是一个如何通过 Google reCAPTCHA v2 的示例。我使用 python + selenium + PhantomJS 来实现。PhantomJS 是用于自动化 Web 测试的无头浏览器,我用它来抓取页面并绕过 Google 验证码。要使用它,您需要安装 PhantomJS(在 Linux 系统上需要从源代码构建)并安装 Selenium 的 python 模块。
Google 验证码显示在页面的 iframe 中。您需要抓取其中的元素,点击图片,并在点击"确定"后检查错误消息。
"""
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import *
import re
import os
import sys
import time
import random
import requests
from PIL import Image
class capcha_resolver:
    """Pass the Google reCAPTCHA v2 demo page with PhantomJS and 2captcha.

    The instance owns a selenium PhantomJS driver (``self.driver``) and a
    2captcha API key (``self.TWOCAPTCHA_API_KEY``). Methods report failure
    by returning ``-1`` (or an empty list for ``send_capcha``) rather than
    by raising; selenium and requests errors are not caught here.

    Output uses single-argument ``print(...)`` so the same source runs on
    both Python 2 and Python 3.
    """

    def __init__(self, captchakey, proxy=None):
        """Store the API key and start PhantomJS.

        It is assumed that you have phantomjs installed into /bin folder
        on your linux system.

        Args:
            captchakey: 2captcha API key used for upload and polling.
            proxy: optional proxy address passed to PhantomJS via
                ``--proxy=``; when falsy no proxy arguments are used.
        """
        self.TWOCAPTCHA_API_KEY = captchakey
        phantom_args = []
        if proxy:
            self.PROXY = proxy
            phantom_args = ['--proxy=' + self.PROXY, '--proxy-type=http', '--proxy-type=https']
        self.driver = webdriver.PhantomJS('/bin/phantomjs', service_args=phantom_args)
        self.driver.set_page_load_timeout(20)

    def fail(self, msg):
        """Print an error message and save a screenshot to ``error.png``."""
        print("[!] Error: " + msg)
        self.driver.save_screenshot('error.png')

    def get_page(self):
        """Open the reCAPTCHA demo page and save a screenshot to ``page.png``.

        Returns:
            Always ``0``; driver errors propagate as exceptions.
        """
        self.driver.get('https://www.google.com/recaptcha/api2/demo')
        self.driver.save_screenshot('page.png')
        return 0

    def send_capcha(self, filename):
        """Upload a captcha image to 2captcha and poll for the answer.

        Args:
            filename: path of the image file to upload.

        Returns:
            A list with one element per character of the answer text that
            follows ``|`` in the ``OK|...`` response, or ``[]`` when the
            upload is rejected, the service reports an error, or no answer
            arrives within 40 polls.
        """
        data = {'key': self.TWOCAPTCHA_API_KEY, 'method': 'post'}
        # The with-block closes the image handle as soon as the upload is done.
        with open(filename, 'rb') as image:
            r = requests.post('http://2captcha.com/in.php', files={'file': image}, data=data)
        if r.ok and r.text.find('OK') > -1:
            reqid = r.text[r.text.find('|') + 1:]
            print("[+] Capcha id: " + reqid)
            for _ in range(40):
                r = requests.get(
                    'http://2captcha.com/res.php?key={0}&action=get&id={1}'.format(
                        self.TWOCAPTCHA_API_KEY, reqid))
                if r.text.find('CAPCHA_NOT_READY') > -1:
                    print(r.text)
                    time.sleep(3)
                    # Not solved yet: wait and poll again.
                    continue
                if r.text.find('ERROR') > -1:
                    return []
                if r.text.find('OK') > -1:
                    return list(r.text[r.text.find('|') + 1:])
        return []

    def bypass_captcha(self):
        """Click the checkbox, solve the image challenge and verify it.

        Google recaptcha could be found by id. Frame with checkbox has id
        which starts with I0, recapcha frame has id with I1.

        Returns:
            ``0`` when the checkbox ends up checked, ``-1`` when 2captcha
            gives no answer, no challenge images are found, the challenge
            reports an error, or the checkbox is still unchecked.
        """
        capthcaboxframe = self.driver.find_element_by_xpath('//iframe[starts-with(@id, "I0")]')
        self.driver.switch_to.frame(capthcaboxframe)
        time.sleep(1)
        checkbox = self.driver.find_element_by_id('recaptcha-anchor')
        checkbox.click()
        print("[*] Clicked on checkbox")
        time.sleep(2)
        self.driver.switch_to.default_content()

        capcthaframe = self.driver.find_element_by_xpath('//iframe[starts-with(@id, "I1")]')
        bounding_box = (
            capcthaframe.location['x'],  # left
            capcthaframe.location['y'],  # upper
            (capcthaframe.location['x'] + capcthaframe.size['width']),  # right
            (capcthaframe.location['y'] + capcthaframe.size['height']))  # bottom
        imgname = 'capcha.jpeg'  # use jpeg because png images can exceed 2capcha file size limit
        time.sleep(2)
        self.driver.save_screenshot(imgname)

        # Reduce the full-page screenshot to the challenge frame only.
        base_image = Image.open(imgname)
        cropped_image = base_image.crop(bounding_box)
        base_image = base_image.resize(cropped_image.size)
        base_image.paste(cropped_image, (0, 0))
        base_image.save(imgname)

        numbers = self.send_capcha(imgname)
        if not numbers:
            return -1

        self.driver.switch_to.frame(capcthaframe)
        picturetable = self.driver.find_element_by_css_selector('.rc-imageselect-table-3')
        images = []
        for row in picturetable.find_elements_by_tag_name('tr'):
            for col in row.find_elements_by_tag_name('td'):
                images.append(col.find_element_by_tag_name('img'))
        if not images:
            self.fail("Found no captcha images")
            return -1

        print("[*] Got answer : " + str(numbers))
        for number in numbers:
            # Answer characters are treated as 1-based tile numbers.
            index = int(number) - 1
            images[index].click()
            print('[+] clicked on image ' + str(index))
        self.driver.save_screenshot('res.png')

        verifybutton = self.driver.find_element_by_id('recaptcha-verify-button')
        verifybutton.click()
        print("[*] Clicked verify button")
        time.sleep(2)

        # any() stops at the first visible error element, like the original
        # left-to-right "or" chain.
        error_selectors = (
            '.rc-imageselect-incorrect-response',
            '.rc-imageselect-error-select-one',
            '.rc-imageselect-error-select-more',
        )
        if any(self.driver.find_element_by_css_selector(selector).is_displayed()
               for selector in error_selectors):
            self.fail("Incorrect answer from 2captcha")
            return -1

        self.driver.switch_to.default_content()
        self.driver.switch_to.frame(capthcaboxframe)
        if self.driver.find_element_by_css_selector('.recaptcha-checkbox').get_attribute('aria-checked') == 'false':
            self.fail("Capctha not passed")
            return -1
        self.driver.switch_to.default_content()
        self.driver.save_screenshot('passed.png')
        return 0
# Command-line driver: python resolver.py 2CAPCHA_API_KEY [PROXY]
# Builds a capcha_resolver from the arguments, opens the demo page and then
# tries to pass the captcha, printing the outcome of each step.
proxy = None
if len(sys.argv) < 2:
    print("Usage: python resolver.py 2CAPCHA_API_KEY [PROXY]")
    # Without an API key there is nothing to run; stop here instead of
    # failing on sys.argv[1] below.
    sys.exit(1)
if len(sys.argv) > 2:
    proxy = sys.argv[2]
resolver = capcha_resolver(sys.argv[1], proxy)
if resolver.get_page() == -1:
    print("[!] Error while getting page")
else:
    print("[+] Opened URL")
    if resolver.bypass_captcha() == -1:
        print("[!] Error on captcha resolving")
    else:
        print("[+] Resolved captcha")
祝你好运!
可以看看 requests 模块:
# Minimal upload example: POST image.png plus the API key to 2captcha's in.php.
url = 'http://2captcha.com/in.php'
data = {'key': 'key', 'method': 'post'}
# Open the image in a with-block so the handle is closed after the upload.
with open('image.png', 'rb') as image:
    files = {'file': image}
    r = requests.post(url, files=files, data=data)
if r.ok:
    # do something with the response data
    pass