我之前使用的是机械化模块,现在尝试使用请求模块
(当需要HTTPS和代理身份验证时,Python mechanize不起作用)
当我访问互联网时,我必须通过代理服务器
代理服务器需要身份验证。我写了以下代码。
import requests
from requests.auth import HTTPProxyAuth

# Proxy for plain-HTTP traffic; no scheme prefix is given here,
# requests treats a bare host:port as an http:// proxy.
proxies = {"http":"192.168.20.130:8080"}
# HTTPProxyAuth sends Basic credentials via the Proxy-Authorization header
auth = HTTPProxyAuth("username", "password")
r = requests.get("http://www.google.co.jp/", proxies=proxies, auth=auth)
当代理服务器需要基本身份验证时,上述代码可以很好地工作
现在我想知道当代理服务器需要摘要身份验证时我必须做什么
HTTPProxyAuth在摘要身份验证中似乎无效(r.status_code返回407)。
在大多数情况下,无需自己实现:
requests 对代理的基本身份验证提供了内置支持:
# requests understands user:password credentials embedded directly in the
# proxy URL — this covers Basic proxy authentication only.
proxies = { 'https' : 'https://user:password@proxyip:port' }
r = requests.get('https://url', proxies=proxies)
查看更多文档
或者,如果您需要摘要身份验证,HTTPDigestAuth
可能会有所帮助
或者您可能需要像yutaka2487所做的那样尝试扩展它
注意:必须使用代理服务器的ip而不是其名称!
我编写了可用于代理身份验证的类(基于摘要身份验证)
我几乎所有代码都借用自 requests.auth.HTTPDigestAuth。
import requests
import requests.auth
class HTTPProxyDigestAuth(requests.auth.HTTPDigestAuth):
    """Digest authentication against a proxy (HTTP 407) instead of the
    origin server (HTTP 401).

    NOTE(review): this is written against a very old (pre-1.0) requests
    API — ``r.request.hooks['response']`` as a list, ``.send(anyway=True)``,
    ``r.request.response`` — none of which exist in modern requests.
    """

    def handle_407(self, r):
        """Takes the given response and tries digest-auth, if needed."""
        # Count how many times this hook is already registered so the
        # challenge is retried at most once (guards against a 407 loop).
        num_407_calls = r.request.hooks['response'].count(self.handle_407)
        s_auth = r.headers.get('Proxy-authenticate', '')
        if 'digest' in s_auth.lower() and num_407_calls < 2:
            # Parse the challenge parameters from the Proxy-Authenticate header
            self.chal = requests.auth.parse_dict_header(s_auth.replace('Digest ', ''))
            # Consume content and release the original connection
            # to allow our new request to reuse the same one.
            r.content
            r.raw.release_conn()
            # NOTE(review): this sets 'Authorization', while __call__ below
            # sets 'Proxy-Authorization' — looks inconsistent; confirm which
            # header the proxy actually honors here.
            r.request.headers['Authorization'] = self.build_digest_header(r.request.method, r.request.url)
            r.request.send(anyway=True)
            _r = r.request.response
            # Keep the 407 response in the retried response's history
            _r.history.append(r)
            return _r
        return r

    def __call__(self, r):
        # If a nonce is cached from an earlier challenge, attach the digest
        # header pre-emptively and skip the 407 round-trip.
        if self.last_nonce:
            r.headers['Proxy-Authorization'] = self.build_digest_header(r.method, r.url)
        r.register_hook('response', self.handle_407)
        return r
用法:
# Same proxy endpoint used for both plain and TLS traffic
proxies = {
    "http" :"192.168.20.130:8080",
    "https":"192.168.20.130:8080",
}
auth = HTTPProxyDigestAuth("username", "password")

# HTTP
r = requests.get("http://www.google.co.jp/", proxies=proxies, auth=auth)
r.status_code  # 200 OK

# HTTPS
r = requests.get("https://www.google.co.jp/", proxies=proxies, auth=auth)
r.status_code  # 200 OK
我已经编写了一个Python模块(此处可用),它可以使用摘要方案通过HTTP代理进行身份验证。它在连接到HTTPS网站时工作(通过monkey补丁),并允许与网站进行身份验证。这应该适用于Python 2和3的最新requests
库。
以下示例通过 HTTP 代理 1.2.3.4:8080(该代理要求使用用户名 user1
和密码 password1 的 HTTP 摘要认证)获取网页 https://httpbin.org/ip:
import requests
from requests_digest_proxy import HTTPProxyDigestAuth

s = requests.Session()
# Route both plain-HTTP and HTTPS traffic through the same HTTP proxy
s.proxies = {
    'http': 'http://1.2.3.4:8080/',
    'https': 'http://1.2.3.4:8080/'
}
# Session-wide digest credentials for the proxy
s.auth = HTTPProxyDigestAuth('user1', 'password1')
print(s.get('https://httpbin.org/ip').text)
如果网站需要某种HTTP身份验证,可以通过以下方式指定给HTTPProxyDigestAuth
构造函数:
# Fixed two syntax errors from the original snippet: an extra closing
# parenthesis after the first print(...) and a doubled comma in the
# second HTTPProxyDigestAuth(...) call.

# HTTP Basic authentication for website
# (first argument: proxy credentials; auth=: credentials for the site itself)
s.auth = HTTPProxyDigestAuth(('user1', 'password1'),
                             auth=requests.auth.HTTPBasicAuth('user1', 'password0'))
print(s.get('https://httpbin.org/basic-auth/user1/password0').text)

# HTTP Digest authentication for website
s.auth = HTTPProxyDigestAuth(('user1', 'password1'),
                             auth=requests.auth.HTTPDigestAuth('user1', 'password0'))
print(s.get('https://httpbin.org/digest-auth/auth/user1/password0').text)
此代码段适用于两种类型的请求(http
和https
)。在当前版本的请求上进行了测试(2.23.0)。
import re
import requests
from requests.utils import get_auth_from_url
from requests.auth import HTTPDigestAuth
from requests.utils import parse_dict_header
from urllib3.util import parse_url
def get_proxy_autorization_header(proxy, method):
    """Pre-compute a Proxy-Authorization header value by performing a
    throwaway request directly against the proxy endpoint itself.

    NOTE(review): the name keeps its original typo ("autorization")
    because both adapter classes below call it by this name.

    :param proxy: proxy URL including user:password credentials
    :param method: HTTP method the header will authorize (the method is
        part of the digest computation)
    :return: the Proxy-Authorization header string from the final,
        successfully authenticated prepared request
    """
    username, password = get_auth_from_url(proxy)
    auth = HTTPProxyDigestAuth(username, password)
    proxy_url = parse_url(proxy)
    # HTTPProxyDigestAuth answers the proxy's 407 challenge; the digest
    # header ends up on the prepared request of the final response.
    proxy_response = requests.request(method, proxy_url, auth=auth)
    return proxy_response.request.headers['Proxy-Authorization']
class HTTPSAdapterWithProxyDigestAuth(requests.adapters.HTTPAdapter):
    """Adapter for https:// URLs: injects a digest Proxy-Authorization
    header into the CONNECT tunnel request sent to the proxy."""

    def proxy_headers(self, proxy):
        # For HTTPS the proxy only ever sees the CONNECT request, so the
        # authorization header is computed for that method.
        return {'Proxy-Authorization': get_proxy_autorization_header(proxy, 'CONNECT')}
class HTTPAdapterWithProxyDigestAuth(requests.adapters.HTTPAdapter):
    """Adapter for http:// URLs: attaches a digest Proxy-Authorization
    header to every individual request (plain-HTTP requests go to the
    proxy one by one, so each must carry its own header)."""

    def proxy_headers(self, proxy):
        # Nothing at connect time; the header is added per request below.
        return {}

    def add_headers(self, request, **kwargs):
        """Add a Proxy-Authorization header computed for this request's
        method, when an http proxy is configured.

        Fix: the original used kwargs['proxies'].get(...), which raises
        KeyError when 'proxies' is absent and AttributeError when it is
        None; use a defensive lookup instead.
        """
        proxy = (kwargs.get('proxies') or {}).get('http', '')
        if proxy:
            proxy_auth_header = get_proxy_autorization_header(proxy, request.method)
            request.headers['Proxy-Authorization'] = proxy_auth_header
class HTTPProxyDigestAuth(requests.auth.HTTPDigestAuth):
    """Digest authentication against the proxy (HTTP 407) rather than the
    origin server (HTTP 401). Reuses HTTPDigestAuth's challenge parsing
    and header construction, but reads Proxy-Authenticate and writes
    Proxy-Authorization."""

    def init_per_thread_state(self):
        # Ensure state is initialized just once per-thread
        if not hasattr(self._thread_local, 'init'):
            self._thread_local.init = True
            self._thread_local.last_nonce = ''
            self._thread_local.nonce_count = 0
            self._thread_local.chal = {}
            self._thread_local.pos = None
            # Extra field vs. the base class: retry guard for handle_407
            self._thread_local.num_407_calls = None

    def handle_407(self, r, **kwargs):
        """
        Takes the given response and tries digest-auth, if needed.

        :rtype: requests.Response
        """
        # If response is not 407, do not auth
        if r.status_code != 407:
            self._thread_local.num_407_calls = 1
            return r

        s_auth = r.headers.get('proxy-authenticate', '')

        # Retry at most once so bad credentials cannot cause a 407 loop
        if 'digest' in s_auth.lower() and self._thread_local.num_407_calls < 2:
            self._thread_local.num_407_calls += 1
            # Strip the leading "Digest " scheme token case-insensitively
            # before parsing the challenge parameters into a dict
            pat = re.compile(r'digest ', flags=re.IGNORECASE)
            self._thread_local.chal = requests.utils.parse_dict_header(
                pat.sub('', s_auth, count=1))

            # Consume content and release the original connection
            # to allow our new request to reuse the same one.
            r.content
            r.close()

            # Re-send a copy of the original request, now carrying the
            # digest Proxy-Authorization header, over the same connection.
            prep = r.request.copy()
            requests.cookies.extract_cookies_to_jar(prep._cookies, r.request, r.raw)
            prep.prepare_cookies(prep._cookies)
            prep.headers['Proxy-Authorization'] = self.build_digest_header(prep.method, prep.url)
            _r = r.connection.send(prep, **kwargs)
            # Keep the 407 response in history, mirroring requests' own
            # handling of 401 digest retries.
            _r.history.append(r)
            _r.request = prep
            return _r

        self._thread_local.num_407_calls = 1
        return r

    def __call__(self, r):
        # Initialize per-thread state, if needed
        self.init_per_thread_state()
        # If we have a saved nonce, skip the 407
        if self._thread_local.last_nonce:
            r.headers['Proxy-Authorization'] = self.build_digest_header(r.method, r.url)
        r.register_hook('response', self.handle_407)
        self._thread_local.num_407_calls = 1
        return r
session = requests.Session()
session.proxies = {
    'http': 'http://username:password@proxyhost:proxyport',
    'https': 'http://username:password@proxyhost:proxyport'
}
# Ignore HTTP(S)_PROXY environment variables so only session.proxies is used
session.trust_env = False
# Plain HTTP and HTTPS need different adapters: per-request headers for
# http://, a CONNECT-tunnel header for https://
session.mount('http://', HTTPAdapterWithProxyDigestAuth())
session.mount('https://', HTTPSAdapterWithProxyDigestAuth())

response_http = session.get("http://ww3.safestyle-windows.co.uk/the-secret-door/")
print(response_http.status_code)

response_https = session.get("https://stackoverflow.com/questions/13506455/how-to-pass-proxy-authentication-requires-digest-auth-by-using-python-requests")
print(response_https.status_code)
通常,当使用HTTPS协议进行连接时,代理自动授权的问题也与其他类型的身份验证(ntlm、kerberos)有关。尽管存在大量问题(自2013年以来,也许还有更早的问题我没有发现):
请求中:摘要式代理身份验证、NTLM代理身份验证和Kerberos代理身份验证
在urlib3中:NTLM代理身份验证,NTLM代理权限
以及许多其他问题,这个问题仍然没有得到解决。
模块httplib
(python2)/http.client
(python3)的函数_tunnel
中的问题根源。在连接尝试不成功的情况下,它会引发OSError
,而不会返回响应代码(在我们的情况下为407)和构建授权标头所需的额外数据。Lukasa 在这里做了解释。只要urllib3(或requests)的维护人员没有解决方案,我们就只能使用各种变通方法(例如,使用@Tey'的方法或做类似的事情)。在我的变通方法版本中,我们通过预先向代理服务器发送请求并处理收到的响应来准备必要的授权数据。
您可以使用requests.auth.HTTPDigestAuth
而不是requests.auth.HTTPProxyAuth
来使用摘要身份验证
对于那些仍然在这里的人来说,似乎有一个名为requests toolbelt的项目,它具有此功能以及其他常见但未内置的请求功能。
https://toolbelt.readthedocs.org/en/latest/authentication.html#httpproxydigestauth
这对我有效。事实上,我不知道这个版本中用户:密码的安全性:
import requests
import os

# NOTE(review): credentials placed in environment variables are visible
# to every child process — consider the security implications.
http_proxyf = 'http://user:password@proxyip:port'
os.environ["http_proxy"] = http_proxyf
os.environ["https_proxy"] = http_proxyf

sess = requests.Session()
# maybe need sess.trust_env = True
print(sess.get('https://some.org').text)
import requests
import os

# in my case I had to add my local domain
# (note the user@localdomain form in the https credentials below)
proxies = {
    'http': 'proxy.myagency.com:8080',
    'https': 'user@localdomain:password@proxy.myagency.com:8080',
}
r=requests.get('https://api.github.com/events', proxies=proxies)
print(r.text)
这里有一个适用于不需要 HTTP 基本身份验证的场景的答案,例如组织内的透明(transparent)代理。
import requests

url = 'https://someaddress-behindproxy.com'
params = {'apikey': '123456789'}  # if you need params
proxies = {'https': 'https://proxyaddress.com:3128'}  # or some other port
response = requests.get(url, proxies=proxies, params=params)
我希望这能帮助到别人。