有时我能正常处理 socket.timeout,但有时仍会收到套接字超时错误,脚本随即突然中止……我的异常处理是不是漏掉了什么?异常怎么会绕过这些 except 子句直接抛出来呢?
在以下任一代码段中随机发生:
第一个片段:
# Try to open `url` up to `max_retries` times, stopping at the first attempt
# that succeeds, then feed the HTML body to the parser.
for _ in range(max_retries):
try:
req = Request(url, headers={'User-Agent' :'Mozilla/5.0'})
# The 5-second timeout is handed to urlopen() for this request.
response = urlopen(req,timeout=5)
break
except error.URLError as err:
print("URL that generated the error code: ", url)
print("Error description:",err.reason)
# NOTE(review): urllib's HTTPError is a subclass of URLError, so with this
# ordering the handler below looks unreachable - confirm and swap if so.
except error.HTTPError as err:
print("URL that generated the error code: ", url)
print("Error code:", err.code)
print("Error description:", err.reason)
except socket.timeout:
print("URL that generated the error code: ", url)
print("Error description: No response.")
except socket.error:
print("URL that generated the error code: ", url)
print("Error description: Socket error.")
# NOTE(review): nothing in this fragment assigns `response` when every attempt
# fails, so the next line would then raise NameError - confirm against the
# full function.
if response.getheader('Content-Type').startswith('text/html'):
# NOTE(review): this read() appears to run after the try/except above has
# ended; the traceback in this post ends in socket.timeout raised from this
# call, so none of the handlers above can catch it.
htmlBytes = response.read()
htmlString = htmlBytes.decode("utf-8")
self.feed(htmlString)
第二个片段
import os  # local import for os.path.join below; harmless if already imported

# Download the URL `i` into the file `aux` and append one metadata line to the
# log file, retrying the whole operation up to `max_retries` times.
for _ in range(max_retries):
    try:
        req = Request(i, headers={'User-Agent': 'Mozilla/5.0'})
        # Opening the URL and streaming its body both happen inside the try,
        # so a timeout part-way through the copy reaches the handlers below.
        with urlopen(req, timeout=5) as response, open(aux, 'wb') as out_file:
            shutil.copyfileobj(response, out_file)
        # NOTE(review): the original line was garbled ("open(path, fname), 'a')");
        # os.path.join is the most plausible reconstruction - confirm.
        with open(os.path.join(path, fname), 'a') as f:
            f.write("link" + str(intaux) + "-" + auxstr + str(index) + i[-4:]
                    + " --- " + metadata[index % batch] + '\n')
        break
    except error.HTTPError as err:
        # HTTPError subclasses URLError, so it must be caught first or this
        # handler can never run.
        print("URL that generated the error code: ", i)
        print("Error code:", err.code)
        print("Error description:", err.reason)
    except error.URLError as err:
        print("URL that generated the error code: ", i)
        print("Error description:", err.reason)
    except socket.timeout:
        print("URL that generated the error code: ", i)
        print("Error description: No response.")
    except socket.error:
        print("URL that generated the error code: ", i)
        print("Error description: Socket error.")
错误:
Traceback (most recent call last):
File "/mydir/crawler.py", line 202, in <module>
spider("urls.txt", maxPages=0, debug=1, dailyRequests=9600)
File "/mydir/crawler.py", line 142, in spider
parser.getLinks(url + "?start=" + str(currbot) + "&tab=" + auxstr,auxstr)
File "/mydir/crawler.py", line 81, in getLinks
htmlBytes = response.read()
File "/usr/lib/python3.5/http/client.py", line 455, in read
return self._readall_chunked()
File "/usr/lib/python3.5/http/client.py", line 561, in _readall_chunked
value.append(self._safe_read(chunk_left))
File "/usr/lib/python3.5/http/client.py", line 607, in _safe_read
chunk = self.fp.read(min(amt, MAXAMOUNT))
File "/usr/lib/python3.5/socket.py", line 575, in readinto
return self._sock.recv_into(b)
File "/usr/lib/python3.5/ssl.py", line 929, in recv_into
return self.read(nbytes, buffer)
File "/usr/lib/python3.5/ssl.py", line 791, in read
return self._sslobj.read(len, buffer)
File "/usr/lib/python3.5/ssl.py", line 575, in read
v = self._sslobj.read(len, buffer)
socket.timeout: The read operation timed out
编辑:
多亏 @tdelaney 的提醒,我注意到之前漏贴了几行代码,现已补充到上面的代码中。如果您把它作为解决方案发布,或者有更好的解决办法,我会把您的回答标记为正确答案。
解决方案:
# Fetch `url` and feed its HTML to the parser, retrying the complete
# request-and-read cycle up to `max_retries` times.
for _ in range(max_retries):
    try:
        req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        # `with` closes the connection even when the read fails part-way.
        with urlopen(req, timeout=5) as response:
            # A missing Content-Type header yields None; treat it as non-HTML.
            content_type = response.getheader('Content-Type') or ''
            if not content_type.startswith('text/html'):
                break  # nothing to parse; leaving via break skips the else
            # read() is where "The read operation timed out" is raised, so it
            # has to sit inside the try. A retry then issues a fresh request
            # rather than re-reading a connection that has already failed.
            htmlBytes = response.read()
    except error.HTTPError as err:
        # HTTPError subclasses URLError, so it must be caught first.
        print("URL that generated the error code: ", url)
        print("Error code:", err.code)
        print("Error description:", err.reason)
    except error.URLError as err:
        print("URL that generated the error code: ", url)
        print("Error description:", err.reason)
    except socket.timeout:
        print("URL that generated the error code: ", url)
        print("Error description: No response.")
    except socket.error:
        print("URL that generated the error code: ", url)
        print("Error description: Socket error.")
    else:
        # Reached only when the body was read completely without an error.
        htmlString = htmlBytes.decode("utf-8")
        self.feed(htmlString)
        break
Python 的 requests 库使用自己的一组异常来处理与 HTTP 协议和套接字相关的错误。它会自动把内部 socket() 调用抛出的异常映射为 requests.exceptions 中定义的自定义异常。注意:只有通过 requests 发起请求时才会得到这些异常;urllib 的 urlopen 抛出的仍然是 urllib.error 和 socket 中的异常。
因此,下面这段代码引发的异常……
import requests
from requests.exceptions import Timeout

try:
    # requests raises its own exception types, so Timeout is only meaningful
    # when the request is made through requests itself, not urllib's urlopen.
    requests.get("http://stackoverflow.com",
                 headers={'User-Agent': 'Mozilla/5.0'}, timeout=5)
except Timeout:
    print("Session Timed Out!")
……等效于下面这段代码引发的异常:
import socket

# The context manager closes the socket whether or not connect() succeeds.
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
    # socket.timeout is only raised for sockets that have a timeout set.
    s.settimeout(5)
    try:
        s.connect(("127.0.0.1", 80))
    except socket.timeout:
        print("Session Timed Out")
修正后的代码……
import requests  # the handlers below only match when requests makes the call

# Fetch `url` with requests, retrying up to `max_retries` times and stopping
# at the first attempt that succeeds.
for _ in range(max_retries):
    try:
        # Without stream=True, get() also downloads the body, so a stalled
        # download raises inside this try and reaches the handlers below.
        response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'},
                                timeout=5)
        # Turn 4xx/5xx status codes into requests.exceptions.HTTPError.
        response.raise_for_status()
        break
    except requests.exceptions.HTTPError as err:
        print("URL that generated the error code: ", url)
        print("Error code:", err.response.status_code)
        print("Error description:", err.response.reason)
    except requests.exceptions.Timeout:
        # Caught before ConnectionError because ConnectTimeout derives from both.
        print("URL that generated the error code: ", url)
        print("Error description: Session timed out.")
    except requests.exceptions.ConnectionError:
        print("URL that generated the error code: ", url)
        print("Error description: Socket error timed out.")
    except requests.exceptions.RequestException as err:
        # Any other requests failure (invalid URL, too many redirects, ...).
        print("URL that generated the error code: ", url)
        print("Error description:", err)