import urllib2
def download(url, user_agent='wswp', num_retries=2):
    """Fetch ``url`` and return the raw response body, or None on failure.

    Args:
        url: Address to request.
        user_agent: Accepted for API compatibility and forwarded on retries.
            NOTE(review): the request always sends 'Mozilla/5.0'; this value
            is not applied to the headers. Confirm whether that is intended.
        num_retries: How many more attempts are allowed after a 5xx HTTP
            error.

    Returns:
        Whatever ``urlopen(...).read()`` yields, or None when the request
        raised ``urllib2.URLError`` and no retry succeeded.
    """
    # Single-argument print() gives the same output on Python 2 and 3.
    print('downloading: %s' % url)
    headers = {'User-Agent': 'Mozilla/5.0'}
    request = urllib2.Request(url, headers=headers)
    try:
        response = urllib2.urlopen(request)
        try:
            html = response.read()
        finally:
            # Release the connection even if reading the body fails.
            response.close()
    except urllib2.URLError as e:
        print("download error:")
        html = None
        # Only 5xx (server-side) failures are retried; errors without a
        # status code and 4xx responses fall through and return None.
        if num_retries > 0 and hasattr(e, 'code') and 500 <= e.code < 600:
            print("e.code =  %s" % e.code)
            # Pass by keyword: a positional second argument would land in
            # ``user_agent`` and reset ``num_retries`` to its default,
            # so a persistent 5xx would recurse without bound.
            return download(
                url,
                user_agent=user_agent,
                num_retries=num_retries - 1,
            )
    return html
# Fetch the sample page and show the result (None when the download failed).
print(download("http://www.huaru.cc/mobile/product/xsim.html"))
结果:
c:\python27\python.exe e:/py2_7/untitled1/secondclass_agent
downloading: http://www.huaru.cc/mobile/product/xsim.html
进程已结束,退出代码为 0
在 Python 中,缩进是关键。
import urllib2
def download(url, user_agent='wswp', num_retries=2):
    """Fetch ``url`` and return the raw response body, or None on failure.

    Args:
        url: Address to request.
        user_agent: Accepted for API compatibility and forwarded on retries.
            NOTE(review): the request always sends 'Mozilla/5.0'; this value
            is not applied to the headers. Confirm whether that is intended.
        num_retries: How many more attempts are allowed after a 5xx HTTP
            error.

    Returns:
        Whatever ``urlopen(...).read()`` yields, or None when the request
        raised ``urllib2.URLError`` and no retry succeeded.
    """
    print('downloading:', url)
    headers = {'User-Agent': 'Mozilla/5.0'}
    request = urllib2.Request(url, headers=headers)
    try:
        response = urllib2.urlopen(request)
        try:
            html = response.read()
        finally:
            # Release the connection even if reading the body fails.
            response.close()
    except urllib2.URLError as e:
        print("download error: {}".format(e))
        html = None
        # Only 5xx (server-side) failures are retried; errors without a
        # status code and 4xx responses (such as 404) return None.
        if num_retries > 0 and hasattr(e, 'code') and 500 <= e.code < 600:
            print("e.code = ", e.code)
            # Pass by keyword: a positional second argument would land in
            # ``user_agent`` and reset ``num_retries`` to its default,
            # so a persistent 5xx would recurse without bound.
            return download(
                url,
                user_agent=user_agent,
                num_retries=num_retries - 1,
            )
    return html
# Fetch the sample page and show the result (None when the download failed).
print(download("http://www.huaru.cc/mobile/product/xsim.html"))
显示如下:
('downloading:', 'http://www.huaru.cc/mobile/product/xsim.html')
download error: HTTP Error 404: Not Found
None
这是因为网页返回404。
已在 Python 2.7.10 和 3.6 上测试。
请查阅 PEP 8:https://www.python.org/dev/peps/pep-0008/#id17