类型错误:decode() 参数 'encoding' 必须是 str,而不是 None



我一直在尝试制作一个脚本来检查是否存在一个随机网站,如果它确实存在,则打开它,但我不断收到一堆不同的错误。这是我的代码:

from urllib.request import urlopen
from html.parser import HTMLParser
import os
from http.client import HTTPConnection
from urllib.parse import urljoin, urlunparse
from urllib.request import urlretrieve
from html.parser import HTMLParser    
class ImageParser(HTMLParser):
    """HTML parser that collects the ``src`` attribute of every ``<img>`` tag.

    After ``feed()`` has been called, ``result`` holds the collected values
    in document order. It is an empty list when the page has no images.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Created eagerly so callers can read ``result`` even when no <img>
        # tag was ever seen.
        self.result = []

    def handle_starttag(self, tag, attrs):
        """Record the ``src`` value of an ``<img>`` start tag.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` attribute pairs.
        """
        if tag != 'img':
            return
        # A valueless attribute (``<img src>``) is reported with value None;
        # skip it and empty strings, since neither names an image.
        self.result.extend(
            value for name, value in attrs if name == 'src' and value
        )
def download_image(url, data):
    """Download every image referenced by an HTML page into ``DOWNLOAD/``.

    Args:
        url: Base URL of the page; relative ``src`` values are resolved
            against it.
        data: HTML source of the page, as text.
    """
    # Local import: the file's top-level imports only bring in urljoin and
    # urlunparse from urllib.parse.
    from urllib.parse import urlsplit

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs('DOWNLOAD', exist_ok=True)

    parser = ImageParser()
    parser.feed(data)
    # The parser may never have created ``result`` (page without <img> tags),
    # so read it defensively; the set drops duplicate and empty sources.
    sources = {src for src in getattr(parser, 'result', []) if src}

    for src in sorted(sources):
        imageUrl = urljoin(url, src)
        # Name the file after the URL path only, so a query string or
        # fragment does not end up in the file name.
        basename = os.path.basename(urlsplit(imageUrl).path)
        if not basename:
            # The URL path ends in "/": there is no file name to save under.
            continue
        targetFile = os.path.join('DOWNLOAD', basename)
        print("Downloading...", imageUrl)
        urlretrieve(imageUrl, targetFile)
def main():
    """Fetch ``/main`` from the host and download the images it references."""
    host = "www.jejunu.ac.kr"
    connection = HTTPConnection(host)
    connection.request("GET", '/main')
    resp = connection.getresponse()
    # The charset is read from the response headers. When the server does not
    # declare one it is None, and decode(None) raises the TypeError shown in
    # the traceback.
    charset = resp.msg.get_param('charset')
    data = resp.read().decode(charset)
    connection.close()
    print("n>>>>>>>>> Download Images from", host)
    download_image(urlunparse(('http', host, '', '', '', '')), data)


if __name__ == '__main__':
    main()

这是一个错误:

Traceback (most recent call last):
  File "C:\Users\user\Desktop\웹프로그래밍 (1)\test.py", line 43, in <module>
    main()
  File "C:\Users\user\Desktop\웹프로그래밍 (1)\test.py", line 37, in main
    data = resp.read().decode(charset)
TypeError: decode() argument 'encoding' must be str, not None

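服务器的响应头中可能没有提供字符集(charset),此时 get_param('charset') 返回 None,于是 decode(None) 抛出上面的 TypeError。页面编码很可能是 UTF-8,因此可以在 charset 为 None 时回退到 'utf8',将 main 更改为以下内容: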

def main():
    """Fetch ``/main`` from the host and download the images it references.

    Falls back to UTF-8 when the response does not declare a charset, which
    avoids ``TypeError: decode() argument 'encoding' must be str, not None``.
    """
    host = "www.jejunu.ac.kr"
    conn = HTTPConnection(host)
    try:
        conn.request("GET", '/main')
        resp = conn.getresponse()
        # get_param returns None when the Content-Type header carries no
        # charset parameter.
        charset = resp.msg.get_param('charset')
        if charset is None:
            charset = 'utf8'
        # The fallback is only a guess, so replace undecodable bytes instead
        # of failing on a page that is actually in another encoding.
        data = resp.read().decode(charset, errors='replace')
    finally:
        # Release the socket even if the request or decoding fails.
        conn.close()
    print("\n>>>>>>>>> Download Images from", host)
    url = urlunparse(('http', host, '', '', '', ''))
    download_image(url, data)

或者你可以尝试一种稍微不同的方法:

from urllib import request

# urlopen needs a full URL including the scheme; a bare host name raises
# ValueError ("unknown url type").
with request.urlopen("http://www.jejunu.ac.kr") as reqURL:
    # get_content_charset() returns None when the response declares no
    # charset, so fall back to UTF-8 instead of passing None to decode().
    encoding = reqURL.headers.get_content_charset() or "utf-8"
    # Decode with the variable assigned above; no name `charset` exists in
    # this snippet.
    data = reqURL.read().decode(encoding)

最新更新