我正在从python 2升级到3。此代码在python 2中起作用,但在3中不行。添加n
和b'
。
我相信我的python 3错误地编写了我的json文件。
从Web URL提取JSON的代码:
def WebService_As_Source(Source_Id):
dst_path = SOURCECONFIG.GLOBAL_WorkPath
bdate = SOURCECONFIG.GLOBAL_DATE
print ("Extracting from Web Service...tt" + str(datetime.datetime.now()))
password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
uid = 'stack' #save lan your userid ######################################################## enter UserId
pwd = 'overflow'#save your lan password ######################################################## enter Password
top_level_url = SOURCECONFIG.WebServices_URL(Source_Id)
password_mgr.add_password(None, top_level_url, uid, pwd)
handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
# create "opener" (OpenerDirector instance)
opener = urllib.request.build_opener(handler)
# use the opener to fetch a URL
opener.open(top_level_url)
# Now all calls to urllib2.urlopen use our opener.
urllib.request.install_opener(opener)
req = urllib.request.Request(top_level_url)
response = urllib.request.urlopen(req)
data = response.readlines()
otf = open(os.path.join(dst_path + Source_Id+".json"), "w+") # open text file
rowcount = 0
for line in data:
rowcount = rowcount + 1
otf.write(str(line))
otf.close()
print(Source_Id +" json extracted.tt"+ str(datetime.datetime.now()))
我实际的JSON文件python 3产生的样本:
b'[ {n'b' "filterFlag" : "",n'b' "lookup" : "",n'b' "rule" : "",n'b' "prefix" : "",n'b' "validBDRAppName" : "",n'b' "vendor" : {n'b' "bookId" : "40302539",n'b' "bookName" : "NYC",n'b' "bookStatus" : "ACTIVE",n'b' "commProductType" : "",n'b' "businessDate" : "2019-08-05",n'b' "endOfDay" : null,n'b' "excludeFromAggregation" : "FALSE",n'b' "geoLocation" : "",n'b' "isHoliday" : "",n'b' "isOSFIBook" : false,n'b' "legalEntity" : "",n'b' "location" : "",n'b' "logicalDate" : "",n'b' "regulatoryType" : "Trading",n'b' "reportingLineBookName" : "NYC",n'b' "reportingLinePathName" : "super/user",n'b' "riskFilterType" : "USA",n'b' "statusId" : "",n'b' "transit" : "",n'b' "l8n" : ""n'b' },n'b' "bdr" : {n'b' "bookId" : "7447",n'b' "bookName" : "NY",n'b' "bookTransit" : "92218",n'b' "bookStatus" : "ACTIVE",n'b' "owner" : "",n'b' "empId" : "",n'b' "purpose" : "Trading",n'b' "appName" : "STRATEGY",n'b' "appCode" : "STRATEGY",n'b' "transitDesc" : "TOR",n'b' "appCategory" : "Front Office",n'b' "bookAppId" : "49512",n'b' "bookAppName" : "NY",n'b' "deskName" : "USA",n'b' "product" : "",n'b' "asOfDate" : "2019-08-05",n'b' "legalEntity" : "CANADA",n'b' "bookAppSecondaryName" : "NY",n'b' "strategy" : "NY",n'b' "lhu" : "FCC3",n'b' "masterBookName" : "NY"n'b' }n'b'}, {n'b' "filterFlag" : "",n'b' "lookup" : "",n'b' "rule" : "",n'b' "prefix" : "",n'b' "validBDRAppName" : "",n'b' "vendor" : {n'b' "bookId" : "40296540",n'b' "bookName" : "LDN",n'b' "bookStatus" : "ACTIVE",n'b' "commProductType" : "",n'b' "businessDate" : "2019-08-05",n'b' "endOfDay" : null,n'b' "excludeFromAggregation" : "FALSE",n'b' "geoLocation" : "",n'b' "isHoliday" : "",n'b' "isOSFIBook" : false,n'b' "legalEntity" : "",n'b' "location" : "",n'b' "logicalDate" : "",n'b' "regulatoryType" : "Trading",n'b' "reportingLineBookName" : "LDN",n'b' "reportingLinePathName" : "stack/overflow",n'b' "riskFilterType" : "NONE",n'b' "statusId" : "",n'b' "transit" : "",n'b' "l8n" : ""n'b' },n'b'
我的JSON文件python 2产品的样本:
我运行了代码以从Python 2中的URL中获取JSON,它以实际的JSON格式为我提供了数据,并且没有添加b'
'S和n
'S。
[ {
"filterFlag" : "",
"lookup" : "",
"rule" : "",
"prefix" : "",
"validBDRAppName" : "",
"vendor" : {
"bookId" : "40302539",
"bookName" : "NYC",
"bookStatus" : "ACTIVE",
"commProductType" : "",
"businessDate" : "2019-08-06",
"endOfDay" : null,
"excludeFromAggregation" : "FALSE",
"geoLocation" : "",
"isHoliday" : "",
"isOSFIBook" : false,
"legalEntity" : "",
"location" : "",
"logicalDate" : "",
"regulatoryType" : "Trading",
"reportingLineBookName" : "NYC",
"reportingLinePathName" : "super/user",
"riskFilterType" : "USA",
"statusId" : "",
"transit" : "",
"l8n" : ""
},
"bdr" : {
"bookId" : "7447",
"bookName" : "NY",
"bookTransit" : "92218",
"bookStatus" : "ACTIVE",
"owner" : "",
"empId" : "",
"purpose" : "Trading",
"appName" : "STRATEGY",
"appCode" : "STRATEGY",
"transitDesc" : "TOR",
"appCategory" : "Front Office",
"bookAppId" : "49512",
"bookAppName" : "NY",
"deskName" : "USA",
"product" : "",
"asOfDate" : "2019-08-06",
"legalEntity" : "CANADA",
"bookAppSecondaryName" : "NY",
"strategy" : "NY",
"lhu" : "FCC3",
"masterBookName" : "NY"
}
}, {
"filterFlag" : "",
"lookup" : "",
"rule" : "",
"prefix" : "",
"validBDRAppName" : "",
"vendor" : {
"bookId" : "40296540",
"bookName" : "LDN",
"bookStatus" : "ACTIVE",
"commProductType" : "",
"businessDate" : "2019-08-06",
"endOfDay" : null,
"excludeFromAggregation" : "FALSE",
"geoLocation" : "",
"isHoliday" : "",
"isOSFIBook" : false,
"legalEntity" : "",
"location" : "",
"logicalDate" : "",
"regulatoryType" : "Trading",
"reportingLineBookName" : "LDN",
"reportingLinePathName" : "stack/overflow",
"riskFilterType" : "NONE",
"statusId" : "",
"transit" : "",
"l8n" : ""
任何人都可以帮忙吗?
问题是 response.readlines()
(其中 response = urllib.request.urlopen(url)
(返回 bytes 的列表。在Python 2中,bytes
和str
是同一件事,但是在Python 3上,这不再是正确的。所以当你做
otf.write(str(line))
str()
调用是Python 2上的一个no-op,但是在Python 3上,您在字节对象上称为str
。这从来都不是您想做的:
>>> import urllib
... resp = urllib.request.urlopen('https://stackoverflow.com')
... dat = resp.readlines()
... first_line = dat[0]
... print(type(first_line))
... print(repr(first_line))
... print(repr(str(first_line)))
<class 'bytes'>
b'<!DOCTYPE html>rn'
"b'<!DOCTYPE html>\r\n'"
,如您所见,第一行是一个字节对象,str(first_line)
是一个字面的字符串,从b
和一些单引号开始。
取而代之的是,您要做的是 decode 根据其相应的编码。我对网络事物不是很熟悉,所以我不知道最好的方法是正确猜测您要请求的网站所使用的编码,但我确实知道第三方requests
库可以为您提供通常直接从响应中直接正确解码JSON。
如果使用urllib
,则必须手动进行解码,您需要
otf.write(line.decode('utf8'))
python 2没有区分字节字符串和Unicode字符串。python 3做,这是b''
表示的。
这条线
data = response.readlines()
可以是
data = response.read().decode(response.headers.get_content_charset()).split('/n')
应该弄清楚适当的编码,按照此答案