python从URL,python 3添加 n和b'写JSON文件



我正在从python 2升级到3。此代码在python 2中起作用,但在3中不行。添加nb'

我相信我的python 3错误地编写了我的json文件。

从Web URL提取JSON的代码:

def WebService_As_Source(Source_Id):
    dst_path = SOURCECONFIG.GLOBAL_WorkPath
    bdate = SOURCECONFIG.GLOBAL_DATE
    print ("Extracting from Web Service...tt" + str(datetime.datetime.now()))
    password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()

    uid = 'stack' #save lan your userid         ######################################################## enter UserId
    pwd = 'overflow'#save your lan password ######################################################## enter Password
    top_level_url = SOURCECONFIG.WebServices_URL(Source_Id)
    password_mgr.add_password(None, top_level_url, uid, pwd)
    handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
    # create "opener" (OpenerDirector instance)
    opener = urllib.request.build_opener(handler)
    # use the opener to fetch a URL
    opener.open(top_level_url)
    # Now all calls to urllib2.urlopen use our opener.
    urllib.request.install_opener(opener)
    req = urllib.request.Request(top_level_url)
    response = urllib.request.urlopen(req)
    data = response.readlines()
    otf = open(os.path.join(dst_path + Source_Id+".json"), "w+")  # open text file
    rowcount = 0
    for line in data:
        rowcount = rowcount + 1
        otf.write(str(line))
    otf.close()
    print(Source_Id +" json extracted.tt"+ str(datetime.datetime.now()))

我实际的JSON文件python 3产生的样本:

b'[ {n'b'  "filterFlag" : "",n'b'  "lookup" : "",n'b'  "rule" : "",n'b'  "prefix" : "",n'b'  "validBDRAppName" : "",n'b'  "vendor" : {n'b'    "bookId" : "40302539",n'b'    "bookName" : "NYC",n'b'    "bookStatus" : "ACTIVE",n'b'    "commProductType" : "",n'b'    "businessDate" : "2019-08-05",n'b'    "endOfDay" : null,n'b'    "excludeFromAggregation" : "FALSE",n'b'    "geoLocation" : "",n'b'    "isHoliday" : "",n'b'    "isOSFIBook" : false,n'b'    "legalEntity" : "",n'b'    "location" : "",n'b'    "logicalDate" : "",n'b'    "regulatoryType" : "Trading",n'b'    "reportingLineBookName" : "NYC",n'b'    "reportingLinePathName" : "super/user",n'b'    "riskFilterType" : "USA",n'b'    "statusId" : "",n'b'    "transit" : "",n'b'    "l8n" : ""n'b'  },n'b'  "bdr" : {n'b'    "bookId" : "7447",n'b'    "bookName" : "NY",n'b'    "bookTransit" : "92218",n'b'    "bookStatus" : "ACTIVE",n'b'    "owner" : "",n'b'    "empId" : "",n'b'    "purpose" : "Trading",n'b'    "appName" : "STRATEGY",n'b'    "appCode" : "STRATEGY",n'b'    "transitDesc" : "TOR",n'b'    "appCategory" : "Front Office",n'b'    "bookAppId" : "49512",n'b'    "bookAppName" : "NY",n'b'    "deskName" : "USA",n'b'    "product" : "",n'b'    "asOfDate" : "2019-08-05",n'b'    "legalEntity" : "CANADA",n'b'    "bookAppSecondaryName" : "NY",n'b'    "strategy" : "NY",n'b'    "lhu" : "FCC3",n'b'    "masterBookName" : "NY"n'b'  }n'b'}, {n'b'  "filterFlag" : "",n'b'  "lookup" : "",n'b'  "rule" : "",n'b'  "prefix" : "",n'b'  "validBDRAppName" : "",n'b'  "vendor" : {n'b'    "bookId" : "40296540",n'b'    "bookName" : "LDN",n'b'    "bookStatus" : "ACTIVE",n'b'    "commProductType" : "",n'b'    "businessDate" : "2019-08-05",n'b'    "endOfDay" : null,n'b'    "excludeFromAggregation" : "FALSE",n'b'    "geoLocation" : "",n'b'    "isHoliday" : "",n'b'    "isOSFIBook" : false,n'b'    "legalEntity" : "",n'b'    "location" : "",n'b'    "logicalDate" : "",n'b'    "regulatoryType" : "Trading",n'b'    "reportingLineBookName" : "LDN",n'b'    "reportingLinePathName" : "stack/overflow",n'b'    "riskFilterType" : "NONE",n'b'    "statusId" : "",n'b'    "transit" : "",n'b'    "l8n" : ""n'b'  },n'b'

我的JSON文件python 2产品的样本:

我运行了代码以从Python 2中的URL中获取JSON,它以实际的JSON格式为我提供了数据,并且没有添加b''S和n'S。

[ {
  "filterFlag" : "",
  "lookup" : "",
  "rule" : "",
  "prefix" : "",
  "validBDRAppName" : "",
  "vendor" : {
    "bookId" : "40302539",
    "bookName" : "NYC",
    "bookStatus" : "ACTIVE",
    "commProductType" : "",
    "businessDate" : "2019-08-06",
    "endOfDay" : null,
    "excludeFromAggregation" : "FALSE",
    "geoLocation" : "",
    "isHoliday" : "",
    "isOSFIBook" : false,
    "legalEntity" : "",
    "location" : "",
    "logicalDate" : "",
    "regulatoryType" : "Trading",
    "reportingLineBookName" : "NYC",
    "reportingLinePathName" : "super/user",
    "riskFilterType" : "USA",
    "statusId" : "",
    "transit" : "",
    "l8n" : ""
  },
  "bdr" : {
    "bookId" : "7447",
    "bookName" : "NY",
    "bookTransit" : "92218",
    "bookStatus" : "ACTIVE",
    "owner" : "",
    "empId" : "",
    "purpose" : "Trading",
    "appName" : "STRATEGY",
    "appCode" : "STRATEGY",
    "transitDesc" : "TOR",
    "appCategory" : "Front Office",
    "bookAppId" : "49512",
    "bookAppName" : "NY",
    "deskName" : "USA",
    "product" : "",
    "asOfDate" : "2019-08-06",
    "legalEntity" : "CANADA",
    "bookAppSecondaryName" : "NY",
    "strategy" : "NY",
    "lhu" : "FCC3",
    "masterBookName" : "NY"
  }
}, {
  "filterFlag" : "",
  "lookup" : "",
  "rule" : "",
  "prefix" : "",
  "validBDRAppName" : "",
  "vendor" : {
    "bookId" : "40296540",
    "bookName" : "LDN",
    "bookStatus" : "ACTIVE",
    "commProductType" : "",
    "businessDate" : "2019-08-06",
    "endOfDay" : null,
    "excludeFromAggregation" : "FALSE",
    "geoLocation" : "",
    "isHoliday" : "",
    "isOSFIBook" : false,
    "legalEntity" : "",
    "location" : "",
    "logicalDate" : "",
    "regulatoryType" : "Trading",
    "reportingLineBookName" : "LDN",
    "reportingLinePathName" : "stack/overflow",
    "riskFilterType" : "NONE",
    "statusId" : "",
    "transit" : "",
    "l8n" : ""

任何人都可以帮忙吗?

问题是 response.readlines()(其中 response = urllib.request.urlopen(url)(返回 bytes 的列表。在Python 2中,bytesstr是同一件事,但是在Python 3上,这不再是正确的。所以当你做

        otf.write(str(line))

str()调用是Python 2上的一个no-op,但是在Python 3上,您在字节对象上称为str。这从来都不是您想做的:

>>> import urllib 
... resp = urllib.request.urlopen('https://stackoverflow.com') 
... dat = resp.readlines() 
... first_line = dat[0] 
... print(type(first_line)) 
... print(repr(first_line))
... print(repr(str(first_line)))
<class 'bytes'>
b'<!DOCTYPE html>rn'
"b'<!DOCTYPE html>\r\n'"

,如您所见,第一行是一个字节对象,str(first_line)是一个字面的字符串,从b和一些单引号开始。

取而代之的是,您要做的是 decode 根据其相应的编码。我对网络事物不是很熟悉,所以我不知道最好的方法是正确猜测您要请求的网站所使用的编码,但我确实知道第三方requests库可以为您提供通常直接从响应中直接正确解码JSON。

如果使用urllib,则必须手动进行解码,您需要

之类的东西
        otf.write(line.decode('utf8'))

python 2没有区分字节字符串和Unicode字符串。python 3做,这是b''表示的。

这条线

data = response.readlines()

可以是

data = response.read().decode(response.headers.get_content_charset()).split('/n')

应该弄清楚适当的编码,按照此答案

最新更新