Unicode 与 lxml objectify 的编码问题



我遇到了常见的 `UnicodeEncodeError: 'ascii' codec can't encode character u'\xa9' in position 15: ordinal not in range(128)`,但常用的解决办法对我都不起作用。

我已经尝试将第72行(给 gName 赋值的那一行)改为 `gName = getattr(root, u"name", "").encode('utf-8').strip()`,结果得到 `AttributeError: no such child: encode`。我又在第84行尝试了 `gName.encode('utf-8')`,得到了同样的错误。

# Sample XML used in place of the flickr.groups_getInfo() result when tst is
# true and ext is false. The <name> element contains a non-ASCII character (©).
test_data = ("""
    <rsp stat="ok">
        <group id="34427465497@N01" iconserver="1" iconfarm="1" lang=""
                     ispoolmoderated="0" is_member="0" is_moderator="0" is_admin="0">
            <name>DanceFloor - [ © Plz Read Rules ]</name>
            <members>245</members>
            <pool_count>133</pool_count>
            <topic_count>106</topic_count>
            <restrictions photos_ok="1" videos_ok="1" images_ok="1" screens_ok="1"
                 art_ok="1" safe_ok="1" moderate_ok="0" restricted_ok="0" has_geo="0" />
        </group>
    </rsp>
""")
from lxml import html, etree, objectify
import re
import time
import flickrapi
# NOTE(review): g, u, gt and fErr are assigned but never read in the visible
# script.
g, u, gt = 0, 0, 0
fErr = ''
# NOTE(review): this file is opened and read unconditionally, even though td
# is overwritten below when tst is true and ext is false. The path also looks
# like it lost its backslashes during extraction -- confirm.
t = open(r'C:MircPythonTemp Filestext.xml', 'r')
td = t.read()
tst = 1   # # True for test data, False for live data
ext = 0   # # True for external test data, False for internal
if tst:
    # Test mode: take the XML either from the external file or from test_data,
    # and use placeholder credentials.
    if ext:
        t = open(r'C:MircPythonTemp Filestext.xml', 'r')
        td = t.read()
    else:
        td = test_data
    api_key = 'test'
    api_secret =  'test'
else:
        # Live mode: scan the key file for lines mentioning api_key / api_secret.
        # NOTE(review): KF_path is not defined anywhere in the visible code.
        # NOTE(review): str.strip() with a string argument removes any of those
        # characters from both ends, not the literal prefix -- verify that the
        # parsed key/secret values come out intact.
        KeyFile = open(KF_path, 'r')
        for line in KeyFile:
            # line = line [:-2]
            if 'api_key' in line.lower():
                api_key = line.strip("api_key = '")[:-2]
            if 'api_secret' in line.lower():
                api_secret = line.strip("api_secret = '")[:-2]
        KeyFile.close()
flickr = flickrapi.FlickrAPI(api_key, api_secret, format='rest')
# Clear the credential variables once the client object has been built.
api_key = api_secret = ""
uNSIDfile = 'Mirc! dl filesFav TestGrp.ttxt'
Output_File = 'C:Mirc! dl filesFav TestGrpOut.ttxt'
InFile = open(uNSIDfile, 'r')
# NOTE(review): this handle (mode 'w') is never closed; OutFile is rebound to
# a new handle inside the loop below.
OutFile = open(Output_File, 'w')
# One iteration per line of InFile; each line is treated as a group id.
for gid in InFile:
    # Drop the last character of the line (presumably the trailing newline).
    gid = gid[:-1]
    if tst:
        Grp = objectify.fromstring(td)
    else:
        Grp = objectify.fromstring(flickr.groups_getInfo(group_id=gid))
    fErr = ''
    # Attribute mappings of the first <group> and <restrictions> elements.
    mn   = Grp.xpath(u'//group')[0].attrib
    res  = Grp.xpath(u'//restrictions')[0].attrib
    root = Grp.group
    gNSID   = gid
    gAlias  = ""
##### gName is here
    # NOTE(review): when the child exists, getattr() on an objectify element
    # yields the child element object rather than a plain string; presumably
    # this is why .encode is looked up as a child named "encode" -- confirm.
    gName   = getattr(root, u"name", "")
    Images  = getattr(root, 'pool_count', (-1))
    Mbr     = getattr(root, "members", (-1))
    # Bit flags combined into one integer: safe=1, moderate=2, restricted=4.
    # NOTE(review): this expression and the Is_Mem one below continue onto the
    # next line without a backslash or enclosing parentheses -- a line
    # continuation appears to have been lost.
    Sft     = int(res["safe_ok"]) + (int(res["moderate_ok"]) * 2) + 
                        (int(res["restricted_ok"]) * 4)
    # Bit flags combined into one integer: member=1, moderator=2, admin=4.
    Is_Mem  = int(mn["is_member"]) + (int(mn["is_moderator"]) * 2) + 
                        (int(mn["is_admin"]) * 4)
    # True when Sft exceeds 3, i.e. the restricted bit is set (assuming each
    # *_ok attribute is 0 or 1).
    O18     = True if Sft > 3 else False
    Priv    = getattr(root, "privacy", (-1))
#####  Error comes here  ############
    # NOTE(review): the "t" separators and the trailing n in these format
    # strings look like tab/newline escapes that lost their backslashes --
    # confirm against the original script.
    ttup = '"{}"t"{}"t"{}"t'.format(gNSID, gAlias, gName)
    tup = '{0}{6}{1}{6}{2}{6}{3}{6}{4}{6}{5}n'.format(ttup, Images, Mbr, Sft, O18,
                         Priv, "t")
    # Re-open the output file in append-binary mode for every record.
    OutFile = open(Output_File, mode='ab')
    OutFile.write(tup)
    OutFile.close()
InFile.close()
# t is only closed when the external test file was the data source.
if tst and ext:
    t.close()

为什么不尝试以 UTF-8 编码写入文件?

import codecs

# The built-in open() takes a buffering size as its third positional argument,
# so passing 'utf8' there raises TypeError. codecs.open() takes the encoding in
# that position and encodes text to UTF-8 on write.
# NOTE(review): tup should be a unicode (text) string for the encoding step to
# handle non-ASCII characters -- confirm how tup is built.
with codecs.open(Output_File, 'ab', 'utf8') as OutFile:
    OutFile.write(tup)

最新更新