使用Python3处理XML



我正在用 Python 3 编写一个 XML 处理程序,但程序的输出有问题。

由于我是 Python 新手,我尝试了各种可能的方式修改循环,但输出始终不符合预期。

我的代码就是这样:

import csv
import xml.etree.ElementTree as ET
def xml_t(inputfile,outputfile):
    """Parse an XML file and append CSV rows built from its LicenseItem elements.

    For every ``Node`` element the ``nodeName`` attribute is read, and a row
    of (nodeName, LicenseItem, LicenseDesc, AssignedValue, LicenseStatus,
    LicenseEndDate) is appended to ``outputfile`` for each ``LicenseItem``
    visited by the inner loop.
    """
    tree = ET.parse(inputfile)
    root = tree.getroot()
    for Node in root.iter('Node'):
        nodeName = Node.get('nodeName')
        # NOTE: root.iter() walks the whole tree, so every LicenseItem in the
        # document is visited again for each Node, not only that Node's own
        # children; each item is therefore paired with every nodeName.
        for LicenseItem in root.iter('LicenseItem'):
            LicenseItem_child = LicenseItem.get('LicenseItem')
            LicenseDesc = LicenseItem.get('LicenseDesc')
            AssignedValue = LicenseItem.get('AssignedValue')
            LicenseStatus = LicenseItem.get('LicenseStatus')
            LicenseEndDate = LicenseItem.get('LicenseEndDate')
            # Skip items that carry none of the five license attributes.
            if LicenseItem_child == LicenseDesc == AssignedValue == LicenseStatus == LicenseEndDate == None:
                continue
            output_csv = (nodeName,LicenseItem_child,LicenseDesc,AssignedValue,LicenseStatus,LicenseEndDate)
            # The file is reopened in append mode for every row, so rows
            # accumulate in an existing output file.
            with open (outputfile,'a') as files:
                # NOTE(review): the line terminator here is the literal letter
                # "r"; presumably '\r' or '\n' was intended -- confirm.
                write = csv.writer(files, lineterminator ='r', delimiter = ',')
                write.writerow(output_csv)

# Script entry point: run xml_t on the hard-coded input and output file names.
if __name__ == '__main__':
    inputfile = 'filename.xml'
    outputfile = 'output.csv'
    xml_t(inputfile,outputfile)

我期望的输出就像:

nodeName,LicenseItem,LicenseDesc,AssignedValue,LicenseStatus,LicenseEndDate
TH1S9303VAS04,,,,,
ES1S9306RANH2,LLE0xxxxx1,NQA Client & NQA Server,1,Normal,PERMANENT
ES1S9306RANH2,LLE0Ixxxx1,IPV6 GTLV2,1,Normal,PERMANENT
ES1S9306RANH2,LLE0xxxxx1,MPLS,1,Normal,PERMANENT
TH1S9303DCN02,LLE0xxxxx1,NQA Client & NQA Server,1,Normal,PERMANENT
TH1S9303DCN02,LLE0xxxxx1,IPV6 GTLV2,1,Normal,PERMANENT
TH1S9303DCN02,LLE0xxxxx1,MPLS,1,Normal,PERMANENT
TH1S5310CSH2,,,,,
TH1S9303DCN01,LLE0yyyyy1,NQA Client & NQA Server,1,Normal,PERMANENT
TH1S9303DCN01,LLE0yyyyy1,IPV6 GTLV2,1,Normal,PERMANENT
TH1S9303DCN01,LLE0yyyyy1,MPLS,1,Normal,PERMANENT
TH1S5310CSH1,,,,,
ES1S9306RANH1,LLE0ttttt1,NQA Client & NQA Server,1,Normal,PERMANENT
ES1S9306RANH1,LLE0ttttt1,IPV6 GTLV2,1,Normal,PERMANENT
ES1S9306RANH1,LLE0ttttt1,MPLS,1,Normal,PERMANENT
,,,,,

和我正在处理的XML文件是:

<?xml version="1.0" encoding="UTF-8"?>
<LicenseInfo>
    <Node nodeName="TH1S9303VAS04">
        <LicenseItem details="TH1S9303VAS04" />
    </Node>
    <Node nodeName="ES1S9306RANH2">
        <LicenseItem LicenseItem="LLE0xxxxx1" LicenseDesc="NQA Client &amp; NQA Server" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
        <LicenseItem LicenseItem="LLE0xxxxx1" LicenseDesc="IPV6 GTLV2" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
        <LicenseItem LicenseItem="LLE0xxxxx1" LicenseDesc="MPLS" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
    </Node>
    <Node nodeName="TH1S9303DCN02">
            <LicenseItem LicenseItem="LLE0xxxxx1" LicenseDesc="NQA Client &amp; NQA Server" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
            <LicenseItem LicenseItem="LLE0xxxxx1" LicenseDesc="IPV6 GTLV2" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
            <LicenseItem LicenseItem="LLE0xxxxx1" LicenseDesc="MPLS" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
    </Node>
    <Node nodeName="TH1S5310CSH2">
            <LicenseItem details="TH1S5310CSH2" />
    </Node>
    <Node nodeName="TH1S9303DCN01">
            <LicenseItem LicenseItem="LLE0yyyyy1" LicenseDesc="NQA Client &amp; NQA Server" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
            <LicenseItem LicenseItem="LLE0yyyyy1" LicenseDesc="IPV6 GTLV2" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
            <LicenseItem LicenseItem="LLE0yyyyy1" LicenseDesc="MPLS" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
    </Node>

不包含 nodeName 属性时,输出正是我想要的;但当我把 nodeName 加入输出文件后,结果就开始变得很奇怪(weird)!

如能提供任何帮助,我将不胜感激。提前致谢!

root.iter('LicenseItem') 会返回整棵树中所有标签为 'LicenseItem' 的元素,因此您的程序会在每个 Node 上遍历所有的 LicenseItem。您可以按照下面的方式修改代码,输出就会是您想要的:

import csv
import xml.etree.ElementTree as ET
def xml_t(inputfile, outputfile):
    """Append one CSV row per license entry of an XML license report.

    The XML document is expected to contain ``Node`` elements whose direct
    children carry the license attributes. For each child, a row of
    (nodeName, LicenseItem, LicenseDesc, AssignedValue, LicenseStatus,
    LicenseEndDate) is written; attributes missing on a child are written
    as empty fields. Children that have none of the five license
    attributes are skipped.

    Args:
        inputfile: Path (or file object) of the XML document to parse.
        outputfile: Path of the CSV file. It is opened in append mode, so
            existing content is kept and new rows are added at the end.

    Raises:
        xml.etree.ElementTree.ParseError: If the input is not well-formed XML.
        OSError: If either file cannot be opened.
    """
    attr_names = ('LicenseItem', 'LicenseDesc', 'AssignedValue',
                  'LicenseStatus', 'LicenseEndDate')
    tree = ET.parse(inputfile)
    root = tree.getroot()
    # Open the output once instead of once per row. newline='' stops the
    # text layer from translating the terminator chosen by the csv writer.
    with open(outputfile, 'a', newline='') as files:
        # Terminate rows with a real newline (previously the literal letter
        # "r" was written between rows, gluing them onto a single line).
        write = csv.writer(files, lineterminator='\n', delimiter=',')
        for Node in root.iter('Node'):
            nodeName = Node.get('nodeName')
            # Iterate only this Node's direct children; root.iter() would
            # visit every LicenseItem in the document for every Node.
            for LicenseItem in Node:
                values = [LicenseItem.get(name) for name in attr_names]
                if all(value is None for value in values):
                    continue
                write.writerow([nodeName, *values])

if __name__ == '__main__':
    # Executed as a script: run the conversion on the sample file names.
    xml_t(inputfile='filename.xml', outputfile='output.csv')

见下文(请注意,代码不使用任何外部库)。
对于缺失的属性值,该代码使用 'N/A' 代替;您可以将其替换为其他值。

import xml.etree.ElementTree as ET
# Sample license report embedded as a string so the script is self-contained.
xml = '''<LicenseInfo>
    <Node nodeName="TH1S9303VAS04">
        <LicenseItem details="TH1S9303VAS04" />
    </Node>
    <Node nodeName="ES1S9306RANH2">
        <LicenseItem LicenseItem="LLE0xxxxx1" LicenseDesc="NQA Client &amp; NQA Server" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
        <LicenseItem LicenseItem="LLE0xxxxx1" LicenseDesc="IPV6 GTLV2" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
        <LicenseItem LicenseItem="LLE0xxxxx1" LicenseDesc="MPLS" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
    </Node>
    <Node nodeName="TH1S9303DCN02">
            <LicenseItem LicenseItem="LLE0xxxxx1" LicenseDesc="NQA Client &amp; NQA Server" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
            <LicenseItem LicenseItem="LLE0xxxxx1" LicenseDesc="IPV6 GTLV2" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
            <LicenseItem LicenseItem="LLE0xxxxx1" LicenseDesc="MPLS" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
    </Node>
    <Node nodeName="TH1S5310CSH2">
            <LicenseItem details="TH1S5310CSH2" />
    </Node>
    <Node nodeName="TH1S9303DCN01">
            <LicenseItem LicenseItem="LLE0yyyyy1" LicenseDesc="NQA Client &amp; NQA Server" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
            <LicenseItem LicenseItem="LLE0yyyyy1" LicenseDesc="IPV6 GTLV2" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
            <LicenseItem LicenseItem="LLE0yyyyy1" LicenseDesc="MPLS" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
    </Node>
    </LicenseInfo>
'''
# Attributes copied from each <LicenseItem>, in output column order.
ATTRS = ['LicenseItem', 'LicenseDesc', 'AssignedValue', 'LicenseStatus', 'LicenseEndDate']
root = ET.fromstring(xml)
nodes = root.findall('.//Node')
# Build one row per <LicenseItem>: the owning node's name followed by every
# attribute in ATTRS, with 'N/A' standing in for attributes the item lacks.
lines = []
for node in nodes:
    node_name = node.attrib['nodeName']
    lines.extend(
        [node_name] + [item.attrib.get(attr, 'N/A') for attr in ATTRS]
        for item in node.findall('.//LicenseItem')
    )
for row in lines:
    print(row)

输出

['TH1S9303VAS04', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A']
['ES1S9306RANH2', 'LLE0xxxxx1', 'NQA Client & NQA Server', '1', 'Normal', 'PERMANENT']
['ES1S9306RANH2', 'LLE0xxxxx1', 'IPV6 GTLV2', '1', 'Normal', 'PERMANENT']
['ES1S9306RANH2', 'LLE0xxxxx1', 'MPLS', '1', 'Normal', 'PERMANENT']
['TH1S9303DCN02', 'LLE0xxxxx1', 'NQA Client & NQA Server', '1', 'Normal', 'PERMANENT']
['TH1S9303DCN02', 'LLE0xxxxx1', 'IPV6 GTLV2', '1', 'Normal', 'PERMANENT']
['TH1S9303DCN02', 'LLE0xxxxx1', 'MPLS', '1', 'Normal', 'PERMANENT']
['TH1S5310CSH2', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A']
['TH1S9303DCN01', 'LLE0yyyyy1', 'NQA Client & NQA Server', '1', 'Normal', 'PERMANENT']
['TH1S9303DCN01', 'LLE0yyyyy1', 'IPV6 GTLV2', '1', 'Normal', 'PERMANENT']
['TH1S9303DCN01', 'LLE0yyyyy1', 'MPLS', '1', 'Normal', 'PERMANENT']

最新更新