使用 Python 脚本比较 XML 文件



有两个 XML 文件,需要比较它们以验证其中的数据是否相同。目前,书籍 ID(示例中为 item_number)在两个 XML 中的出现顺序不同,但脚本应该能够按 ID 将条目对应起来再比较这两个 XML。有人可以帮助我吗?

输出:两个 XML 文件中的数据相同。

test1.xml

<?xml version="1.0"?>
<?xml-stylesheet href="catalog.xsl" type="text/xsl"?>
<!DOCTYPE catalog SYSTEM "catalog.dtd">
<catalog>
<product description="Cardigan Sweater" product_image="cardigan.jpg">
<catalog_item gender="Men's">
<item_number>QWZ5671</item_number>
<cool_number>QWZ5671</cool_number>
<price>39.5</price>
<size description="Medium">
<color_swatch image="red_cardigan.jpg">Red</color_swatch>
<color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch>
</size>
</catalog_item>
<catalog_item gender="Women's">
<item_number>RRX986</item_number>
<price>42.50</price>
<size description="Small">
<color_swatch image="red_cardigan.jpg">Red</color_swatch>
<color_swatch image="navy_cardigan.jpg">Nay</color_swatch>
<color_swatch image="burgundy_cardigan.jpg">Burundy</color_swatch>
</size>
</catalog_item>
</product>
</catalog>

test2.xml

<?xml version="1.0"?>
<?xml-stylesheet href="catalog.xsl" type="text/xsl"?>
<!DOCTYPE catalog SYSTEM "catalog.dtd">
<catalog>
<product description="Cardigan Sweater" product_image="cardigan.jpg">
<catalog_item gender="Women's">
<item_number>RRX9856</item_number>
<price>42.50</price>
<size description="Small">
<color_swatch image="red_cardigan.jpg">Red</color_swatch>
<color_swatch image="navy_cardigan.jpg">Navy</color_swatch>
<color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch>
</size>
</catalog_item>
<catalog_item gender="Men's">
<item_number>QWZ5671</item_number>
<price>39.95</price>
<size description="Medium">
<color_swatch image="red_cardigan.jpg">Red</color_swatch>
<color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch>
</size>
</catalog_item>      
</product>
</catalog>

试试这个:

from lxml import etree
def collect_item_signatures(root):
    """Return an order-independent description of every ``catalog_item``.

    Each ``catalog_item`` under *root* becomes a sorted tuple of
    ``(tag, attributes, text)`` entries, one per element inside the item.
    The list of item signatures is itself sorted, so two documents that
    contain the same items in a different order produce equal results.

    Unlike a flat ``set`` of values, this keeps values attached to the item
    and tag they belong to and preserves duplicates, so e.g. swapping the
    prices of two items is detected as a difference.

    Args:
        root: Root element of a parsed XML document (any ElementTree-style
            element providing ``findall``, ``iter``, ``tag``, ``attrib``
            and ``text``).

    Returns:
        A sorted list of item signatures (tuples).
    """
    signatures = []
    for item in root.findall('.//catalog_item'):
        entries = []
        for element in item.iter():
            # lxml's iter() also yields comments and processing
            # instructions, whose tag is not a string; they are not data.
            if not isinstance(element.tag, str):
                continue
            # Record every attribute, not just the first one.
            attributes = tuple(sorted(element.attrib.items()))
            # element.text is None for empty elements such as <size/>.
            text = (element.text or '').strip()
            entries.append((element.tag, attributes, text))
        signatures.append(tuple(sorted(entries)))
    return sorted(signatures)


def main():
    """Compare test1.xml and test2.xml and print whether their data match."""
    root_1 = etree.parse('test1.xml').getroot()
    root_2 = etree.parse('test2.xml').getroot()
    if collect_item_signatures(root_1) == collect_item_signatures(root_2):
        print('Data is same in both XML files')
    else:
        print('Data is different in both XML files')


if __name__ == '__main__':
    main()

另一种方法

这种方法会把存在差异的属性保存到一个字典中。

from lxml import etree
from collections import defaultdict
def collect_items(root):
    """Index every ``catalog_item`` under *root* by its item number.

    For each item, attribute values are grouped under the attribute name
    and element texts are grouped under the element tag.

    Args:
        root: Root element of a parsed XML document (any ElementTree-style
            element providing ``findall``, ``iter``, ``tag``, ``attrib``
            and ``text``).

    Returns:
        A dict mapping item number to ``{key: [values, ...]}``.

    Raises:
        ValueError: If an item has no ``item_number`` or an item number
            occurs more than once, since items could then not be matched
            unambiguously between the two files.
    """
    items = {}
    for node in root.findall('.//catalog_item'):
        fields = defaultdict(list)
        for element in node.iter():
            # lxml's iter() also yields comments and processing
            # instructions, whose tag is not a string; they are not data.
            if not isinstance(element.tag, str):
                continue
            # Record every attribute, not just the first one.
            for name, value in element.attrib.items():
                fields[name].append(value)
            # element.text is None for empty elements such as <size/>.
            text = (element.text or '').strip()
            if text:
                fields[element.tag].append(text)
        item_numbers = fields.get('item_number')
        if not item_numbers:
            raise ValueError('catalog_item without item_number')
        item_id = item_numbers[0]
        if item_id in items:
            raise ValueError('duplicate item_number: %s' % item_id)
        items[item_id] = dict(fields)
    return items


def diff_items(items_1, items_2):
    """Return the per-item differences between two ``collect_items`` results.

    Items are matched by item number and fields by key name, never by
    position, so an item or field that exists in only one file is reported
    instead of shifting every later comparison out of alignment.

    Args:
        items_1: Mapping produced by ``collect_items`` for the first file.
        items_2: Mapping produced by ``collect_items`` for the second file.

    Returns:
        A dict mapping item number to a list of ``{key: [values]}`` dicts,
        where the values are those present on one side only. The dict is
        empty when both inputs hold the same data.
    """
    differences = defaultdict(list)
    for item_id in sorted(set(items_1) | set(items_2)):
        fields_1 = items_1.get(item_id, {})
        fields_2 = items_2.get(item_id, {})
        for key in sorted(set(fields_1) | set(fields_2)):
            values_1 = fields_1.get(key, [])
            values_2 = fields_2.get(key, [])
            if sorted(values_1) != sorted(values_2):
                one_sided = sorted(set(values_1) ^ set(values_2))
                differences[item_id].append({key: one_sided})
    return dict(differences)


def main():
    """Compare test1.xml and test2.xml and print any differences found."""
    items_1 = collect_items(etree.parse('test1.xml').getroot())
    items_2 = collect_items(etree.parse('test2.xml').getroot())
    differences = diff_items(items_1, items_2)
    if differences:
        print('Data is different in both XML files\n', differences)
    else:
        print('Data is same in both XML files')


if __name__ == '__main__':
    main()

最新更新