替换基于csv列的xml值



我正在尝试基于csv列替换xml文件中的类名。实际上xml文件是注释文件。

这是xml的格式:

<annotation>
<folder>./test_xmls</folder>
<filename>000048_Panorama.jpg</filename>
<path>./images000048_Panorama.jpg</path>
<source>
<database>Unknown</database>
</source>
<size>
<width>4000</width>
<height>2000</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>AAAA</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>

我的csv包含`original`列和`change to`列。

格式为:

| original | change to |
|----------|-----------|
| AAAA     | class_A   |
| ...      | ...       |

csv有20000多行,涵盖了80000个xml文件中出现的所有<name>值(例如<name>AAAA</name>)。

我想将xml中的名称(如AAAA)与csv的列进行匹配。如果它存在于original列中,那么我想用change to列中的相应值来替换它,例如把AAAA替换为class_A。

我试着写python代码,但它不起作用。我的代码在这里

import xml.etree.ElementTree as ET
import os
import pandas as pd
from collections import defaultdict
import csv
from csv import reader

# Build a lookup dict from table.csv: column 0 of each row becomes the key
# and column 2 becomes the value (the header row is not skipped).
# NOTE(review): the table shown with the question has only two columns, so
# rows[2] would raise IndexError there -- index 1 is probably intended; confirm
# against the real CSV.
with open('table.csv', mode='r') as inp:
# This rebinds the name `reader`, which was imported from csv above.
reader = csv.reader(inp)
dict_from_csv = {rows[0]:rows[2] for rows in reader}
#print(dict_from_csv)
# Visit every directory entry under ./xmls in sorted order.
root_path = "./xmls"
xml_list = sorted(os.listdir(root_path))
for xml_file in xml_list:
xml_path = os.path.join(root_path,xml_file)
# parse xml file
tree = ET.parse(xml_path)
# get root node
root = tree.getroot()
# Only direct <object> children of the root are visited.
for member in root.findall('object'):
# member[0] is the first child of <object>; in the sample annotation that
# is the <name> element. Only its text is copied into a local string.
sub_child = member[0].text
print(sub_child)
for key, value in dict_from_csv.items():
# `in` on two strings is a substring test, not an equality test: this is
# true whenever the XML name occurs anywhere inside the CSV key.
if sub_child in key:
# This rebinds the local string only; member[0].text in the tree is
# never assigned, so the parsed document stays unchanged.
sub_child = sub_child.replace(sub_child, value)
#print(xml)
# xml_file is a file-name string from os.listdir, and str has no write()
# method, so this line raises AttributeError. Nothing in this script calls
# tree.write(), so no XML file is ever saved.
xml_file.write(sub_child)  
print("Classes are changed : " + xml_path)

如有任何帮助,我们将不胜感激。

谢谢

以下代码应该可以执行您想要的操作:

import lxml.html   # check https://pypi.org/project/lxml/
from csv import reader
from os.path import exists
import glob

# Parsed conversion tables keyed by CSV path, so a batch run over many XML
# files reads each CSV only once.
_CONVERSIONS_CACHE = {}


def _load_conversions(csv_path: str) -> dict:
    """Return the ``{original: change_to}`` mapping stored in *csv_path*.

    The first row is treated as the header and skipped. Rows with fewer than
    two columns (for example blank lines) are ignored. When an original name
    appears more than once, the first row wins. The parsed mapping is cached
    per path for the lifetime of the process.
    """
    from csv import reader

    if csv_path not in _CONVERSIONS_CACHE:
        conversions = {}
        with open(csv_path, 'r', newline='') as handle:
            rows = reader(handle)
            next(rows, None)  # skip the header row
            for row in rows:
                if len(row) >= 2:
                    conversions.setdefault(row[0], row[1])
        _CONVERSIONS_CACHE[csv_path] = conversions
    return _CONVERSIONS_CACHE[csv_path]


def update_xml(path: str, csv_path: str = './conversions.csv') -> None:
    """Rename the ``<name>`` classes of one annotation file in place.

    Every ``<name>`` element below the root whose text exactly equals an entry
    of the CSV's first column gets the matching second-column value. Each name
    is mapped at most once, so a replacement value is never re-mapped by a
    later CSV row.

    Args:
        path: Annotation XML file to update; it is overwritten only when at
            least one class name changed.
        csv_path: Two-column CSV (header row, then ``original,change to``).
            Defaults to ``./conversions.csv``, the name of the sample file
            that accompanies this code.

    Raises:
        FileNotFoundError: If *path* or *csv_path* does not exist.
        xml.etree.ElementTree.ParseError: If *path* is not well-formed XML.
    """
    # The annotations are XML, so use an XML parser: an HTML parser
    # normalises the markup and does not round-trip the document faithfully.
    import xml.etree.ElementTree as ET

    conversions = _load_conversions(csv_path)
    tree = ET.parse(path)
    changed = False
    # A dict lookup per <name> replaces one XPath query per CSV row, and it
    # also works for names containing quotes, which broke the XPath literal.
    for tag in tree.iterfind('.//name'):
        change_to = conversions.get(tag.text)
        if change_to is None:
            continue
        print(f'Changed class {tag.text} to {change_to} in {path}')
        tag.text = change_to
        changed = True
    if changed:
        # Serialise before opening the file for writing, so a serialisation
        # failure cannot leave a truncated annotation behind.
        new_content = ET.tostring(tree.getroot(), encoding='utf-8')
        with open(path, 'wb') as annotation:
            annotation.write(new_content)
    print(f'Processing on {path} done')

if __name__ == '__main__':
    # Convert every *.xml file found in the current working directory.
    for candidate in glob.glob('*.xml'):
        # Skip entries that are no longer present when they are reached.
        if not exists(candidate):
            continue
        update_xml(path=candidate)

annotation.xml:

<annotation>
<folder>./test_xmls</folder>
<filename>000048_Panorama.jpg</filename>
<path>./images000048_Panorama.jpg</path>
<source>
<database>Unknown</database>
</source>
<size>
<width>4000</width>
<height>2000</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>AAAA</name>
<name>BBBB</name>
<name>CCCC</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox></bndbox></object></annotation>

conversions.csv:

original,change to
AAAA,class_A
BBBB,class_B
CCCC,class_C
import lxml.html  # check https://pypi.org/project/lxml/
from csv import reader
import xml.etree.ElementTree as ET
if __name__ == '__main__':
    # `os` is used below but is not among this snippet's imports.
    import os

    # Read the whole conversion table into a dict once. A csv reader is a
    # one-shot iterator, so re-looping over it for every XML file would only
    # ever process the first file, and it cannot be read after the CSV file
    # has been closed.
    class_map = {}
    with open('./table.csv', 'r', newline='') as conversions:
        csv_reader = reader(conversions)
        next(csv_reader, None)  # skip the header row
        for row in csv_reader:
            # Ignore blank or incomplete rows; the first occurrence wins.
            if len(row) >= 2:
                class_map.setdefault(row[0], row[1])

    root_path = "./xmls"
    for xml_file in sorted(os.listdir(root_path)):
        # Skip anything in the folder that is not an annotation file.
        if not xml_file.endswith('.xml'):
            continue
        xml_path = os.path.join(root_path, xml_file)
        # parse xml file
        tree = ET.parse(xml_path)
        changed = False
        # The stdlib ElementTree has no .xpath(); iterfind() takes the same
        # './/name' path, and the dict lookup replaces the per-row query.
        for tag in tree.iterfind('.//name'):
            change_to = class_map.get(tag.text)
            if change_to is None:
                continue
            print(f'Changed class {tag.text} to {change_to}')
            tag.text = change_to
            changed = True
        if changed:
            # ElementTree.write() expects the destination path, not the
            # serialised document.
            tree.write(xml_path, encoding='utf-8')

相关内容

  • 没有找到相关文章

最新更新