我想做一个 QAbstractItemModel,从一系列 XML 文件中获取数据,这些文件都位于同一个目录中。由于 PyQt5 不再支持 QDomDocument(或者至少我找不到让它工作的方法),我不得不求助于 QXmlStreamReader。我正在将数据本身放入一个巨大的 Python 字典中(嗯……按照计算机科学的标准,它其实算不上巨大),它在各种键下包含其他字典,以形成树状结构。
这是我到目前为止的代码:
# NOTE(review): the indentation of this snippet was lost when it was pasted.
# The comments below describe each statement on its own and do not assume a
# particular nesting.
# Item model whose constructor reads the XML files of one directory into
# the dict self.data.
class DataModel(QtCore.QAbstractItemModel):
# settingsDirectory: object providing setNameFilters(), entryList() and
# absolutePath() (presumably a QDir -- TODO confirm against callers).
# parent: forwarded unchanged to the base-class constructor.
def __init__(self, settingsDirectory, parent = None):
super(DataModel, self).__init__(parent)
# Restrict the directory listing to entries matching *.xml.
settingsDirectory.setNameFilters(["*.xml"])
files = settingsDirectory.entryList()
print(files)
# Single dict that receives every parsed value. All assignments below
# write straight into it (self.data[...] = ...), never into a nested dict.
self.data = {}
for i in range(len(files)):
filePath = str(files[i])
# entryList() results are joined with the directory's absolute path.
file = QtCore.QFile(settingsDirectory.absolutePath() + "/" + str(filePath))
# Open read-only in text mode; the result tells whether opening worked.
fileOpens = file.open(file.ReadOnly | file.Text)
if fileOpens:
parser = QtCore.QXmlStreamReader(file)
print("--------Beginning parsing----------")
print("Reading file: "+str(filePath))
while not parser.atEnd():
# Advance to the next token and remember its type.
parser.readNext()
token = parser.tokenType()
print("Reading tag: " + str(parser.name()))
print("Tag type is: " + str(token))
if token == parser.StartDocument:
# Stored under fixed keys, so every StartDocument overwrites the
# previous values.
self.data["XML Version"] = str(parser.documentVersion())
self.data["XML Encoding"] = str(parser.documentEncoding())
if token == parser.StartElement:
# Remember the name of the most recently opened element.
tokenName = parser.name()
# This test queries the parser again instead of reusing `token`.
if parser.tokenType() == parser.Characters:
# Remember the text of the most recent Characters token; this is the
# only place tokenText receives a non-None value.
tokenText = parser.text()
print("This tag has a text value: " + str(tokenText))
print("current data: " + str(self.data))
if token == parser.EndElement:
# The value is keyed by the last remembered element name and written at
# the top level of self.data: the remembered text if there is one,
# otherwise an empty dict.
if tokenText != None:
self.data[tokenName] = tokenText
else:
self.data[tokenName] = {}
# Forget both remembered values once an element has been closed.
tokenName = None
tokenText = None
else:
print(self.tr("xml file did not open properly"))
print(self.data)
虽然这段代码不会崩溃,但它确实有一些问题,我不知道它们为什么会发生,也不知道如何解决:
1.由于某种原因,tokenName
永远不会从None
改变 - 已解决
2.self.data
字典的结构不会变成树状的,不知道为什么:|
示例数据:
<?xml version="1.0" encoding="UTF-8"?>
<tag>
<description>This is a text</description>
<types>
<typesAllowed></typesAllowed>
<typesEnabled></typesEnabled>
</types>
</tag>
产生最终结果:
{'XML Encoding': 'UTF-8', 'XML Version': '1.0', 'typesAllowed': '\n\t\t', None: '\n', 'typesEnabled': '\n\t\t', 'description': 'This is a text'}
而不是期望的:
{'XML Encoding': 'UTF-8', 'XML Version': '1.0', 'tag': {'description': 'this is a text', 'typesAllowed': '\n\t\t', 'typesEnabled': '\n\t\t'}}
我知道这些问题很可能是因为我对 StreamReader 的工作原理了解不足,因此欢迎任何提示 :)
编辑 1:
tokenName
更改是一个愚蠢的定位错误,愚蠢的我。 代码反映了修复。
编辑2:
添加了示例和示例输出
这个问题现在已经解决了;我对这个问题采取了不同的方法。
我基本上使用了一个列表:如果 StartElement 令牌具有属性 parseAs == "element",我就把一个 (name, {}) 形式的 tuple 附加到列表中;如果 parseAs == "text",则把求值后的字符串(由 parseText 函数处理)放入最后一个 tuple 的字典中。当遇到 EndElement 令牌时,它会找到 name == tokenName(当前令牌的名称)的 tuple,并将其作为键为 name 的条目放入前一个 tuple 的字典中。
关于它的工作方式还有一些细节(例如它如何知道何时把 currData 提交到 self.data 等),但如果把它们都写进来,解释可能会变得过于复杂。
class DataModel(QtCore.QAbstractItemModel):
def __init__(self, settingsDirectory, parent = None):
    """Load every ``*.xml`` file found in *settingsDirectory* into the model.

    Each file is read with ``QXmlStreamReader``.  Elements are interpreted
    according to their ``parseAs`` attribute:

    * ``parseAs="element"`` opens a nested dict;
    * ``parseAs="text"`` stores the element text, converted by
      ``parseText``, in the innermost open dict;
    * anything else is reported and the element itself is ignored (its
      children are still processed).

    When an ``element`` is closed and no other ``element`` is open, its
    dict is stored in ``self.data`` under the element's name.  The XML
    version and encoding of each file are recorded in ``self.parsingLog``.

    Args:
        settingsDirectory: directory object offering ``setNameFilters``,
            ``entryList`` and ``absolutePath`` (presumably a ``QDir`` --
            TODO confirm against callers).
        parent: forwarded to the base-class constructor.
    """
    super(DataModel, self).__init__(parent)
    settingsDirectory.setNameFilters(["*.xml"])
    files = settingsDirectory.entryList()
    print(files)
    # NOTE(review): the attribute name ``data`` is kept for compatibility,
    # but it hides the ``data()`` method a QAbstractItemModel subclass is
    # expected to provide -- confirm before attaching this model to a view.
    self.data = {}
    self.parsingLog = {}
    for fileName in files:
        fileName = str(fileName)
        file = QtCore.QFile(settingsDirectory.absolutePath() + "/" + fileName)
        if not file.open(file.ReadOnly | file.Text):
            print(self.tr("xml file did not open properly"))
            continue
        try:
            self._parseFile(file, fileName)
        finally:
            # The handle is released even when parsing terminates early.
            file.close()

def _parseFile(self, file, fileName):
    """Stream one opened XML *file* and merge its content into ``self.data``."""
    parser = QtCore.QXmlStreamReader(file)
    # Stack of (name, content) tuples, one per element that is currently
    # open: content is a dict for parseAs="element" and None for elements
    # whose parseAs attribute was not recognised.
    currData = []
    logKey = "File: " + fileName
    print(self.tr("--------Beginning parsing--------"))
    print(self.tr("Reading file: " + fileName))
    print(self.tr("---------------------------------"))
    while not parser.atEnd():
        parser.readNext()
        token = parser.tokenType()
        print(self.tr("--------------------"))
        print(self.tr("Token type: " + str(self.printTokenType(token))))
        if token == parser.StartElement:
            self._openElement(parser, currData)
        elif token == parser.EndElement:
            self._closeElement(parser, currData)
        elif token == parser.Characters:
            print(self.tr("Characters value: " + str(parser.text())))
            print(self.tr("--------------------"))
        elif token == parser.StartDocument:
            entry = {
                "XML Version": str(parser.documentVersion()),
                "XML Encoding": str(parser.documentEncoding()),
            }
            self.parsingLog[logKey] = entry
            print(self.tr("File Version: " + entry["XML Version"]))
            print(self.tr("File Encoding: " + entry["XML Encoding"]))
        elif token == parser.EndDocument:
            print(self.tr("Cleaning up") + "....." + self.tr("done."))
            print(self.tr("self.data: " + str(self.data)))
            print(self.tr("--------------------"))
    # atEnd() is also true once the reader has aborted on an error, so the
    # error can only be reported after the loop has finished.
    if parser.hasError():
        print(self.tr("XML file is not well-formatted"))
        print(self.tr("Line " + str(parser.lineNumber()) + ": " + str(parser.errorString())))

def _openContainers(self, currData):
    """Return the entries of *currData* that hold a dict, outermost first."""
    return [entry for entry in currData if entry[1] is not None]

def _openElement(self, parser, currData):
    """Handle a StartElement token according to its ``parseAs`` attribute."""
    tokenName = parser.name()
    parseAs = parser.attributes().value("parseAs")
    print(self.tr("Reading StartElement: " + str(tokenName)))
    print(self.tr("parseAs: " + str(parseAs)))
    if parseAs == "text":
        # readElementText() consumes everything up to and including the
        # matching end tag, so no EndElement token follows for this element.
        textValue = self.parseText(parser.readElementText())
        print(self.tr("Text Value: " + str(textValue)))
        containers = self._openContainers(currData)
        if not containers:
            print(self.tr("*******Terminating application*******"))
            print(self.tr("Reason: currData is empty"))
            print(self.tr("*******Terminating application*******"))
            # Non-zero status: this is a failure, not a normal shutdown.
            sys.exit(1)
        containers[-1][1][tokenName] = textValue
    elif parseAs == "element":
        currData.append((tokenName, {}))
    else:
        print(self.tr("******WARNING******"))
        print(self.tr("parseAs attribute is not given correctly"))
        print(self.tr("******WARNING******"))
        # Remember the element anyway so that its end tag is matched with
        # it and not with an enclosing element of the same name.
        currData.append((tokenName, None))
    print(self.tr("--------------------"))

def _closeElement(self, parser, currData):
    """Handle an EndElement token: attach the finished dict to its parent."""
    tokenName = parser.name()
    print(self.tr("Reading EndElement: " + str(tokenName)))
    print(self.tr("currData before: " + str(self._openContainers(currData))))
    if not currData:
        return
    name, content = currData.pop()
    if content is None:
        # End tag of an ignored element: nothing was collected for it.
        return
    print(self.tr("Closing token: " + str(tokenName)))
    parents = self._openContainers(currData)
    if parents:
        parents[-1][1][name] = content
        print(self.tr("currData after: " + str(parents)))
        print(self.tr("--------------------"))
    else:
        # No enclosing container is left, so this dict goes into the model.
        self.data[name] = content
        print(self.tr("This is the final token, writing to self.data")
              + "....." + self.tr("done."))
        print(self.tr("--------------------"))
def parseText(self, text):
    """Convert element text to ``int`` or ``float`` when it looks numeric.

    Only the ASCII digits and ``.`` count as numeric characters, so signed
    values such as ``"-3"`` are returned unchanged as strings.

    Args:
        text: the text content of an element.

    Returns:
        ``int`` if *text* consists solely of digits, ``float`` if it
        consists of digits and dots and is a valid float literal,
        otherwise *text* itself (this includes the empty string).

    Raises:
        ValueError: if *text* is not a ``str``.
    """
    if not isinstance(text, str):
        raise ValueError("parseText expects a str, got " + type(text).__name__)
    if not text or any(ch not in "0123456789." for ch in text):
        return text
    if "." not in text:
        return int(text)
    try:
        return float(text)
    except ValueError:
        # Digits and dots that do not form a number, e.g. "1.2.3" or ".".
        return text
def printTokenType(self, token):
    """Return the name of a ``QXmlStreamReader`` token type.

    Args:
        token: a token type as returned by ``QXmlStreamReader.tokenType()``.

    Returns:
        The name of the matching token type as a string, or ``None`` when
        *token* matches none of the listed types.
    """
    reader = QtCore.QXmlStreamReader
    # Every entry uses the named enum member; no numeric literals.
    tokenNames = (
        (reader.NoToken, "NoToken"),
        (reader.Invalid, "Invalid"),
        (reader.StartDocument, "StartDocument"),
        (reader.EndDocument, "EndDocument"),
        (reader.StartElement, "StartElement"),
        (reader.EndElement, "EndElement"),
        (reader.Characters, "Characters"),
        (reader.Comment, "Comment"),
        (reader.DTD, "DTD"),
        (reader.EntityReference, "EntityReference"),
        (reader.ProcessingInstruction, "ProcessingInstruction"),
    )
    for value, label in tokenNames:
        if token == value:
            return label
    return None