合并名称中包含"_list"且名称相同的 XML 节点,并在根级别合并同名节点



下面是输入 XML,我希望将它转换为后面给出的所需输出:

<xml>
<a>
<element0>987</element0>
</a>
<a>
<a_list_one>
<a_lag_one>
<element1>123</element1>
<element2>456</element2>
</a_lag_one>
</a_list_one>
<a_list_one>
<a_lag_one>
<element1>789</element1>
<element2>678</element2>
</a_lag_one>                
</a_list_one>
<a_list_two>
<a_lag_two>
<a_list_three>
<a_lag_three>
<element3>570</element3>
<element4>678</element4>
</a_lag_three>
</a_list_three>
<a_list_three>
<a_lag_three>
<element3>989</element3>
<element4>231</element4>
</a_lag_three>
</a_list_three>
</a_lag_two>
<a_lag_two>
<a_list_three>
<a_lag_three>
<element3>570</element3>
<element4>678</element4>
</a_lag_three>
</a_list_three>
<a_list_three>
<a_lag_three>
<element3>9873</element3>
<element4>278</element4>
</a_lag_three>
</a_list_three>
<a_list_four>
<a_lag_four>
<element5>9121</element5>
<element6>9879</element6>
</a_lag_four>
</a_list_four>
<a_list_three>
<a_lag_four>
<element5>098</element5>
<element6>231</element6>
</a_lag_four>
</a_list_three>
</a_lag_two>
</a_list_two>
<a_list_four>
<a_lag_four>
<element5>654</element5>
<element6>7665</element6>
</a_lag_four>
</a_list_four>
</a>
<b>
<b_list_one>
<b_lag_one>
<element8>123</element8>
<element9>456</element9>
</b_lag_one>
</b_list_one>
</b>
<b>
<b_list_one>
<b_lag_one>
<element8>789</element8>
<element9>678</element9>
</b_lag_one>            
</b_list_one>
</b>
</xml>

所需的 XML 是:

<xml>
<a>
<element0>987</element0>
<a_list_one>
<a_lag_one>
<element1>123</element1>
<element2>456</element2>
</a_lag_one>
<a_lag_one>
<element1>789</element1>
<element2>678</element2>
</a_lag_one>
</a_list_one>
<a_list_two>
<a_lag_two>
<a_list_three>
<a_lag_three>
<element3>570</element3>
<element4>678</element4>
</a_lag_three>
<a_lag_three>
<element3>989</element3>
<element4>231</element4>
</a_lag_three>
</a_list_three>
</a_lag_two>
<a_lag_two>
<a_list_three>
<a_lag_three>
<element3>570</element3>
<element4>678</element4>
</a_lag_three>
<a_lag_three>
<element3>9873</element3>
<element4>278</element4>
</a_lag_three>
<a_lag_four>
<element5>098</element5>
<element6>231</element6>
</a_lag_four>
</a_list_three>
<a_list_four>
<a_lag_four>
<element5>9121</element5>
<element6>9879</element6>
</a_lag_four>
</a_list_four>
</a_lag_two>
</a_list_two>
<a_list_four>
<a_lag_four>
<element5>654</element5>
<element6>7665</element6>
</a_lag_four>
</a_list_four>      
</a>
<b>
<b_list_one>
<b_lag_one>
<element8>123</element8>
<element9>456</element9>
</b_lag_one>
<b_lag_one>
<element8>789</element8>
<element9>678</element9>
</b_lag_one>            
</b_list_one>
</b>
</xml>

我正在寻找一个能够把上面的输入转换为所需输出的 XSLT。其中,名称相同且名称中包含"_list"的节点应合并在一起。但是,此逻辑应仅在第一个"_list"节点内发生,不应应用于内部节点。其次,在根级别也要合并同名节点:例如,这里最终应该只有一个"a"标签和一个"b"标签。请帮忙。

下面是一个 XSLT 1.0 的解决方案:

<xsl:stylesheet version="1.0"
    xmlns:msxml="urn:schemas-microsoft-com:xslt"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes" omit-xml-declaration="yes"/>

  <!-- Index of the children of <xml>, looked up by local name. -->
  <xsl:key name="xmlChildren" match="xml/*" use="local-name()"/>
  <!-- Index of every element whose local name contains '_list', looked up by the generated id of its parent. -->
  <xsl:key name="list" match="*[contains(local-name(),'_list')]" use="generate-id(..)"/>

  <!--
    Children of <xml>: only the first element of each name produces output.
    Its copy receives the element children of every same-named sibling, and
    the shared name is handed down as the parentName parameter.
  -->
  <xsl:template match="xml/*">
    <xsl:variable name="sameNamed" select="key('xmlChildren', local-name())"/>
    <xsl:if test="generate-id() = generate-id($sameNamed[1])">
      <xsl:copy>
        <xsl:apply-templates select="$sameNamed/*">
          <xsl:with-param name="parentName" select="local-name()"/>
        </xsl:apply-templates>
      </xsl:copy>
    </xsl:if>
  </xsl:template>

  <!--
    Elements whose local name contains '_list'.
    parentName: local name of the enclosing child of <xml>; it is only
    supplied when this template is reached from the template above.
  -->
  <xsl:template match="*[contains(local-name(),'_list')]">
    <xsl:param name="parentName"/>
    <xsl:variable name="ownName" select="local-name()"/>
    <!-- True when this list is a grandchild of <xml>. -->
    <xsl:variable name="underXmlChild" select="boolean(parent::*/parent::xml)"/>
    <!--
      All lists that have to be merged with this one. Exactly one operand of
      the union can be non-empty:
        * grandchild of <xml>: same-named lists below any child of <xml>
          named $parentName;
        * anywhere deeper:     same-named lists sharing this element's parent.
    -->
    <xsl:variable name="peers"
        select="key('xmlChildren', $parentName)[$underXmlChild]/*[local-name() = $ownName]
              | key('list', generate-id(..))[not($underXmlChild)][local-name() = $ownName]"/>
    <!-- Only the first list of the group is written; it gathers the children of all of them. -->
    <xsl:if test="generate-id() = generate-id($peers[1])">
      <xsl:copy>
        <xsl:apply-templates select="$peers/*"/>
      </xsl:copy>
    </xsl:if>
  </xsl:template>

  <!-- Identity rule for everything not handled above. -->
  <xsl:template match="@*|node()">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>
</xsl:stylesheet>

我认为在 XQuery 3 中,您可以使用两个嵌套的 for .. group by 表达式来解决这个问题:

(: Rebuilds the document element. Its children are merged by name, and inside
   each merged child the grandchildren whose name contains '_list' are merged
   by name as well; all other grandchildren are returned as they are. :)
/*/element { node-name(.) } {
  for $child at $child-pos in *
  group by $child-name := node-name($child)
  (: keep the groups in the order of their first member :)
  order by $child-pos[1]
  return
    element { $child-name } {
      for $grand at $grand-pos in $child/*
      let $grand-name := node-name($grand)
      group by $key := $grand-name, $handle := contains(string($grand-name), '_list')
      order by $grand-pos[1]
      return
        if (not($handle))
        then $grand
        else
          (: one merged list element holding the children of every list in the group :)
          element { $key } {
            $grand/*
          }
    }
}

https://xqueryfiddle.liberty-development.net/pPgCcor

对于 XSLT 1,我会像已有的解决方案那样使用键(key),但我认为为每个键编写两个匹配模式不同的模板会更容易:第一个模板匹配由该键建立的分组中的第一项,创建它的副本并处理整个分组的子节点;第二个模板是空模板,用于禁止处理分组中其余的同名重复元素:

<!--
  Merges same-named children of the document element into one element each
  and, inside every merged child, merges its direct children whose local name
  contains '_list'. All other nodes are copied by the identity template.
-->
<xsl:stylesheet
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    version="1.0">

  <xsl:output indent="yes"/>
  <xsl:strip-space elements="*"/>

  <!-- Children of the document element, grouped by element name. -->
  <xsl:key name="child-group" match="/*/*" use="name()"/>

  <!--
    '_list' grandchildren, grouped by parent name plus own name. Including the
    parent name keeps a list below one child group (for example a) from
    absorbing a list with the same name below a different child group (for
    example b). The '|' separator cannot occur in an XML name, so the combined
    key value is unambiguous.
  -->
  <xsl:key name="grand-child-group"
           match="/*/*/*[contains(local-name(), '_list')]"
           use="concat(name(..), '|', name())"/>

  <!-- Identity template: copies every node that no more specific rule handles. -->
  <xsl:template match="@* | node()">
    <xsl:copy>
      <xsl:apply-templates select="@* | node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- First element of a child group: written once, with the child nodes of every element in the group. -->
  <xsl:template match="/*/*[generate-id() = generate-id(key('child-group', name())[1])]">
    <xsl:copy>
      <xsl:apply-templates select="key('child-group', name())/node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- Any later element of a child group: suppressed, its content is written by the first element's rule. -->
  <xsl:template match="/*/*[not(generate-id() = generate-id(key('child-group', name())[1]))]"/>

  <!-- First '_list' element of a group within one merged parent: written once, with the child nodes of the whole group. -->
  <xsl:template match="/*/*/*[contains(local-name(), '_list')][generate-id() = generate-id(key('grand-child-group', concat(name(..), '|', name()))[1])]">
    <xsl:copy>
      <xsl:apply-templates select="key('grand-child-group', concat(name(..), '|', name()))/node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- Any later '_list' element of the same group: suppressed. -->
  <xsl:template match="/*/*/*[contains(local-name(), '_list')][not(generate-id() = generate-id(key('grand-child-group', concat(name(..), '|', name()))[1]))]"/>
</xsl:stylesheet>

https://xsltfiddle.liberty-development.net/jyH9rN5

根据您的评论,我还尝试把 XQuery 3 的解决方案改写成递归形式:

(: Groups the given elements by name, keeping the groups in the order of their
   first member.
   - Name contains '_list': the group becomes a single element whose content
     is the recursively grouped children of all members.
   - Otherwise, when no member of the group has element children, the members
     are returned as they are.
   - Otherwise each member is rebuilt under the same name with its own
     children grouped recursively. :)
declare function local:group($elements as element()*) as element()*
{
  for $item at $idx in $elements
  let $item-name := node-name($item)
  group by $name-group := $item-name, $match := contains(string($item-name), '_list')
  order by $idx[1]
  return
    if ($match)
    then element { $name-group } { local:group($item/*) }
    else if ($item/*)
    then $item/element { $name-group } { local:group(*) }
    else $item
};

(: Rebuilds the document element: its children are merged by name and the
   content of each merged child is handed to local:group. :)
/*/element { node-name(.) } {
  for $child at $child-pos in *
  group by $child-name := node-name($child)
  order by $child-pos[1]
  return
    element { $child-name } { local:group($child/*) }
}

https://xqueryfiddle.liberty-development.net/pPgCcor/1

最新更新