使用XSLT 3复制一个包含doctype、实体和记法(notation)的XML文件



我有一系列XML文档,这些文档会从一个文件夹复制到另一个文件夹,用msxsl.exe 1.1.0.1和XSLT 1.0样式表进行转换,然后再复制回原始文件夹。我不知道为什么doctype、实体和记法(notation)没有被复制,目前它们是通过样式表中的JavaScript插入的。我必须用XSLT 3.0替换这段JavaScript,这样样式表才能与Saxon HE 11一起使用。

doctype位于XML文档的最顶部,下面就是我想要的输出:

<!DOCTYPE dmodule [
<!ENTITY ICN-XXX12-001-01 SYSTEM "ICN-XXX12-001-01.SWF" NDATA swf >
<!ENTITY ICN-XXX49-001-01 SYSTEM "ICN-XXX49-001-01 SYSTEM.CGM" NDATA cgm >
<!ENTITY ICN-AAA235-000000-0-A-001-01 SYSTEM "ICN-AAA235-000000-0-A-001-01.wrlzip" NDATA WRLZIP>
<!NOTATION cgm PUBLIC "-//USA-DOD//NOTATION Computer Graphics Metafile//EN" >
<!NOTATION swf PUBLIC "-//S1000D//NOTATION X-SHOCKWAVE-FLASH 3D Models Encoding//EN" >
<!NOTATION WRLZIP SYSTEM "WRLZIP">
]>
<dmodule xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:dc="http://www.purl.org/dc/elements/1.1/"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:xlink="http://www.w3.org/1999/xlink"
xsi:noNamespaceSchemaLocation="../schema/proced.xsd">
<content>
<figure>
<title/>
<graphic infoEntityIdent="ICN-XXX49-001-01"/>
</figure>
<proceduralStep>
<para>Check the brake system function.</para>
<multimedia>
<title>Brake function</title>
<multimediaObject autoPlay="1" fullScreen="0" infoEntityIdent="ICN-XXX12-001-01" multimediaType="other"/>
</multimedia>
</proceduralStep>
<multimedia>
<multimediaObject infoEntityIdent="ICN-AAA235-000000-0-A-001-01"
multimediaType="3D"
xlink:href="ICN-AAA235-000000-0-A-001-01.wrlzip"
xlink:type="simple"/>
</multimedia>
</content>
</dmodule>

实体在@infoEntityIdent上从各种元素引用,但并不总是有文件类型的指示:

<graphic infoEntityIdent="ICN-XXX49-001-01"/>
<multimediaObject autoPlay="1" fullScreen="0" infoEntityIdent="ICN-XXX12-001-01"
multimediaType="other"/>
<multimediaObject infoEntityIdent="ICN-AAA235-000000-0-A-001-01"
multimediaType="3D" xlink:href="ICN-AAA235-000000-0-A-001-01.wrlzip"
xlink:type="simple"/>

我可以正确插入doctype,但我不知道如何访问实体和记法(notation):

<!-- Root template: writes a DOCTYPE declaration with a still-empty internal subset, then copies the document. -->
<xsl:template match="/">
  <!-- Line break and start of the declaration; escaping is disabled so a literal "<" reaches the output. -->
  <xsl:text disable-output-escaping="yes">&#xA;&lt;!DOCTYPE </xsl:text>
  <!-- The doctype name is the local name of the document element. -->
  <xsl:value-of select="local-name(*)"/>
  <xsl:text> [</xsl:text>
  <!-- entities and notations here -->
  <!-- Close the internal subset and the declaration, followed by a line break. -->
  <xsl:text disable-output-escaping="yes">]&gt;&#xA;</xsl:text>
  <!-- Copy the document node and hand its children on to the other templates. -->
  <xsl:copy>
    <xsl:apply-templates select="node() | @*"/>
  </xsl:copy>
</xsl:template>

当前输出:

<!DOCTYPE dmodule []>
<dmodule xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:dc="http://www.purl.org/dc/elements/1.1/"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:xlink="http://www.w3.org/1999/xlink"
xsi:noNamespaceSchemaLocation="../schema/proced.xsd">
<content>
<figure>
<title/>
<graphic infoEntityIdent="ICN-XXX49-001-01"/>
</figure>
<proceduralStep>
<para>Check the brake system function.</para>
<multimedia>
<title>Brake function</title>
<multimediaObject autoPlay="1" fullScreen="0" infoEntityIdent="ICN-XXX12-001-01" multimediaType="other"/>
</multimedia>
</proceduralStep>
<multimedia>
<multimediaObject infoEntityIdent="ICN-AAA235-000000-0-A-001-01"
multimediaType="3D"
xlink:href="ICN-AAA235-000000-0-A-001-01.wrlzip"
xlink:type="simple"/>
</multimedia>
</content>
</dmodule>

这是样式表中继承的javascript,它确实给出了所需的结果:

<msxsl:script language="JavaScript" implements-prefix="js">
<![CDATA[
/**
 * Rebuilds the DOCTYPE declaration, including its internal subset, for the
 * source document so it can be written to the output with output escaping
 * disabled.
 *
 * @param root  Node list passed in from XSLT; root.item(0) is used as the
 *              source document (its url, doctype and documentElement
 *              properties are read).
 * @return      A string of the form
 *              "\n<!DOCTYPE name [\n...entity and notation declarations...\n]>\n".
 *              When the document has no doctype the subset is empty.
 */
function doctype(root) {
  var doc = root.item(0);

  // NOTE(review): fso, basepath and needSVGNotations are never read in this
  // function; they are kept because the script was inherited as-is.
  var fso = new ActiveXObject('Scripting.FileSystemObject');

  // Directory of the source document: strip the file: scheme, drop the last
  // path segment, and turn forward slashes into backslashes.
  var basepath = unescape(
    doc.url
      .replace(/^file:\/{3,}/, '')
      .replace(/^file:/, '')
      .replace(/[^\/]+$/, '')
      .replace(/\//g, '\\')
  );

  var entities = [];
  var notations = [];
  var needSVGNotations = false;
  if (doc.doctype) {
    entities = doc.doctype.entities;
    notations = doc.doctype.notations;
  }

  var syntax = '\n<!DOCTYPE ' + doc.documentElement.nodeName + ' [\n';

  // Each entity / notation node serialises itself through its .xml property.
  for (var i = 0; i < entities.length; i++) {
    syntax += entities.item(i).xml + '\n';
  }
  for (var j = 0; j < notations.length; j++) {
    syntax += notations.item(j).xml + '\n';
  }

  syntax += ']>\n';
  return syntax;
}
]]>
</msxsl:script>

这是使用javascript的模板:

<!-- Root template: emits the DOCTYPE text returned by the js:doctype() script function, then copies the document. -->
<xsl:template match="/">
  <!-- Escaping is disabled so the markup in the returned string is written literally. -->
  <xsl:value-of disable-output-escaping="yes" select="js:doctype(.)"/>
  <xsl:copy>
    <xsl:apply-templates select="node() | @*"/>
  </xsl:copy>
</xsl:template>

DTD/DOCTYPE中的信息不是XSLT数据模型的一部分,因此它不会从XML解析器传递给XSLT处理器——这意味着在纯XSLT中无法做到这一点。

Andrew Welch提供了一个名为LEXEV的实用程序,它对XML文档进行预处理,以元素和属性的形式创建DTD的表示,然后可以使用XSLT以正常方式对其进行转换(或保持不变),最后再后处理回DTD语法。我已经很多年没用它了,但我希望它仍然有效。

Martin Honnen关于使用unparsed-entity-uri()的建议正是关键,因为它返回了所需的全部信息。他还调整了我的代码,使其更加简洁,并更正了我最初在analyze-string中使用的正则表达式。

<!-- Root template: writes the DOCTYPE through the getDocType named template, then copies the document. -->
<xsl:template match="/">
  <!-- The document node stays the context item inside the called template. -->
  <xsl:call-template name="getDocType"/>
  <xsl:copy>
    <xsl:apply-templates select="node() | @*"/>
  </xsl:copy>
</xsl:template>

<!--
  getDocType: writes the DOCTYPE declaration together with an internal subset.

  Call it with the document node as the context item (the calling template matches "/").
  ENTITY declarations are rebuilt from the @infoEntityIdent references by asking
  unparsed-entity-uri() for the URI of each referenced entity. NOTATION declarations
  are derived from the file extension of those URIs.

  Each entity is declared once even when it is referenced several times, and a
  reference that does not resolve to an unparsed entity is skipped instead of
  producing a declaration with an empty system identifier.
-->
<xsl:template name="getDocType">
  <xsl:text>&#xA;</xsl:text>
  <xsl:text disable-output-escaping="yes">&lt;!DOCTYPE </xsl:text>
  <!-- doctype will either be pm or dmodule -->
  <xsl:value-of select="local-name(child::*)"/>
  <xsl:text> [</xsl:text>
  <!-- All @infoEntityIdent attributes that resolve to a declared unparsed entity.
       Declared as attribute()* so that unparsed-entity-uri() has a node as its context. -->
  <xsl:variable name="infoEntityIdent" as="attribute()*" select="(descendant::symbol | descendant::barCode | descendant::multimedia/multimediaObject | descendant::graphic)/@infoEntityIdent[unparsed-entity-uri(.) != '']"/>
  <xsl:text>&#xA;</xsl:text>

  <!-- Write one entity declaration per distinct entity name, in order of first reference.
       Inside the group the context item is the first referencing attribute. -->
  <xsl:for-each-group select="$infoEntityIdent" group-by="string(.)">
    <xsl:variable name="uri" as="xs:anyURI" select="unparsed-entity-uri(.)"/>
    <xsl:text disable-output-escaping="yes">&lt;!ENTITY </xsl:text>
    <xsl:value-of select="current-grouping-key()"/>
    <xsl:text> SYSTEM "</xsl:text>
    <!-- remove everything before, and including, the Original directory, leaving any graphics directory -->
    <xsl:value-of select="replace($uri,'^.*Original/(.*)$','$1')"/>
    <xsl:text>" NDATA </xsl:text>
    <!-- the notation name is the file extension of the entity URI -->
    <xsl:value-of select="replace($uri, '.*?([^.]+)$', '$1')"/>
    <!-- close the declaration -->
    <xsl:text disable-output-escaping="yes">&gt;</xsl:text>
    <xsl:text>&#xA;</xsl:text>
  </xsl:for-each-group>

  <!-- The file extensions in use; each distinct one needs a NOTATION declaration. -->
  <xsl:variable name="notations" select="$infoEntityIdent ! unparsed-entity-uri(.) ! replace(., '.*?([^.]+)$', '$1')"/>
  <xsl:for-each select="distinct-values($notations)">
    <xsl:choose>
      <xsl:when test="matches(.,'JPE?G','i')">
        <xsl:text disable-output-escaping="yes" expand-text="1">&lt;!NOTATION {.} PUBLIC "+//ISBN 0-7923-9432-1::Graphic Notation//NOTATION Joint Photographic Experts Group Raster//EN"&gt;</xsl:text>
      </xsl:when>
      <xsl:when test="matches(.,'cgm', 'i')">
        <xsl:text disable-output-escaping="yes" expand-text="1">&lt;!NOTATION {.} PUBLIC "-//USA-DOD//NOTATION Computer Graphics Metafile//EN"&gt;</xsl:text>
      </xsl:when>
      <xsl:when test="matches(.,'wrlzip', 'i')">
        <xsl:text disable-output-escaping="yes" expand-text="1">&lt;!NOTATION {.} SYSTEM "WRLZIP"&gt;</xsl:text>
      </xsl:when>
      <xsl:when test="matches(.,'svg', 'i')">
        <xsl:text disable-output-escaping="yes" expand-text="1">&lt;!NOTATION {.} SYSTEM "SVG"&gt;</xsl:text>
      </xsl:when>
      <!-- "tiff?" also covers the common .tif extension -->
      <xsl:when test="matches(.,'tiff?', 'i')">
        <xsl:text disable-output-escaping="yes" expand-text="1">&lt;!NOTATION {.} SYSTEM "TIFF"&gt;</xsl:text>
      </xsl:when>
      <xsl:when test="matches(.,'png', 'i')">
        <xsl:text disable-output-escaping="yes" expand-text="1">&lt;!NOTATION {.} PUBLIC "-//W3C//NOTATION Portable Network Graphics//EN"&gt;</xsl:text>
      </xsl:when>
      <xsl:when test="matches(.,'swf', 'i')">
        <xsl:text disable-output-escaping="yes" expand-text="1">&lt;!NOTATION {.} PUBLIC "-//S1000D//NOTATION X-SHOCKWAVE-FLASH 3D Models Encoding//EN"&gt;</xsl:text>
      </xsl:when>
      <xsl:otherwise>
        <!-- An entity uses this notation name but no declaration is known for it; report it instead of staying silent. -->
        <xsl:message expand-text="yes">Warning: no NOTATION declaration is known for the extension "{.}"</xsl:message>
      </xsl:otherwise>
    </xsl:choose>
    <xsl:text>&#xA;</xsl:text>
  </xsl:for-each>
  <xsl:text disable-output-escaping="yes">]&gt;</xsl:text>
  <xsl:text>&#xA;</xsl:text>
  <xsl:text>&#xA;</xsl:text>
</xsl:template>

相关内容

  • 没有找到相关文章

最新更新