使用 Nokogiri 和 Ruby 解析具有自定义结构的 XML



可能的重复项:
Nokogiri/Xpath 命名空间查询

我有一个这样的XML文件:

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Sample document using the SDMX v2_0 "message" and "structure" namespaces.
  The root element and its unprefixed children (Header, CodeLists, Concepts,
  KeyFamilies) are in the default "message" namespace. The root binds the
  prefix "Structure" (capital S) to the "structure" namespace URI; the
  lowercase "structure" prefix used further down is bound separately, to the
  same URI, on each CodeList, Concept and KeyFamily element. Queries against
  those prefixed elements therefore have to be namespace-aware.
-->
<Structure xmlns="http://www.SDMX.org/resources/SDMXML/schemas/v2_0/message" xmlns:Structure="http://www.SDMX.org/resources/SDMXML/schemas/v2_0/structure" xmlns:common="http://www.SDMX.org/resources/SDMXML/schemas/v2_0/common" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.SDMX.org/resources/SDMXML/schemas/v2_0/structure SDMXMessage.xsd">
  <!-- Message header: identifier, bilingual (en/fr) title, preparation timestamp, sender and receiver ids. -->
  <Header>
    <ID>001-0001</ID>
    <Test>true</Test>
    <Name xml:lang="en">Producer deliveries of major grains, Canada and selected provinces</Name>
    <Name xml:lang="fr">Livraisons des producteurs des principaux grains, Canada et certaines provinces</Name>
    <Prepared>2012-11-22T18:42:35-05:00</Prepared>
    <Sender id="STATCAN" />
    <Receiver id="STATCAN" />
  </Header>
  <!-- Two code lists (ids GEO and TYP); all of their descendants are in the "structure" namespace. -->
  <CodeLists>
    <!-- GEO: geography codes. Each Code has a value attribute and an en/fr Description pair. -->
    <structure:CodeList xmlns:structure="http://www.SDMX.org/resources/SDMXML/schemas/v2_0/structure" id="GEO" agencyID="STATCAN" version="1.0" isFinal="false">
      <structure:Name xml:lang="en">Geography</structure:Name>
      <structure:Name xml:lang="fr">Géographie</structure:Name>
      <structure:Code value="7">
        <structure:Description xml:lang="en">Canada</structure:Description>
        <structure:Description xml:lang="fr">Canada</structure:Description>
      </structure:Code>
      <structure:Code value="8">
        <structure:Description xml:lang="en">Eastern Canada</structure:Description>
        <structure:Description xml:lang="fr">L'Est du Canada</structure:Description>
      </structure:Code>
      <structure:Code value="9">
        <structure:Description xml:lang="en">Quebec</structure:Description>
        <structure:Description xml:lang="fr">Québec</structure:Description>
      </structure:Code>
      <structure:Code value="10">
        <structure:Description xml:lang="en">Ontario</structure:Description>
        <structure:Description xml:lang="fr">Ontario</structure:Description>
      </structure:Code>
      <structure:Code value="3">
        <structure:Description xml:lang="en">Western Canada</structure:Description>
        <structure:Description xml:lang="fr">L'Ouest du Canada</structure:Description>
      </structure:Code>
      <structure:Code value="2">
        <structure:Description xml:lang="en">Prairie provinces</structure:Description>
        <structure:Description xml:lang="fr">Provinces des Prairies</structure:Description>
      </structure:Code>
      <structure:Code value="4">
        <structure:Description xml:lang="en">Manitoba</structure:Description>
        <structure:Description xml:lang="fr">Manitoba</structure:Description>
      </structure:Code>
      <structure:Code value="5">
        <structure:Description xml:lang="en">Saskatchewan</structure:Description>
        <structure:Description xml:lang="fr">Saskatchewan</structure:Description>
      </structure:Code>
      <structure:Code value="6">
        <structure:Description xml:lang="en">Alberta</structure:Description>
        <structure:Description xml:lang="fr">Alberta</structure:Description>
      </structure:Code>
      <structure:Code value="1">
        <structure:Description xml:lang="en">British Columbia</structure:Description>
        <structure:Description xml:lang="fr">Colombie-Britannique</structure:Description>
      </structure:Code>
    </structure:CodeList>
    <!-- TYP: type-of-grain codes, same Name/Code/Description layout as GEO. -->
    <structure:CodeList xmlns:structure="http://www.SDMX.org/resources/SDMXML/schemas/v2_0/structure" id="TYP" agencyID="STATCAN" version="1.0" isFinal="false">
      <structure:Name xml:lang="en">Type of grain</structure:Name>
      <structure:Name xml:lang="fr">Type de céréales</structure:Name>
      <structure:Code value="1">
        <structure:Description xml:lang="en">All grains, total</structure:Description>
        <structure:Description xml:lang="fr">Céréales, totaux</structure:Description>
      </structure:Code>
      <structure:Code value="2">
        <structure:Description xml:lang="en">Wheat, total</structure:Description>
        <structure:Description xml:lang="fr">Blé, total</structure:Description>
      </structure:Code>
      <structure:Code value="3">
        <structure:Description xml:lang="en">Wheat, excluding durum</structure:Description>
        <structure:Description xml:lang="fr">Blé (sauf le blé dur)</structure:Description>
      </structure:Code>
      <structure:Code value="4">
        <structure:Description xml:lang="en">Durum wheat</structure:Description>
        <structure:Description xml:lang="fr">Blé dur</structure:Description>
      </structure:Code>
      <structure:Code value="5">
        <structure:Description xml:lang="en">Oats</structure:Description>
        <structure:Description xml:lang="fr">Avoine</structure:Description>
      </structure:Code>
      <structure:Code value="6">
        <structure:Description xml:lang="en">Barley</structure:Description>
        <structure:Description xml:lang="fr">Orge</structure:Description>
      </structure:Code>
      <structure:Code value="7">
        <structure:Description xml:lang="en">Rye</structure:Description>
        <structure:Description xml:lang="fr">Seigle</structure:Description>
      </structure:Code>
      <structure:Code value="8">
        <structure:Description xml:lang="en">Flaxseed</structure:Description>
        <structure:Description xml:lang="fr">Lin</structure:Description>
      </structure:Code>
      <structure:Code value="9">
        <structure:Description xml:lang="en">Canola (rapeseed)</structure:Description>
        <structure:Description xml:lang="fr">Canola (colza)</structure:Description>
      </structure:Code>
    </structure:CodeList>
  </CodeLists>
  <!-- Concepts section: holds a single empty Concept element in this sample. -->
  <Concepts>
    <structure:Concept xmlns:structure="http://www.SDMX.org/resources/SDMXML/schemas/v2_0/structure" />
  </Concepts>
  <!-- Key family: its Dimension elements name the GEO and TYP code lists through their codelist attributes. -->
  <KeyFamilies>
    <structure:KeyFamily xmlns:structure="http://www.SDMX.org/resources/SDMXML/schemas/v2_0/structure" version="1.0" isFinal="false">
      <structure:Components>
        <structure:Dimension codelist="GEO" />
        <structure:Dimension codelist="TYP" />
        <structure:TimeDimension conceptRef="TIME_PERIOD" />
        <structure:PrimaryMeasure conceptRef="OBS_VALUE" />
      </structure:Components>
    </structure:KeyFamily>
  </KeyFamilies>
</Structure>

如何使用 Nokogiri 从中提取各个 CodeList 及其对应的 Name 和 Description?我可以获取 Header 元素中 Name、Sender 和 Receiver 的值,但是由于 CodeList 块中每个元素名称前都带有 structure: 前缀,我无法获取其中的值。

有人可以帮忙吗?

如果您使用的是 XPath,则应为这些元素提供前缀。 以下是一些可能的查询:

# Bind the "structure" prefix used in the XPath expressions below to the
# namespace URI it stands for; the hash is handed to xpath as its second argument.
prefix_map = {
  "structure" => "http://www.SDMX.org/resources/SDMXML/schemas/v2_0/structure"
}
# Every CodeList element in the document.
doc.xpath("//structure:CodeList", prefix_map)
# The Name children of each CodeList.
doc.xpath("//structure:CodeList/structure:Name", prefix_map)
# The Description of every Code inside a CodeList.
doc.xpath("//structure:CodeList/structure:Code/structure:Description", prefix_map)

要使前缀 "structure" 起作用,您需要将一个哈希作为第二个参数传入,让 Nokogiri 知道该前缀与其所指向的 URI 之间的映射。

编辑:已根据评论更新,以正确使用命名空间。

相关内容

  • 没有找到相关文章

最新更新