从非结束节点HTML元素(如LI元素)获取文本值的Javascript



我正在尝试编写一个脚本,该脚本将遍历HTML源代码,并创建DOM的JSON文件,然后使用d3.js在树视图中显示该文件。我遇到的问题是,不仅要显示元素(TITLE、P、LI等),还要显示元素的文本值。如果只处理叶子节点元素(没有子元素的元素),这很简单。但是,有时带有子元素的父元素本身也需要显示文本值,例如下面UL中的列表项II和B。

        <ul class="level-1">
            <li>I</li>
            <li>II
              <ul class="level-2">
                <li>A</li>
                <li>B
                  <ul class="level-3">
                    <li>1</li>
                    <li>2</li>
                    <li>3</li>
                  </ul>
                </li>
                <li>C</li>
              </ul>
            </li>
            <li>III</li>
        </ul>   

在我下面的函数中,这一行对叶子节点有效,但对于父元素,我找不到办法只取它自身的文本,而不把所有子元素的内容也一并输出。

   $output.append(', "value": "' + $(child).text() + '"}');

我已经尝试过jQuery中一些与first-child相关的方法,但没能成功。我还想让它尽可能通用,以便适用于任何HTML源。换句话说,我不想写成"如果 nodeName == 'LI',就执行列表项专用的处理"这样的逻辑。

  /**
   * Recursively serializes the element children of a jQuery selection by
   * appending hand-built JSON fragments to `$output` (defined outside this
   * snippet).
   *
   * For every child element an object with a "name" (its nodeName) is written.
   * Children that themselves contain elements get a "children" array; childless
   * elements other than TEXTAREA and SCRIPT get a "value" holding their text.
   * Sibling objects are separated by commas. Note that the text is inserted
   * as-is, without any JSON escaping.
   *
   * @param {jQuery} domObject - selection whose element children are serialized.
   */
  var createJsonOutput = function(domObject) {
    var $kids = domObject.children();
    var lastPosition = $kids.length - 1;

    $.each($kids, function(position, child) {
      var $child = $(child);
      var tag = child.nodeName;

      $output.append('{"name": "' + tag + '"');

      if ($child.children().length > 0) {
        // Element has nested elements: recurse and wrap the result in an array.
        $output.append(',"children": [');
        createJsonOutput($child);
        $output.append(']}');
      } else if (tag == 'TEXTAREA' || tag == 'SCRIPT') {
        // These leaf elements are emitted without a value.
        $output.append('}');
      } else {
        $output.append(', "value": "' + $(child).text() + '"}');
      }

      // Comma between siblings, but not after the last one.
      if (position < lastPosition) {
        $output.append(',');
      }
    });
  };
  // Kick off the traversal from the jQuery selection for 'html'.
  createJsonOutput($('html'));

示例(未格式化)JSON:

{"name": "HTML","children": [{"name": "HEAD","children": [{"name": "META", "value": ""},{"name": "TITLE", "value": "Node-Link Tree"},{"name": "SCRIPT"},{"name": "SCRIPT"},{"name": "LINK", "value": ""}]},{"name": "BODY","children": [{"name": "DIV","children": [{"name": "UL","children": [{"name": "LI", "value": "I"},{"name": "LI","children": [{"name": "UL","children": [{"name": "LI", "value": "A"},{"name": "LI","children": [{"name": "UL","children": [{"name": "LI", "value": "1"},{"name": "LI", "value": "2"},{"name": "LI", "value": "3"}]}]},{"name": "LI", "value": "C"}]}]},{"name": "LI", "value": "III"}]}]},{"name": "DIV","children": [{"name": "TEXTAREA"},{"name": "P", "value": "tree time!"}]},{"name": "DIV", "value": ""},{"name": "SCRIPT"}]}]}

您可以编写一个函数来只返回当前元素的文本

/**
 * jQuery plugin method: returns the text of the selection with the text of
 * its child elements left out.
 *
 * Works on a clone, so the elements in the page are not modified.
 *
 * @returns {string} text of the cloned selection after its child elements
 *     have been removed.
 */
jQuery.fn.justtext = function() {
    var $copy = $(this).clone();
    // Strip every child element from the copy; only its own text nodes remain.
    $copy.children().remove();
    return $copy.text();
};

http://viralpatel.net/blogs/2011/02/jquery-get-text-element-without-child-element.html

children()只返回元素节点,但例如第二个<li/>中的II是一个文本节点。

您可以使用contents(),它还返回文本节点

我做了一些似乎可以满足你需求的东西。

/**
 * Converts a DOM-like node (or an array of such nodes) into a plain
 * {name, children | value} structure.
 *
 * - An array-like input with a callable `shift` is consumed element by element
 *   (it is emptied in the process) and an array of converted nodes is returned.
 * - A node with element children gets a `children` array; its own text is not
 *   recorded.
 * - A node without element children gets `value` set to its `textContent`,
 *   when that is non-empty.
 *
 * @param {Object|Array} node - node exposing `tagName`, `children`,
 *     `textContent`, or an array of such nodes.
 * @param {boolean} [recursing] - truthy for internal calls; the plain object
 *     is then returned instead of a JSON string.
 * @returns {string|Object|Array} JSON string for a top-level node call,
 *     otherwise the plain object / array.
 */
function objectify(node, recursing) {
    // Duck-typed array check: anything with a callable `shift` is treated as a list.
    if (node.shift && node.shift.call) {
        var converted = [];
        var item;
        while ((item = node.shift())) {
            converted.push(objectify(item, true));
        }
        return converted;
    }

    var entry = {name: node.tagName};
    if (node.children && node.children.length) {
        // Copy the live collection into a real array before it gets consumed.
        var childList = Array.prototype.slice.call(node.children);
        entry.children = objectify(childList, true);
    } else if (node.textContent) {
        entry.value = node.textContent;
    }

    if (recursing) {
        return entry;
    }
    return JSON.stringify(entry);
}
// Try it out: serialize the first element with class "level-1" and log the
// resulting JSON string. (A stray zero-width space after the semicolon, which
// is not valid JavaScript whitespace, has been removed.)
console.log(objectify(document.getElementsByClassName('level-1')[0]));

如果我正确地理解了您遇到的问题,这将通过仅在节点没有子节点的情况下获取节点的文本内容来避免问题。您可能可以将同样的东西应用于您的代码。

在上面的UL上运行的输出示例:

{"name":"UL","children":[{"name":"LI","value":"I"},{"name":"LI","children":[{"name":"UL","children":[{"name":"LI","value":"A"},{"name":"LI","children":[{"name":"UL","children":[{"name":"LI","value":"1"},{"name":"LI","value":"2"},{"name":"LI","value":"3"}]}]},{"name":"LI","value":"C"}]}]},{"name":"LI","value":"III"}]}

这对你有用吗?

http://jsfiddle.net/hCDjd/

最新更新