在 Swift 中解析 HTML



谁能帮我解决这个问题:

我有一个格式如下的 HTML 页面:

<ul class="ms-spbTree" id="_ul">
    <li id="XXX$username">Person0
        <ul id="XXX$username_ul">
            <li id="XXX$username">Person1
                <ul id="XXX$username_ul"></ul>
            </li>
            <li id="XXX$username">Person2
                <ul id="XXX$username_ul"></ul>
            </li>
            <li id="XXX$username">Person3
                <ul id="XXX$username_ul"></ul>
            </li>
            <li id="XXX$username">Person4
                <ul id="XXX$username_ul">
                    <li id="XXX$username">Person5
                        <ul id="XXX$username_ul"></ul>
                    </li>
                    <li id="XXX$username">Person6
                        <ul id="XXX$username_ul"></ul>
                    </li>
                    <li id="XXX$username">Person7
                        <ul id="XXX$username_ul"></ul>
                    </li>
                </ul>
            </li>
            <li id="XXX$username">Person8
                <ul id="XXX$username_ul"></ul>
            </li>
        </ul>
    </li>
</ul>

我的函数是:

/// Parses `webString` as HTML with TFHpple and prints, for every `<li>` matched
/// by the XPath query, the content of its first child node followed by the value
/// of its `id` attribute (extracted from the element's raw markup).
///
/// Prints "empty here" when the query matches nothing.
///
/// - Parameter webString: The HTML markup to parse; it is encoded as UTF-8 first.
func loadTutorial(webString: NSString)
{
    let data: NSData = webString.dataUsingEncoding(NSUTF8StringEncoding)!
    let tutorialsParser = TFHpple(HTMLData: data)
    // `//ul/li` matches <li> children of a <ul> at ANY depth of the document,
    // so nested list items are returned together with the top-level ones.
    let tutorialsXPathString = "//ul/li"
    let tutorialNodes = tutorialsParser.searchWithXPathQuery(tutorialsXPathString) as NSArray
    if(tutorialNodes.count == 0)
    {
        println("empty here")
    }
    else
    {
        for element in tutorialNodes
        {
            let elementTwo: TFHppleElement = element as TFHppleElement
         //   var tutorial = Tutorial()
            println("\(elementTwo.firstChild.content)")
       //     println(elementTwo.raw)
            // Pull the id out of the raw markup: skip everything up to `li id="`,
            // consume that marker, then capture the text up to the closing `">`.
            let userscanner = NSScanner(string:elementTwo.raw)
            var userscanned: NSString?
            if userscanner.scanUpToString("li id=\"", intoString:nil){
                userscanner.scanString("li id=\"", intoString:nil)
                if userscanner.scanUpToString("\">", intoString:&userscanned) {
                    // Bind the optional instead of casting it directly.
                    if let scanned = userscanned {
                        let newResult: String = scanned as String
                        println("NewResultValue: \(newResult)")
                    }
                }
            }
        }
    }
}

但我得到的是以下列表:

Person0
Person1
Person2
Person3
Person4
Person5
Person6
Person7
Person8

我只想检索

Person0
Person1
Person2
Person3
Person4
Person8

或者得到一个带层级关系的列表,这样我就能知道 Person5、6 和 7 的经理是 Person4。

我试过

// Attempted query from the question. Note: the predicate opened with `[` is
// never closed with `]`, so this XPath expression is malformed as written.
var tutorialsXPathString = "//ul[not(contains(@style, 'style=\"display: none;\"'))/li"

因为所有下级人员(5、6、7)的标签中都带有这个样式属性,但它不起作用 :(

任何帮助将不胜感激。

几个想法:

  1. 使用 // 的意思是"在 HTML 中的任意位置查找匹配项"。如果要控制所匹配的层级,只需使用 / 并从文档的根节点开始逐级写出路径。例如,要只获取第二级(而不是第一级或第三级),您可以执行以下操作:

    // Print the id and trimmed text of every second-level <li> only.
    let tutorialsParser = TFHpple(HTMLData: data)
    // An absolute path (single `/` steps from the root) pins the exact nesting
    // level: html > body > ul > li > ul > li.
    let tutorialsXPathString = "/html/body/ul/li/ul/li"
    if let tutorialNodes = tutorialsParser.searchWithXPathQuery(tutorialsXPathString) as? [TFHppleElement] {
        for element in tutorialNodes {
            // The first child's content includes surrounding whitespace/newlines
            // from the markup, so trim it before printing.
            let content = element.firstChild.content.stringByTrimmingCharactersInSet(NSCharacterSet.whitespaceAndNewlineCharacterSet())
            let identifier = element.attributes["id"] as String
            println("id = \(identifier); content = \(content)")
        }
    }
    
  2. 另外,我不确定您为什么要使用 NSScanner;如果您想获取元素的属性,可以直接使用 attributes 方法。

  3. 我还将tutorialNodes定义为TFHppleElement对象的数组,这稍微简化了for循环。

  4. 如果您想先获取顶级的 /ul/li,再获取其下的第二级(但不包括第三级),可以执行以下操作:

    // Print each top-level <li>, followed by its direct second-level <li>
    // children (third-level items are not visited).
    let tutorialsParser = TFHpple(HTMLData: data)
    let tutorialsXPathString = "/html/body/ul/li"
    if let tutorialNodes = tutorialsParser.searchWithXPathQuery(tutorialsXPathString) as? [TFHppleElement] {
        for element in tutorialNodes {
            let content = element.firstChild.content.stringByTrimmingCharactersInSet(NSCharacterSet.whitespaceAndNewlineCharacterSet())
            let identifier = element.attributes["id"] as String
            println("id = \(identifier); content = \(content)")
            // Walk one level down by tag name: the element's <ul> children,
            // then the <li> children of the first such <ul>.
            if let ul = element.childrenWithTagName("ul") as? [TFHppleElement] {
                if let li = ul.first?.childrenWithTagName("li") as? [TFHppleElement] {
                    for child in li {
                        let childContent = child.firstChild.content.stringByTrimmingCharactersInSet(NSCharacterSet.whitespaceAndNewlineCharacterSet())
                        let childIdentifier = child.attributes["id"] as String
                        println("  child id = \(childIdentifier); content = \(childContent)")
                    }
                }
            }
        }
    }
    

    或者你可以做这样的事情:

    // Print each top-level <li>, followed by its second-level <li> children,
    // this time locating the children with a per-element XPath query.
    let tutorialsParser = TFHpple(HTMLData: data)
    let tutorialsXPathString = "/html/body/ul/li"
    if let tutorialNodes = tutorialsParser.searchWithXPathQuery(tutorialsXPathString) as? [TFHppleElement] {
        for element in tutorialNodes {
            let content = element.firstChild.content.stringByTrimmingCharactersInSet(NSCharacterSet.whitespaceAndNewlineCharacterSet())
            let identifier = element.attributes["id"] as String
            println("id = \(identifier); content = \(content)")
            // NOTE(review): the path starts at /html/body/li, which suggests the
            // element-level search evaluates against the element's own markup
            // re-parsed as a standalone document — confirm against TFHpple.
            if let children = element.searchWithXPathQuery("/html/body/li/ul/li") as? [TFHppleElement] {
                for child in children {
                    let childContent = child.firstChild.content.stringByTrimmingCharactersInSet(NSCharacterSet.whitespaceAndNewlineCharacterSet())
                    let childIdentifier = child.attributes["id"] as String
                    println("  child id = \(childIdentifier); content = \(childContent)")
                }
            }
        }
    }
    

相关内容

  • 没有找到相关文章

最新更新