使用 Node.js 进行 HTML 操作



我想从某个来源(链接或文件等)获取 HTML,并从中查找值。HTML 格式如下:

<!doctype html>
<html>
<body>
<main>
<section id="serp">
<div>
<article>a</article>
<article>b</article>
<article>c</article>
<article>d</article>
</div>
</section>
</main>
</body>
</html>

首先,我使用了 Cheerio。我根据文档写了如下代码:

// Load the Cheerio HTML parsing library.
const cheerio = require('cheerio');
// Parse the HTML string into a queryable document.
// NOTE(review): `myhtml` is not defined in this snippet — presumably it holds
// the HTML document shown above as a string; confirm how it is obtained.
const $ = cheerio.load(myhtml);
// Select the direct children of every <div> nested inside the element with id "serp".
const content = $('#serp div').children();
console.log(content); // null (the result reported by the author)

按照同样的步骤,我还使用了 x-ray 和 jsdom,但它们全部都打印 null。

我已经做了以下工作:

// Sample HTML document, held in a template literal so it can span several lines.
let myhtml = `<!doctype html>
<html>
<body>
<main>
<section id="serp">
<div>
<article>a</article>
<article>b</article>
<article>c</article>
<article>d</article>
</div>
</section>
</main>
</body>
</html>`;
// Load the Cheerio library and parse the sample document into a queryable tree.
const cheerio = require('cheerio');
const $ = cheerio.load(myhtml);
// Collect the direct children of the <div> inside #serp (the four <article> elements).
const content = $('#serp div').children();
// Dump the whole selection object (matched nodes plus Cheerio bookkeeping fields).
console.log(content);
// .html() yields the inner HTML of the FIRST matched element only, so this prints "html: a".
console.log(`html: ${content.html()}`);

它将以下内容输出到控制台:

initialize {
'0': 
{ type: 'tag',
name: 'article',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: {},
'x-attribsNamespace': {},
'x-attribsPrefix': {},
children: [ [Object] ],
parent: 
{ type: 'tag',
name: 'div',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: {},
'x-attribsNamespace': {},
'x-attribsPrefix': {},
children: [Object],
parent: [Object],
prev: [Object],
next: [Object] },
prev: 
{ type: 'text',
data: '\n        ',
parent: [Object],
prev: null,
next: [Circular] },
next: 
{ type: 'text',
data: '\n        ',
parent: [Object],
prev: [Circular],
next: [Object] } },
'1': 
{ type: 'tag',
name: 'article',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: {},
'x-attribsNamespace': {},
'x-attribsPrefix': {},
children: [ [Object] ],
parent: 
{ type: 'tag',
name: 'div',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: {},
'x-attribsNamespace': {},
'x-attribsPrefix': {},
children: [Object],
parent: [Object],
prev: [Object],
next: [Object] },
prev: 
{ type: 'text',
data: '\n        ',
parent: [Object],
prev: [Object],
next: [Circular] },
next: 
{ type: 'text',
data: '\n        ',
parent: [Object],
prev: [Circular],
next: [Object] } },
'2': 
{ type: 'tag',
name: 'article',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: {},
'x-attribsNamespace': {},
'x-attribsPrefix': {},
children: [ [Object] ],
parent: 
{ type: 'tag',
name: 'div',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: {},
'x-attribsNamespace': {},
'x-attribsPrefix': {},
children: [Object],
parent: [Object],
prev: [Object],
next: [Object] },
prev: 
{ type: 'text',
data: '\n        ',
parent: [Object],
prev: [Object],
next: [Circular] },
next: 
{ type: 'text',
data: '\n        ',
parent: [Object],
prev: [Circular],
next: [Object] } },
'3': 
{ type: 'tag',
name: 'article',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: {},
'x-attribsNamespace': {},
'x-attribsPrefix': {},
children: [ [Object] ],
parent: 
{ type: 'tag',
name: 'div',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: {},
'x-attribsNamespace': {},
'x-attribsPrefix': {},
children: [Object],
parent: [Object],
prev: [Object],
next: [Object] },
prev: 
{ type: 'text',
data: '\n        ',
parent: [Object],
prev: [Object],
next: [Circular] },
next: 
{ type: 'text',
data: '\n      ',
parent: [Object],
prev: [Circular],
next: null } },
options: 
{ withDomLvl1: true,
normalizeWhitespace: false,
xml: false,
decodeEntities: true },
_root: 
initialize {
'0': 
{ type: 'root',
name: 'root',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: {},
'x-attribsNamespace': {},
'x-attribsPrefix': {},
children: [Object],
parent: null,
prev: null,
next: null },
options: 
{ withDomLvl1: true,
normalizeWhitespace: false,
xml: false,
decodeEntities: true },
length: 1,
_root: [Circular] },
length: 4,
prevObject: 
initialize {
'0': 
{ type: 'tag',
name: 'div',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: {},
'x-attribsNamespace': {},
'x-attribsPrefix': {},
children: [Object],
parent: [Object],
prev: [Object],
next: [Object] },
options: 
{ withDomLvl1: true,
normalizeWhitespace: false,
xml: false,
decodeEntities: true },
_root: initialize { '0': [Object], options: [Object], length: 1, _root: [Circular] },
length: 1,
prevObject: initialize { '0': [Object], options: [Object], length: 1, _root: [Circular] } } }
html: a
Process finished with exit code 0

最新更新