我想从某个来源(链接或文件等)获取 HTML,并从中查找值。HTML 格式如下:
<!doctype html>
<html>
<body>
<main>
<section id="serp">
<div>
<article>a</article>
<article>b</article>
<article>c</article>
<article>d</article>
</div>
</section>
</main>
</body>
</html>
首先,我使用了 Cheerio。根据文档,我写了如下代码:
// Parse the HTML held in `myhtml` (defined elsewhere) and select the
// children of the <div> nested under the element with id "serp".
const cheerio = require('cheerio');
const $ = cheerio.load(myhtml);
const serpDiv = $('#serp div');
const content = serpDiv.children();
// The author reports that this prints null.
console.log(content);
按照同样的步骤,我还尝试了 x-ray 和 jsdom,但它们全都打印出 null。
我已经做了以下工作:
// Self-contained reproduction: parse an inline HTML document with Cheerio
// and print the children of the <div> nested under the "#serp" section.
const cheerio = require('cheerio');

// Sample document under test; it is never reassigned.
const myhtml = `<!doctype html>
<html>
<body>
<main>
<section id="serp">
<div>
<article>a</article>
<article>b</article>
<article>c</article>
<article>d</article>
</div>
</section>
</main>
</body>
</html>`;

const $ = cheerio.load(myhtml);
const serpDiv = $('#serp div');
const articles = serpDiv.children();

// Dumps the whole Cheerio selection object (its matched nodes plus
// bookkeeping fields such as options, _root, length and prevObject).
console.log(articles);

// NOTE(review): per the recorded output ("html: a"), html() on a
// multi-element selection yields only the first element's inner HTML.
const firstHtml = articles.html();
console.log(`html: ${firstHtml}`);
它将以下内容输出到控制台:
initialize {
'0':
{ type: 'tag',
name: 'article',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: {},
'x-attribsNamespace': {},
'x-attribsPrefix': {},
children: [ [Object] ],
parent:
{ type: 'tag',
name: 'div',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: {},
'x-attribsNamespace': {},
'x-attribsPrefix': {},
children: [Object],
parent: [Object],
prev: [Object],
next: [Object] },
prev:
{ type: 'text',
data: '\n ',
parent: [Object],
prev: null,
next: [Circular] },
next:
{ type: 'text',
data: '\n ',
parent: [Object],
prev: [Circular],
next: [Object] } },
'1':
{ type: 'tag',
name: 'article',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: {},
'x-attribsNamespace': {},
'x-attribsPrefix': {},
children: [ [Object] ],
parent:
{ type: 'tag',
name: 'div',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: {},
'x-attribsNamespace': {},
'x-attribsPrefix': {},
children: [Object],
parent: [Object],
prev: [Object],
next: [Object] },
prev:
{ type: 'text',
data: '\n ',
parent: [Object],
prev: [Object],
next: [Circular] },
next:
{ type: 'text',
data: '\n ',
parent: [Object],
prev: [Circular],
next: [Object] } },
'2':
{ type: 'tag',
name: 'article',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: {},
'x-attribsNamespace': {},
'x-attribsPrefix': {},
children: [ [Object] ],
parent:
{ type: 'tag',
name: 'div',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: {},
'x-attribsNamespace': {},
'x-attribsPrefix': {},
children: [Object],
parent: [Object],
prev: [Object],
next: [Object] },
prev:
{ type: 'text',
data: '\n ',
parent: [Object],
prev: [Object],
next: [Circular] },
next:
{ type: 'text',
data: '\n ',
parent: [Object],
prev: [Circular],
next: [Object] } },
'3':
{ type: 'tag',
name: 'article',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: {},
'x-attribsNamespace': {},
'x-attribsPrefix': {},
children: [ [Object] ],
parent:
{ type: 'tag',
name: 'div',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: {},
'x-attribsNamespace': {},
'x-attribsPrefix': {},
children: [Object],
parent: [Object],
prev: [Object],
next: [Object] },
prev:
{ type: 'text',
data: '\n ',
parent: [Object],
prev: [Object],
next: [Circular] },
next:
{ type: 'text',
data: '\n ',
parent: [Object],
prev: [Circular],
next: null } },
options:
{ withDomLvl1: true,
normalizeWhitespace: false,
xml: false,
decodeEntities: true },
_root:
initialize {
'0':
{ type: 'root',
name: 'root',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: {},
'x-attribsNamespace': {},
'x-attribsPrefix': {},
children: [Object],
parent: null,
prev: null,
next: null },
options:
{ withDomLvl1: true,
normalizeWhitespace: false,
xml: false,
decodeEntities: true },
length: 1,
_root: [Circular] },
length: 4,
prevObject:
initialize {
'0':
{ type: 'tag',
name: 'div',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: {},
'x-attribsNamespace': {},
'x-attribsPrefix': {},
children: [Object],
parent: [Object],
prev: [Object],
next: [Object] },
options:
{ withDomLvl1: true,
normalizeWhitespace: false,
xml: false,
decodeEntities: true },
_root: initialize { '0': [Object], options: [Object], length: 1, _root: [Circular] },
length: 1,
prevObject: initialize { '0': [Object], options: [Object], length: 1, _root: [Circular] } } }
html: a
Process finished with exit code 0