我正在尝试使用 superagent 和 cheerio 从 kijiji 抓取数据用于学习,但收到了错误 "Cannot read property 'parent' of undefined"(无法读取 undefined 的属性 'parent')。(我用 craigslist 试过,没有问题。)这个错误发生在我已经得到正确的结果 "object" 之后,似乎有什么地方不对劲。有人对此有什么想法吗?提前感谢您的帮助。
[ERROR] 17:30:24 TypeError
TypeError: Cannot read property 'parent' of undefined
at Function.exports.update (C:\Users\zhi_xie\nodejs\neomovie\node_modules\cheerio\lib\parse.js:55:25)
at module.exports (C:\Users\zhi_xie\nodejs\neomovie\node_modules\cheerio\lib\parse.js:17:11)
at Function.exports.load (C:\Users\zhi_xie\nodejs\neomovie\node_modules\cheerio\lib\static.js:19:14)
at C:Userszhi_xienodejsneomovieroutescrawlerusers.js:35:27
at Request.callback (C:\Users\zhi_xie\nodejs\neomovie\node_modules\superagent\lib\node\index.js:746:30)
at Request.<anonymous> (C:\Users\zhi_xie\nodejs\neomovie\node_modules\superagent\lib\node\index.js:135:10)
at Request.emit (events.js:95:17)
at ClientRequest.<anonymous> (C:\Users\zhi_xie\nodejs\neomovie\node_modules\superagent\lib\node\index.js:921:12)
at ClientRequest.emit (events.js:95:17)
at HTTPParser.parserOnIncomingClient [as onIncoming] (http.js:1692:21)
以下是代码:
var superagent = require('superagent');
var cheerio = require('cheerio');
var url = require('url');
var kijijiUrl = 'http://www.kijiji.ca/b-immobilier/ville-de-montreal/c34l1700281?ad=wanted';

/**
 * Collect the attribute rows of one ad page into a plain object.
 *
 * @param {string} html - Markup of a single ad page; must be non-empty.
 * @returns {Object} Each non-divider row's <th> text mapped to its <td> text,
 *     both trimmed.
 */
function parseAdAttributes(html) {
  var $ = cheerio.load(html);
  var attributes = {};
  $('table.ad-attributes').find('tr').not('.divider').each(function (idx, row) {
    var title = $(row).children('th').text().trim();
    var value = $(row).children('td').text().trim();
    attributes[title] = value;
  });
  return attributes;
}

/**
 * Fetch one ad page and log its parsed attribute table.
 *
 * @param {string} adUrl - Absolute URL of the ad page.
 */
function fetchAd(adUrl) {
  superagent.get(adUrl).end(function (err, sres) {
    if (err) {
      // This script has no `next` callback in scope, so report the failure
      // the same way the listing request does.
      return console.error(err);
    }
    if (!sres || !sres.text) {
      // cheerio.load() was reported to throw "Cannot read property 'parent'
      // of undefined" when given a missing or empty body, so skip such pages.
      return console.error('Empty response body for ' + adUrl);
    }
    console.log(parseAdAttributes(sres.text));
  });
}

// Fetch the listing page, gather the ad links, then fetch each ad.
superagent.get(kijijiUrl).end(function (err, res) {
  if (err) {
    return console.error(err);
  }
  if (!res || !res.text) {
    // Same guard as for the ad pages: never hand cheerio an empty document.
    return console.error('Empty response body for ' + kijijiUrl);
  }

  var topicUrls = [];
  var $ = cheerio.load(res.text);

  // Get all ad links on the listing page.
  $('div.container-results').children('table').each(function (idx, element) {
    var vipUrl = $(element).attr('data-vip-url');
    if (!vipUrl) {
      // Tables without the attribute carry no link; url.resolve() must not
      // be called with undefined.
      return;
    }
    topicUrls.push(url.resolve(kijijiUrl, vipUrl));
  });

  // forEach avoids the undeclared (implicitly global) loop counter.
  topicUrls.forEach(fetchAd);
});
正如 chrki 已经说过的,cheerio.load()
是用空值(如 null 或空字符串)调用的。在调用 .load()
之前检查该值是否包含某些内容:
// Only parse when the response actually carries text; `$` stays unset otherwise.
var $;
if (!res.text) {
  // do something else
} else {
  $ = cheerio.load(res.text);
}
我有相同的错误消息
// Incorrect: the property name is capitalized here. Per the surrounding answer
// the correct property is `body`, so this gives cheerio.load() nothing to load.
$ = cheerio.load(response.Body);
应该是
// Correct: lowercase `body` is the property name the answer says to use.
$ = cheerio.load(response.body);
所以Cheerio没有什么可以加载的。