尝试使用噩梦.js从搜索结果中获取数据,但出现错误:"Cannot read property 'click' of undefined"



我正在尝试解决这个练习:

a( 通过在谷歌搜索中输入"数据表",然后单击正确的结果来导航到 https://datatables.net/。
b( 您将找到一个包含一些数据的表格示例。请从表中获取/提取数据到数组中。 在数组中,您将拥有对象。对象将是表中的一行。对象属性将是每个表列中的数据。
c( 请将数组导出为 CSV。

因为我从来没有做过噩梦.js所以我用谷歌搜索并找到了我需要 https://github.com/XanderGriff/webscraping-with-nightmare/blob/master/main.mjs 的东西,但一开始它不起作用,我稍微更改了代码,但仍然有错误。


// Package Definitions
var csvWriter = require('csv-write-stream');
var fs = require('fs');
var Nightmare = require('nightmare');

// Constant Definitions
const GOOGLE = 'https://www.google.com';
const QUERY = 'datatables';
const SEARCHBAR = 'form[action*="/search"] [name=q]';
const SEARCHBUTTON = 'form[action*="/search"] [type=submit]';
const SEARCH_RESULT_ID = 'h3.r > a';
const DATATABLES_LINK = 'https://datatables.net/';
const DATATABLES_SIZE_SELECTOR = 'select[name="example_length"]';
const DATATABLES_DATA_SELECTOR = 'table#example tr';
(async ()=>{
let nightmare;
try {
nightmare = Nightmare({ show: true });
await nightmare
// Navigate to Google
.goto(GOOGLE)
.type(SEARCHBAR, QUERY)
.click(SEARCHBUTTON)
.wait(SEARCH_RESULT_ID)
await nightmare.evaluate( (SEARCH_RESULT_ID, 
DATATABLES_LINK) => {
// filter results based on css selectors to choose link with 
proper url
// indexed at 0 to access element from single-element array 
produced in above instruction
(Array.from(document.querySelectorAll(SEARCH_RESULT_ID))).filter(a 
=> a.href === DATATABLES_LINK)[0].click();
}, SEARCH_RESULT_ID, DATATABLES_LINK)
// Adjust datatable to show all entries
await nightmare
.wait(DATATABLES_SIZE_SELECTOR)
.select(DATATABLES_SIZE_SELECTOR, 100);
// Retrieve values from datatable
await nightmare.evaluate( (DATATABLES_DATA_SELECTOR) => {
let table_rows = 
Array.from(document.querySelectorAll(DATATABLES_DATA_SELECTOR));
// delineate between keys and vals from retrieved table data
let table_keys_row = 
Array.from((table_rows[0]).querySelectorAll('th')).map(e => 
e.innerHTML); //array of strings
let table_data_rows = table_rows.slice(1,58); //array 
of arrays of HTML elements
let array_of_row_objects = [];
let row_object = {};
let formatted_values = [];
table_data_rows.forEach( row => {
// grab innerHTML from each element in the row
formatted_values = 
Array.from(row.querySelectorAll('td')).map(e => e.innerHTML);
// place each of the element values in an object with each value 
associated with its respective key 
row_object = {};
for(i = 0; i < table_keys_row.length; i++) {
row_object[table_keys_row[i]] = 
formatted_values[i];
}
array_of_row_objects.push(row_object);
})
return array_of_row_objects;
}, DATATABLES_DATA_SELECTOR)
// Write to CSV
writer.pipe(fs.createWriteStream('output.csv'));
result.forEach( obj => {
writer.write(obj);
});
writer.end();
console.log("Wrote values to CSV...")
} catch (error) {
console.error(error);
throw error;
} finally {
await nightmare.end();
}
})();

我使用 DEBUG=nightmare 运行代码并收到这个:

nightmare queuing process start +0ms
nightmare queueing action "goto" for https://www.google.com +3ms
nightmare queueing action "type" +0ms
nightmare queueing action "click" +0ms
nightmare queueing action "wait" +0ms
nightmare running +1ms
nightmare queueing action "evaluate" +5s
nightmare running +1ms
{ TypeError: Cannot read property 'click' of undefined
at fn (<anonymous>:8:106)
at javascript (<anonymous>:23:21)
at <anonymous>:38:3
at EventEmitter.electron.ipcRenderer.on 
(/home/anna/automation/node_modules/electron/dist/resources/electron.a 
sar/renderer/web-frame-init.js:36:30)
at emitMany (events.js:147:13)
at EventEmitter.emit (events.js:224:7) code: -1 }
nightmare running +12ms
nightmare electron child process exited with code 0: success! 
+55ms
(node:10610) UnhandledPromiseRejectionWarning: TypeError: Cannot 
read property 'click' of undefined
at fn (<anonymous>:8:106)
at javascript (<anonymous>:23:21)
at <anonymous>:38:3
at EventEmitter.electron.ipcRenderer.on 
(/home/anna/automation/node_modules/electron/dist/resources/electron. 
asar/renderer/web-frame-init.js:36:30)
at emitMany (events.js:147:13)
at EventEmitter.emit (events.js:224:7)
(node:10610) UnhandledPromiseRejectionWarning: Unhandled promise 
rejection. This error originated either by throwing inside of an 
async function without a catch block, or by rejecting a promise 
which was not handled with .catch(). (rejection id: 1)
(node:10610) [DEP0018] DeprecationWarning: Unhandled promise 
rejections are deprecated. In the future, promise rejections that 
are not handled will terminate the Node.js process with a non-zero 
exit code.

如果有人知道或面临这个问题,请帮助我弄清楚。

var csvWriter = require('csv-write-stream');
var writer = csvWriter();
var fs = require('fs');
var Nightmare = require('nightmare');
(async () => {
let nightmare;
try {
nightmare = Nightmare({ show: true });
await nightmare
.goto('https://www.google.com')
.type('form[action*="/search"] [name=q]', 'datatables')
.click('form[action*="/search"] [type=submit]')
.wait(2500)
await nightmare.evaluate( () => {
var link = document.querySelector(".r a");
link.click();
})
await nightmare
.wait('select[name="example_length"]')
.select('select[name="example_length"]', 100);
await nightmare.evaluate( () => {
let tableRows = Array.from(document.querySelectorAll('table#example tr'));
let tableKeysRow = Array.from((tableRows[0]).querySelectorAll('th')).map(e => e.innerHTML); 
let tableDataRows = tableRows.slice(1,58); 
let arrayOfRowObjects = [];
let rowObject = {};
let formattedValues = [];
tableDataRows.forEach( row => {
formattedValues = Array.from(row.querySelectorAll('td')).map(e => e.innerHTML);
rowObject = {};
for(i = 0; i < tableKeysRow.length; i++) {
rowObject[tableKeysRow[i]] = formattedValues[i];
}
arrayOfRowObjects.push(rowObject);
})
return arrayOfRowObjects;
})
.then( result => {
writer.pipe(fs.createWriteStream('outputData.csv'));
result.forEach( obj => {
writer.write(obj);
});
writer.end();
})
} catch (error) {
console.error(error);
throw error;
} finally {
await nightmare.end();
}
})();

相关内容

最新更新