- Node 版本:v10.19.0
- npm 版本:6.13.4
-
使用的 npm 包:csvtojson(包链接)
// Convert a semicolon-delimited CSV read stream with csvtojson (with the
// `fork` option enabled) and log whatever each subscriber callback receives.
// `fileReadStream` is expected to be defined by the surrounding code.
csvtojson({ delimiter: ";", fork: true })
  .fromStream(fileReadStream)
  .subscribe(
    // First callback: receives each converted data object.
    (row) => {
      console.log(row);
    },
    // Second callback: receives the error when conversion fails.
    (error) => {
      console.error(error);
    },
    // Third callback: receives the value passed on completion.
    (result) => {
      console.log(result);
    }
  );
在尝试处理大型CSV文件(约130万条记录)时,在成功处理了一部分记录之后(例如 400 多条记录之后),我遇到了错误 "CSV Parse Error: Error: unclosed_quote."。从CSV文件来看,我没有发现数据格式有任何问题,不过,解析器抛出此错误可能是因为在列/字段值中出现了 "\n" 字符。
- 此软件包是否已有可用的解决方案?或
- 有解决这个错误的方法吗?或
- 有没有办法跳过这些存在任何错误(不仅限于这一种错误)的CSV行,让整个CSV到JSON的解析能够正常完成,而不会在处理过程中卡住?
如有任何帮助,我们将不胜感激。
我试了一下,可以借助 csvtojson 的"CSV 文件行钩子"(preFileLine)来处理:在钩子中检查无效行,然后修复它,或者直接将其置为空行。
下面的示例会直接跳过无效行(缺少结束引号的行):
example.js
const fs = require("fs");
const csvtojson = require("csvtojson");

// Matches a line that begins with a quote character and later contains a `;`
// immediately preceded by a character that is not a quote.
const INVALID_LINE = /^['"].*[^"'];/;

const input = fs.createReadStream("test.csv");
let skipped = 0;

// Called by csvtojson's `preFileLine` hook for every raw line before parsing.
// Returns the line unchanged, or an empty string so the bad line is dropped.
const dropInvalidLine = (rawLine, index) => {
  if (!INVALID_LINE.test(rawLine)) {
    return rawLine;
  }
  console.log(`Line #${index + 1} is invalid, skipping:`, rawLine);
  skipped += 1;
  return "";
};

const onRow = (row) => {
  console.log(row);
};

const onError = (error) => {
  console.error("Error:", error);
};

// Reports how many lines the hook discarded once conversion has finished.
const onDone = () => {
  console.log("Skipped lines:", skipped);
  console.log("Success");
};

csvtojson({ delimiter: ";", fork: true })
  .preFileLine(dropInvalidLine)
  .fromStream(input)
  .subscribe(onRow, onError, onDone);
test.csv
Name;Age;Profession
Bob;34;"Sales,Marketing"
Sarah;31;"Software Engineer"
James;45;Driver
"Billy, ;35;Manager
"Timothy;23;"QA
此正则表达式的效果更好
/^(?:[^"\\]|\\.|"(?:\\.|[^"\\])*")*$/g
下面是一个更复杂、可用于大文件的脚本,它逐行读取文件并逐行转换:
import csv from 'csvtojson'
import fs from 'fs-extra'
import lineReader from 'line-reader'
import { __dirname } from '../../../utils.js'
/**
 * Converts a delimited dump to JSON one line at a time and appends every
 * converted record to an output file.
 *
 * Each input line is converted on its own (with the header row prepended), so
 * the whole dump is never held in memory. Lines are processed strictly in
 * order: the next line is only read after the previous one has been converted
 * and written.
 *
 * @param {*} dumb - Input to read; handed as-is to `lineReader.eachLine`.
 * @param {*} editDumb - Output target; handed as-is to `fs.appendFile`.
 * @param {*} headers - Header row prepended to every line before conversion;
 *   also used as the csvtojson `headers` option in the default `options`.
 * @param {object} [config] - Optional settings.
 * @param {object} [config.options] - Options passed to csvtojson.
 * @returns {Promise<void>} Resolves once every line has been processed.
 * @throws {BaseError} When reading, converting or writing fails.
 */
const CSV2JSON = async(dumb, editDumb, headers, {
  options = {
    trim: true,
    delimiter: '|',
    quote: '"',
    escape: '"',
    fork: true,
    headers: headers
  }
} = {}) => {
  // A string passes when every double quote in it is either backslash-escaped
  // or belongs to a closed "..." pair. No `g` flag, so `.test` stays stateless.
  const balancedQuotes = /^(?:[^"\\]|\\.|"(?:\\.|[^"\\])*")*$/

  // Converts one input line and appends the resulting record to `editDumb`.
  const convertLine = async(line, lineNumber) => {
    let rows
    try {
      // NOTE(review): the '\n\r' separator is reconstructed from a garbled
      // 'nr' in the original text - confirm '\r\n' was not intended.
      rows = await csv(options).fromString(`${headers}\n\r${line}`)
        .preFileLine((fileLineString, lineIdx) => {
          // The first line of the input is never validated.
          if (lineNumber !== 1 && !balancedQuotes.test(fileLineString)) {
            // eslint-disable-next-line max-len
            console.log(`Line #${lineIdx + 1} is invalid. It has unescaped quotes. We will skip this line.. Invalid Line: ${fileLineString}`)
            return ''
          }
          return fileLineString
        })
    } catch (e) {
      // Awaiting the converter surfaces parse errors here, where they can be
      // propagated; throwing from an 'error' event listener would escape
      // every surrounding try/catch.
      throw new BaseError(`Error while converting CSV to JSON.\nLine before convert: ${line}\nError: ${e}`)
    }

    const record = rows[0]
    if (!record) {
      return
    }
    // Drop escaped quotes (\") from the serialized record, then only keep it
    // if the remaining quotes are still balanced.
    const json = JSON.stringify(record).replace(/\\"/g, '')
    if (balancedQuotes.test(json)) {
      await fs.appendFile(editDumb, json)
    }
  }

  try {
    log(`\n\nStarting CSV2JSON - Current directory: ${__dirname()} - Please wait..`)
    await new Promise((resolve, reject) => {
      let counter = 0
      // The three-argument iteratee makes line-reader wait for `cb` before
      // reading the next line, which keeps the appends in input order.
      lineReader.eachLine(dumb, (line, last, cb) => {
        counter++
        convertLine(line, counter).then(
          () => cb(),
          (e) => {
            reject(e)
            cb(false) // stop reading after the first failure
          }
        )
      }, (err) => {
        // Completion callback: also fires for an empty input, so the promise
        // always settles.
        if (err) {
          reject(err)
        } else {
          resolve()
        }
      })
    })
  } catch (e) {
    throw new BaseError(`Error while converting CSV to JSON - Error: ${e}`)
  }
}
export { CSV2JSON }