错误:右双引号 (") 和字段分隔符之间的数据



我正在尝试使用Google Apps脚本从Google Drive中获取CSV并将其放入Big Query中。当我上传时,我得到这个错误:

"读取数据时出错,错误消息:分析从位置560550开始的行时检测到错误。错误:数据位于右双引号("(和字段分隔符之间">

我试着查看文件的字节位置,以及它在CSV边界之外的方式(它只会达到大约501500个字节(。

以下是我正在使用的CSV链接,它是一个网站的摘录:https://drive.google.com/file/d/1k3cGlTSA_zPQCtUkt20vn6XKiLPJ7mFB/view?usp=sharing

这是我的相关代码:

function csvToBigQuery(exportFolder, csvName, bqDatasetId){
try{
//get most recent export from Screaming Frog
var mostRecentFolder = [];    
while(exportFolder.hasNext()){
var folder = exportFolder.next();
var lastUpdated = folder.getLastUpdated();
if(mostRecentFolder.length == 0)
mostRecentFolder = [folder.getLastUpdated(),folder.getId()];
else if(lastUpdated > mostRecentFolder[0])
mostRecentFolder = [lastUpdated, folder.getId()];
}    
var folderId = mostRecentFolder[1];
var file = DriveApp.getFolderById(folderId).getFilesByName(csvName + '.csv').next();

if(!file)
throw "File doesn't exist";

//get csv and add date column.
//getBlob().getDataAsString().replace(/(["'])(?:(?=(\?))2[sS])*?1/g, function(e){return e.replace(/r?n|r/g, ' ')})
var rows = Utilities.parseCsv(file.getBlob().getDataAsString());
Logger.log(rows);
var numColumns = rows[0].length;    

rows.forEach(function(row){
row[numColumns] = date;
});
rows[0][numColumns] = 'Date';

let csvRows = rows.map(values =>values.map(value => JSON.stringify(value).replace(/\"/g, '""')));
let csvData = csvRows.map(values => values.join(',')).join('n');
//log(csvData)
var blob = Utilities.newBlob(csvData, 'application/octet-stream');


//create job for inserting to BQ.
var loadJob = {
configuration: {
load: {
destinationTable: {
projectId: bqProjectId,
datasetId: bqDatasetId,
tableId: csvName
},
autodetect: true,  // Infer schema from contents.
writeDisposition: 'WRITE_APPEND',
}
}
};

//append to table in BQ.
BigQuery.Jobs.insert(loadJob, bqProjectId, blob);


}catch(e){
Logger.log(e); 
}
}

修改点:

从你的错误信息中,我认为可能有部分没有包含在双倍配额中。所以,我搜索了一下。当我看到你的CSV数据,并且你的CSV资料用下面的脚本用""替换"(|.+?)"时,发现711行有这个值。

function sample() {
var id = "###";  // File ID of your CSV file.
// This is your script.
var file = DriveApp.getFileById(id);
var rows = Utilities.parseCsv(file.getBlob().getDataAsString());
var numColumns = rows[0].length;
var date = "sample";
rows.forEach(function(row){
row[numColumns] = date;
});
rows[0][numColumns] = 'Date';
let csvRows = rows.map(values =>values.map(value => JSON.stringify(value).replace(/\"/g, '""')));
let csvData = csvRows.map(values => values.join(',')).join('n');

// I added below script for checking your CSV data.
var res = csvData.replace(/"(|.+?)"/g, "");
DriveApp.createFile("sample.txt", res);
}

行711如下。

"https://supergoop.com/products/lip-shield-trio/?utm_source=Gorgias&utm_medium=CustomerCare&utm_campaign=crosssellhello","text/html; charset=utf-8","200","OK","Non-Indexable","Canonicalised","Lip Shield Trio - Restores, Protects + Water-resistant – Supergoop!","67","595","Moisturizing lip protection made from antioxidant-rich coconut, avocado, and grape seed oil.","92","576","","0","Lip Shield Trio","15","Lip Shield Trio","15","Why We Love It","14","Ingredients","11","","","","https://supergoop.com/products/lip-shield-trio","","","","","451488","754","1.686","5","","12","4","0.590","205","80","8","5","","","","","f6d1476960d22b1c5964581e161bdd49","0.064","","","","","HTTP/1.1","https://supergoop.com/products/lip-shield-trio/?utm_source=Gorgias&utm_medium=CustomerCare&utm_campaign=crosssellhello%5C"

根据这个值,我发现"被用于"https://supergoop.com/products/lip-shield-trio/?utm_source=Gorgias&utm_medium=CustomerCare&utm_campaign=crosssellhello"。我认为你的问题可能是由于这个原因。

那么为了避免这个问题,下面的修改如何呢?

修改的脚本:

发件人:
let csvRows = rows.map(values =>values.map(value => JSON.stringify(value).replace(/\"/g, '""')));
收件人:
let csvRows = rows.map(values =>values.map(value => JSON.stringify(value).replace(/\"/g, '""').replace(/\"/g, '')));

发件人:
var rows = Utilities.parseCsv(file.getBlob().getDataAsString());
收件人:
var rows = Utilities.parseCsv(file.getBlob().getDataAsString().replace(/\/g, ''));
  • 通过此修改,我可以确认您的脚本和修改后的脚本之间的文件大小减少了2个字节。而且,当使用修改后的脚本将上述检查脚本用于CSV数据时,我可以确认所有行都没有值

最新更新