我有一个 Google Apps Script 脚本,每天处理并归档这个"csv"文件。文件越来越大,脚本开始超时。这个以竖线(|)分隔的"csv"文件在某些记录的注释字段中包含回车符和换行符(\r\n),导致这些记录在真正结束之前就被断开。以下代码会删除记录中间多余的回车符和换行符,并把数据整理成可用的 csv 格式。有没有更高效的方法来编写这段代码?
以下是片段:
/**
 * Repairs a pipe-delimited export whose records are split by stray CRLF
 * sequences inside a field, strips single quotes that sit directly next to
 * a pipe, and saves the cleaned text as a new CSV file.
 *
 * A CRLF is kept as a record boundary only when it is immediately followed
 * by 1-5 digits and a pipe; every other CRLF (including a trailing one) is
 * removed.
 *
 * NOTE(review): csvFolderId and csvFileName are not defined in this function;
 * presumably they are script-level globals - confirm before relying on them.
 *
 * @param {string} csvFileId - Drive ID of the raw file to clean.
 * @returns {string} ID of the newly created file.
 */
function cleanCSV(csvFileId){
//The file we receive has line breaks in the middle of the records, this removes the line breaks and converts the file to a csv.
var content = DriveApp.getFileById(csvFileId).getBlob().getDataAsString();
// Mark every real record start (CRLF + 1-5 digits + pipe) with double tildes
// so it survives the blanket CRLF removal below.
// Caveat: a literal "~~" already present in the data is also turned into a
// line break by the third step.
var identifyNewLine = content.replace(/\r\n\d{1,5}\|/g,"~~$&");
var noReturnsContent = identifyNewLine.replace(/\r\n/g, ""); // removes every CRLF, leaving only the tilde markers
var newContent = noReturnsContent.replace(/~~/g,"\r\n"); // turns each marker back into a CRLF: one record per line
var noEndQuote = newContent.replace(/'\|/g,"|"); // removes a single quote directly before a pipe (trailing quote)
var csvContent = noEndQuote.replace(/\|'/g,"|"); // removes a single quote directly after a pipe (leading quote)
var sheetId = DriveApp.getFolderById(csvFolderId).createFile(csvFileName, csvContent, MimeType.CSV).getId();
return sheetId;
}
以下是文件示例:
前三行 replace 可以合并为一行:只需删除所有后面没有紧跟"1 到 5 位数字加 |"的 \r\n,即 .replace(/\r\n(?!\d{1,5}\|)/g,"")。
如果使用交替(alternation)写法 .replace(/'\||\|'/g,"|"),最后两行 replace 也可以合并为一行。(注意:当两个带引号的字段相邻时,例如 'a'|'b',这种单次扫描只会去掉 | 前面的引号;改用 /'?\|'?/g 可以同时去掉 | 两侧的引号。)
使用以下代码:
/**
 * Repairs a pipe-delimited export whose records are split by stray CRLF
 * sequences inside a field, strips single quotes that sit directly next to
 * a pipe, and saves the cleaned text as a new CSV file.
 *
 * A CRLF is kept as a record boundary only when it is immediately followed
 * by 1-5 digits and a pipe; every other CRLF (including a trailing one) is
 * removed.
 *
 * NOTE(review): csvFolderId and csvFileName are not defined in this function;
 * presumably they are script-level globals - confirm before relying on them.
 *
 * @param {string} csvFileId - Drive ID of the raw file to clean.
 * @returns {string} ID of the newly created file.
 */
function cleanCSV(csvFileId){
//The file we receive has line breaks in the middle of the records, this removes the line breaks and converts the file to a csv.
var content = DriveApp.getFileById(csvFileId).getBlob().getDataAsString();
// Negative lookahead: drop a CRLF unless the next record's numeric id and
// pipe follow it directly.
var newContent = content.replace(/\r\n(?!\d{1,5}\|)/g,"");
// Optional quote on each side of the pipe, consumed in one match. A plain
// alternation ('| or |') would consume the pipe on the first hit and miss
// the quote after it in input such as 'a'|'b'.
var csvContent = newContent.replace(/'?\|'?/g,"|");
var sheetId = DriveApp.getFolderById(csvFolderId).createFile(csvFileName, csvContent, MimeType.CSV).getId();
return sheetId;
}