在 JS 中统计一个数组(约 12k 个元素)里的单词在另一个多维数组(约 20k 个元素)中出现的次数



我一直在尝试让它正常工作,但我目前的做法(使用正则表达式)会导致内存不足,和/或抛出错误:Uncaught SyntaxError: Invalid regular expression: b + +b : Nothing to repeat.

这是该函数:

/**
 * For every non-empty search term in column B of `nGramsSht` (row 6 downward),
 * counts how many rows in column A of `nGramFinalDataSht` contain that term as
 * a whole word, then writes the counts to column C of `nGramsSht` starting at
 * row 6.
 *
 * Matching is case-sensitive. A row is counted at most once per term, no
 * matter how many times the term appears in it.
 *
 * Relies on the script-level sheet objects `nGramsSht` and `nGramFinalDataSht`.
 *
 * @returns {void}
 */
function countSearchTerms() {
  const FIRST_TERM_ROW = 6;
  const TERM_COLUMN = 2;
  const COUNT_COLUMN = 3;

  // Escape regex metacharacters so a term such as "+ +" or "$100" is matched
  // literally instead of raising "Nothing to repeat".
  const escapeForRegExp = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  const termRowCount = nGramsSht.getLastRow() - (FIRST_TERM_ROW - 1);
  if (termRowCount < 1) {
    // getRange() throws on a non-positive row count; nothing to count anyway.
    return;
  }

  const filteredTerms = nGramsSht
    .getRange(FIRST_TERM_ROW, TERM_COLUMN, termRowCount, 1)
    .getValues()
    .map((row) => String(row[0] ?? ''))
    .filter((term) => term !== '');

  const dataRowCount = nGramFinalDataSht.getLastRow();
  const searchTermData =
    dataRowCount < 1
      ? []
      : nGramFinalDataSht
          .getRange(1, 1, dataRowCount, 1)
          .getValues()
          .map((row) => String(row[0] ?? ''))
          .filter((text) => text !== '');

  const occurrences = filteredTerms.map((term) => {
    // Build the pattern once per term rather than once per (term, row) pair.
    // "\b" inside a string literal is a backspace, and a real \b cannot match
    // next to a non-word character (e.g. the "$" in "$100"), so the word
    // boundary is expressed as "no word character on either side".
    const pattern = new RegExp(`(?<!\\w)${escapeForRegExp(term)}(?!\\w)`);
    let count = 0;
    for (const text of searchTermData) {
      if (pattern.test(text)) {
        count += 1;
      }
    }
    return [count];
  });

  if (occurrences.length > 0) {
    // Clear previous results first so stale counts below the new block vanish.
    nGramsSht.getRange(FIRST_TERM_ROW, COUNT_COLUMN, nGramsSht.getLastRow() - 5, 1).clearContent();
    nGramsSht.getRange(FIRST_TERM_ROW, COUNT_COLUMN, occurrences.length, 1).setValues(occurrences);
  }
}

我想采用这个答案中的方法,但该如何统计 a 中的单词在 b 中出现的次数?

/**
 * Demo from the question: clears "Sheet0", writes the word list `a` into
 * column A, then counts (case-sensitively, via TextFinder) how many cells
 * contain each distinct word of `a`, and writes a "Words"/"Count" table two
 * rows below the last used row.
 */
function wordcount() {
  const sheet = SpreadsheetApp.getActive().getSheetByName("Sheet0");
  sheet.clearContents();

  const a = [["A"], ["earth"], ["20"], ["tunnel"], ["house"], ["earth A"], ["$100"], ["house $100"]];
  // Declared by the question but never read in this version.
  const b = [["A"], ["A Plane is expensive"], ["peaceful earth"], ["20 years"], ["tunnel"], ["tiny house"], ["earth B612"], ["$100"], ["house $100"]];

  sheet.getRange(1, 1, a.length, a[0].length).setValues(a);

  // Split every cell on spaces and keep each distinct word once, in first-seen order.
  const distinctWords = new Set(a.flat().join(' ').split(' '));

  const o = [["Words", "Count"]];
  for (const word of distinctWords) {
    const matches = sheet.createTextFinder(word).matchCase(true).findAll();
    o.push([word, matches.length]);
  }

  const firstOutputRow = sheet.getLastRow() + 2;
  sheet.getRange(firstOutputRow, 1, o.length, o[0].length).setValues(o);
}

这样试试:

/**
 * Answer's version: clears "Sheet0", writes the search data `b` into column A,
 * then for each distinct space-separated word of `a` counts the cells that
 * contain it (case-sensitive TextFinder search) and writes a "Words"/"Count"
 * table two rows below the last used row.
 */
function wordcount() {
  const spreadsheet = SpreadsheetApp.getActive();
  const target = spreadsheet.getSheetByName("Sheet0");
  target.clearContents();

  const a = [["A"], ["earth"], ["20"], ["tunnel"], ["house"], ["earth A"], ["$100"], ["house $100"]];
  const b = [["A"], ["A Plane is expensive"], ["peaceful earth"], ["20 years"], ["tunnel"], ["tiny house"], ["earth B612"], ["$100"], ["house $100"]];

  // The sheet must hold `b` before searching: TextFinder scans sheet cells.
  const dataRange = target.getRange(1, 1, b.length, b[0].length);
  dataRange.setValues(b);

  const words = a.flat().join(' ').split(' ');
  const uniqueWords = Array.from(new Set(words));

  // Number of cells in the sheet whose text contains `word` (case-sensitive).
  const countCellsContaining = (word) =>
    target.createTextFinder(word).matchCase(true).findAll().length;

  const rows = uniqueWords.map((word) => [word, countCellsContaining(word)]);
  const table = [["Words", "Count"], ...rows];

  const startRow = target.getLastRow() + 2;
  target.getRange(startRow, 1, table.length, table[0].length).setValues(table);
}
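运行后,Sheet0 的内容如下(A 列先是搜索数据,空一行后是统计结果):

| A 列 | B 列 |
|---|---|
| A | |
| A Plane is expensive | |
| peaceful earth | |
| 20 years | |
| tunnel | |
| tiny house | |
| earth B612 | |
| $100 | |
| house $100 | |
| | |
| Words | Count |
| A | 2 |
| earth | 2 |
| 20 | 1 |
| tunnel | 1 |
| house | 2 |
| $100 | 2 |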

也许是您的数据导致正则表达式失败。您应该先对搜索词进行正则转义,并检查它是否为空。希望这能帮到您。

/**
 * Escapes every character that has a special meaning in a regular expression
 * so the result can be embedded in a `RegExp` pattern and match the input
 * text literally.
 *
 * @param {string} string - Text to be matched literally.
 * @returns {string} The text with `. * + ? ^ $ { } ( ) | [ ] \` each preceded
 *   by a backslash.
 */
function escapeRegExp(string) {
  // "\\$&" inserts a literal backslash followed by the matched character.
  return string.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
// Meant to replace the body of the inner loop: `filteredTerms`, `r`,
// `searchTermData`, `a` and `count` come from the surrounding function.
// Escaping makes terms such as "+ +" match literally instead of throwing.
const term = escapeRegExp(filteredTerms[r].toString().trim());
// Skip empty terms. "\b" inside a string literal is a backspace, and a real \b
// cannot match next to a non-word character (e.g. the "$" in "$100"), so the
// whole-word check is written as "no word character on either side".
if (term && new RegExp(`(?<!\\w)${term}(?!\\w)`).test(searchTermData[a])) {
  count++;
}

最新更新