在短语中的搜索字符串之前和之后搜索单词的算法



假设我有下面的短语数组

// Sample corpus: ten entries, each holding one sentence under its `text` key.
const senetences = [
  { text: "And a moment I Yes." },
  { text: "Wait a moment I Yes." },
  { text: "And a moment I Hello, Guenta, trenteuno." },
  { text: "Okay a moment. Hello. Perfect." },
  { text: "And a moment." },
  { text: "And a moment I Hello, Guenta, trenteuno." },
  { text: "Just a moment in Quinta trenteuno." },
  { text: "And a moment in Quinta trenteuno." },
  { text: "Wait a moment I Hello, Guenta, trenteuno." },
  { text: "Just a moment in Quinta trenteuno." },
];

现在我要搜索一个单词……假设是 moment。所以我需要得到确切单词 moment 前后的若干单词,以及这些短语在整个数组中的匹配分数。

示例输出

[
{ "text": "And a moment", "score": 5, "percent": 50 },
{ "text": "moment I Hello", "score": 3, "percent": 30 },
{ "text": "moment in Quinta", "score": 3, "percent": 30 },
{ "text": "Wait a moment", "score": 2, "percent": 20 },
{ "text": "moment I Yes", "score": 2, "percent": 20 },
{ "text": "Just a moment", "score": 2, "percent": 20 },
{ "text": "Okay a moment", "score": 1, "percent": 10 }
]

score 是该短语出现的次数,percent 是出现次数除以句子总数再乘以 100(例如 5 / 10 × 100 = 50)。

我能够在senetences上循环后获得单词,但在循环后被卡住了。

const string = "moment";
const words = [];
// For every sentence that contains the search word, collect the lower-cased
// three-word window ending with it and the three-word window starting with it.
senetences.forEach(({ text }) => {
  // Strip periods and commas so that e.g. "moment." still matches "moment".
  const arrayString = text.toLowerCase().replace(/[.,]/g, '').split(' ');
  const index = arrayString.indexOf(string.toLowerCase());
  if (index === -1) {
    // The sentence does not contain the search word: nothing to collect.
    return;
  }
  // Keep only windows that lie fully inside the sentence; interpolating a
  // missing word would otherwise produce the literal text "undefined".
  if (index >= 2) {
    words.push(arrayString.slice(index - 2, index + 1).join(' '));
  }
  if (index + 2 < arrayString.length) {
    words.push(arrayString.slice(index, index + 3).join(' '));
  }
});

在那之后,我陷入了如何在senetences数组中查找的困境。

// Collect the text of each sentence once for every phrase found inside its
// lower-cased text.
// NOTE(review): `phrases` is not defined in the code visible here; presumably
// it is the list of collected word windows. Confirm before running.
const output = [];
senetences.forEach((sentence) => {
  phrases.forEach((phrase) => {
    const position = sentence.text.toLowerCase().indexOf(phrase);
    if (position === -1) {
      return;
    }
    output.push(sentence.text);
  });
});

您可以将字符串和计数存储在对象中:

/**
 * Count the word windows that surround a search word across sentences.
 *
 * Periods and commas are removed from each sentence before it is split on
 * single spaces. Only the first word equal to `search` (case-insensitive) is
 * considered per sentence. For that position, two windows may be counted:
 * the `length` words before it plus the word itself, and the word itself plus
 * the `length` words after it. A window is counted only when it fits entirely
 * inside the sentence. Window text keeps the sentence's original casing.
 *
 * @param {string} search - Word to look for.
 * @param {number} length - Number of context words on each side of `search`.
 * @param {Array<{text: string}>} sentences - Sentences to scan.
 * @returns {Array<{text: string, score: number, percent: number}>} One entry
 *   per distinct window, sorted by descending `score`; `percent` is
 *   `100 * score / sentences.length`.
 */
function f(search, length, sentences) {
  const needle = search.toLowerCase();
  // A Map (instead of a plain object) keeps inherited keys such as
  // "constructor" from being mistaken for existing counts.
  const counts = new Map();

  for (const { text } of sentences) {
    // Character class: remove literal periods and commas only.
    const arrayString = text.replaceAll(/[.,]/g, '').split(' ');
    const index = arrayString.findIndex((el) => el.toLowerCase() === needle);
    if (index === -1) {
      // Without this guard a non-matching sentence passes the "after" check
      // below and contributes an empty-string window.
      continue;
    }

    const windows = [];
    if (index >= length) {
      windows.push(arrayString.slice(index - length, index + 1).join(' '));
    }
    if (index < arrayString.length - length) {
      windows.push(arrayString.slice(index, index + length + 1).join(' '));
    }
    for (const key of windows) {
      counts.set(key, (counts.get(key) ?? 0) + 1);
    }
  }

  return [...counts]
    .map(([text, score]) => ({
      text,
      score,
      percent: (100 * score) / sentences.length,
    }))
    .sort((l, r) => r.score - l.score);
}
// Demo input: ten sample sentences, each stored under the `text` key.
const sentences = [
  { text: "And a moment I Yes." },
  { text: "Wait a moment I Yes." },
  { text: "And a moment I Hello, Guenta, trenteuno." },
  { text: "Okay a moment. Hello. Perfect." },
  { text: "And a moment." },
  { text: "And a moment I Hello, Guenta, trenteuno." },
  { text: "Just a moment in Quinta trenteuno." },
  { text: "And a moment in Quinta trenteuno." },
  { text: "Wait a moment I Hello, Guenta, trenteuno." },
  { text: "Just a moment in Quinta trenteuno." },
];

// Print what f returns for the word "moment" with 3 passed as `length`.
console.log(
  f('moment', 3, sentences),
);

这个函数应该可以工作,我添加了一个精度参数,设置你想在搜索中使用的单词数量,并添加了一些代码来替换句子中的所有非文本字符。

/**
 * Score the phrases that end with or start with a search word.
 *
 * Each sentence is lower-cased, stripped of the characters . , ' ! ? and
 * split on single spaces. Only the first word equal to the lower-cased
 * `searchPhrase` is considered per sentence. For that position, two phrases
 * of `precision` words may be collected: the one ending with the word and the
 * one starting with it. A phrase is collected only when it fits entirely
 * inside the sentence.
 *
 * @param {Array<{text: string}>} sentences - Sentences to scan.
 * @param {string} searchPhrase - Single word to look for.
 * @param {number} precision - Words per phrase, search word included.
 * @returns {Array<{text: string, score: number, percent: number}>} One entry
 *   per distinct phrase, in order of first appearance; `score` is the number
 *   of times it was collected and `percent` is `100 * score / sentences.length`.
 */
function searchAndScoreWords(sentences, searchPhrase, precision) {
  const target = searchPhrase.toLowerCase();
  // phrase -> occurrences; a Map iterates in insertion order, which keeps
  // the result in order of first appearance.
  const counts = new Map();
  const tally = (phraseWords) => {
    const key = phraseWords.join(' ');
    counts.set(key, (counts.get(key) ?? 0) + 1);
  };

  for (const sentence of sentences) {
    // Character class: remove only the listed punctuation marks.
    const arrayString = sentence.text
      .toLowerCase()
      .replace(/[.,'!?]/g, '')
      .split(' ');
    const index = arrayString.indexOf(target);
    if (index === -1) {
      continue;
    }
    // Enough words before the search word: collect the phrase ending with it.
    if (index >= precision - 1) {
      tally(arrayString.slice(index - precision + 1, index + 1));
    }
    // Enough words after the search word: collect the phrase starting with it.
    if (index <= arrayString.length - precision) {
      tally(arrayString.slice(index, index + precision));
    }
  }

  // Multiplying before dividing keeps the percentage to a single rounding.
  return Array.from(counts, ([text, score]) => ({
    text,
    score,
    percent: (100 * score) / sentences.length,
  }));
}
// Demo input: the sample sentences with assorted punctuation placed right
// after "moment" in several of them.
const sentences = [
  { text: "And a moment, I Yes." },
  { text: "Wait a moment' I Yes." },
  { text: "And a moment? I Hello, Guenta, trenteuno." },
  { text: "Okay a moment. Hello. Perfect." },
  { text: "And a moment." },
  { text: "And a moment! I Hello, Guenta, trenteuno." },
  { text: "Just a moment in Quinta trenteuno." },
  { text: "And a moment in Quinta trenteuno." },
  { text: "Wait a moment I Hello, Guenta, trenteuno." },
  { text: "Just a moment in Quinta trenteuno." },
];

// Print the scored phrases for "moment" with 3 passed as `precision`.
console.log(
  searchAndScoreWords(sentences, "moment", 3),
);

相关内容

最新更新