这是一段代码,用于逐字比较两个句子,并返回具有某些条件的单词匹配数:
提示:第一句中的单词 ::::第二句中的单词
1) protecting :::: i —— 结果应为不匹配
2) protecting :::: protect —— 结果应为匹配
3) protect :::: protecting —— 结果应为匹配
4) him :::: i —— 结果应为不匹配
5) i :::: i —— 结果应为匹配,但只算一次而不是两次(让我解释一下):
我们将此字符串作为第一句话:
let speechResult = "they're were protecting him i knew that i was aware";
如您所见,它有两个 i,但这里的第二句话中只有一个i:
let expectSt = ['i was sent to earth to protect you'];
因此,我们应该将这种匹配视为一次出现而不是两次,如果我们在第二句中也出现了两次i,那么我们会将i匹配视为两次出现。
6) was :::: was —— 结果应为匹配
这是我到目前为止的代码:
// Sentences we should compare word by word
let speechResult = "they're were protecting him i knew that i was aware";
let expectSt = ['i was sent to earth to protect you'];
// Create arrays of words from above sentences.
// The separator has to be /\s+/ (one or more whitespace characters);
// a bare /s+/ would split on the letter "s" instead of on spaces.
let speechResultWords = speechResult.split(/\s+/);
let expectStWords = expectSt[0].split(/\s+/);
// Here you are..
//console.log(speechResultWords)
//console.log(expectStWords)
// Count matches between the two sentences.
// Reads the module-level arrays `speechResultWords` and `expectStWords` and
// counts every (spoken word, expected word) pair in which the spoken word
// contains the expected word as a substring. Each pair is logged.
function includeWords() {
  // Running total of matching pairs
  let total = 0;
  for (const spoken of speechResultWords) {
    for (const expected of expectStWords) {
      if (!spoken.includes(expected)) {
        continue;
      }
      console.log(spoken + ' includes in ' + expected);
      total += 1;
    }
  }
  return total;
}
// Run the comparison once and print the resulting total.
let matches = includeWords();
console.log(`Matched words: ${matches}`);
您可以使用 Map 统计期望单词各自出现的次数,然后遍历给定的单词,每匹配一次就检查并扣减对应单词的剩余次数。
/**
 * Counts how many words of `seen` match a word of `wanted`, using each
 * occurrence of a wanted word at most once.
 *
 * Two words match when they are identical, or when both are longer than
 * three characters and one is a prefix of the other.
 *
 * @param {string} wanted - Sentence whose words are the expected ones.
 * @param {string} seen - Sentence whose words are checked against `wanted`.
 * @returns {number} Number of words of `seen` that found an unused match.
 */
function includeWords(wanted, seen) {
  // Split on runs of whitespace (/\s+/, not /s+/ which splits on the letter "s")
  // and drop the empty tokens produced by leading/trailing whitespace.
  const toWords = (sentence) => sentence.split(/\s+/).filter(Boolean);

  // Remaining number of usable occurrences for every distinct wanted word.
  const wantedMap = toWords(wanted).reduce(
    (m, s) => m.set(s, (m.get(s) || 0) + 1),
    new Map()
  );
  const wantedArray = Array.from(wantedMap.keys());
  let count = 0;

  toWords(seen).forEach((s) => {
    const key = wantedArray.find(
      (t) => s === t || (s.length > 3 && t.length > 3 && (s.startsWith(t) || t.startsWith(s)))
    );
    // Either nothing matched (key is undefined) or every occurrence is used up.
    if (!wantedMap.get(key)) return;
    console.log(s, key);
    ++count;
    wantedMap.set(key, wantedMap.get(key) - 1);
  });

  return count;
}
// The second sentence contains an apostrophe, so it must be written with
// double quotes; inside single quotes the apostrophe would end the string.
let matches = includeWords('i was sent to earth to protect you', "they're were protecting him i knew that i was aware");
console.log('Matched words: ' + matches);
.as-console-wrapper { max-height: 100% !important; top: 0; }
我认为这应该有效:
let speechResult = "they're were protecting him i knew that i was aware";
let expectSt = ['i was sent to earth to protect you'];

/**
 * Counts the distinct words of `speechResult` that occur in `expectSt[0]`.
 *
 * Reads the module-level `speechResult` and `expectSt`. A word counts when the
 * expected sentence contains it as a substring, and every distinct word is
 * counted at most once.
 *
 * @returns {number} Number of distinct spoken words found in the expected sentence.
 */
function includeWords() {
  let countMatches = 0;
  // Empty tokens are dropped: `includes('')` is always true and would be counted.
  const spokenWords = speechResult.split(/\s+/).filter(Boolean);
  // Spread the Set to get its words; `[set]` would be a one-element array
  // holding the Set object itself.
  const uniqueWords = [...new Set(spokenWords)];
  const expectedSentence = expectSt[0];
  // for...of yields the words themselves (for...in yields index strings), and
  // declaring the loop variable avoids creating an implicit global.
  for (const word of uniqueWords) {
    if (expectedSentence.includes(word)) {
      countMatches += 1;
    }
  }
  return countMatches;
}

let matches = includeWords();
我得到 speechResult,将其制作成一个数组,删除重复项,再次转换为数组,然后检查 expectSt 字符串是否包含 NewArray 数组中的每个单词。
遍历字符串并使用空字符串更新匹配单词的索引,并将匹配项存储在数组中。
let speechResult = "they're were protecting him i knew that i was aware";
let expectSt = ['i was sent to earth to protect you'];
// Create arrays of words from above sentences.
// /\s+/ splits on runs of whitespace; a bare /s+/ would split on the letter "s".
let speechResultWords = speechResult.split(/\s+/);
let expectStWords = expectSt[0].split(/\s+/);
// One entry per spoken word that found a match.
const matches = [];
speechResultWords.forEach((str) => {
  for (let i = 0; i < expectStWords.length; i++) {
    const innerStr = expectStWords[i];
    // Skip slots that were already consumed (blanked). Two words match when
    // one is a prefix of the other; a prefix is always a substring, so no
    // separate `includes` check is needed.
    if (innerStr && (str.startsWith(innerStr) || innerStr.startsWith(str))) {
      if (str.length >= innerStr.length) {
        matches.push(innerStr);
        // Blank the slot so this expected occurrence cannot match again.
        expectStWords[i] = '';
      } else {
        // NOTE(review): the expected word is not consumed on this branch, so a
        // repeated short spoken word can match it again - confirm this is intended.
        matches.push(str);
      }
      // Only the first matching expected word is used for each spoken word.
      break;
    }
  }
});
console.log(matches.length);
通过使用词干分析,您可以直觉地认为具有相同词干的单词是相同的。
例如
- 对于动词:protect、protecting、protected……
- 也适用于复数:ball、balls
您可能想做的是:
- 词干提取:使用某个词干分析器(它们各有优缺点),例如 PorterStemmer,它似乎有一个 JS 实现
- 计算那个"词干空间"上的出现次数,这是微不足道的
注意:用"\s"拆分可能还不够,考虑逗号和更一般的标点符号。如果您有更多需求,关键字是标记化。
下面是一个使用 PorterStemmer 的示例,其中包含一些糟糕的自制标记化
// Sentence pairs [first, second] that are fed to countWords.
const examples = [
  ['protecting', 'i'],
  ['protecting', 'protect'],
  ['protect', 'protecting'],
  ['him', 'i'],
  ['i', 'i'],
  // Double quotes: the sentence contains an apostrophe, which would end a single-quoted string.
  ["they're were protecting him i knew that i was aware", 'i was sent to earth to protect you'],
  ['i i', 'i i i i i'],
];
/**
 * Splits a sentence into word tokens.
 *
 * The sentence is split on runs of whitespace, every character that is not an
 * ASCII letter, digit or apostrophe is removed from each token, and tokens
 * that end up empty are dropped.
 *
 * @param {string} s - Sentence to tokenize.
 * @returns {string[]} The cleaned, non-empty tokens in their original order.
 */
function tokenize(s) {
  // this is not good, get yourself a good tokenizer
  return s
    .split(/\s+/)
    // Strip the punctuation from the token itself; using the stripped string
    // only as a filter predicate would leave "protect," untouched.
    .map((token) => token.replace(/[^a-zA-Z0-9']/g, ''))
    .filter(Boolean);
}
/**
 * Counts the stemmed words of `b` that can be paired with a stemmed word of
 * `a`, using every occurrence in `a` at most once.
 *
 * Relies on `tokenize` and `stemmer` being in scope.
 *
 * @param {string} a - First sentence; its stems form the pool of available words.
 * @param {string} b - Second sentence; its stems are matched against the pool.
 * @returns {number} Number of words of `b` that found an unused counterpart in `a`.
 */
function countWords(a, b) {
  const sa = tokenize(a).map((t) => stemmer(t));
  const sb = tokenize(b).map((t) => stemmer(t));

  // A Map is used instead of a plain object so that stems such as
  // "constructor" do not collide with properties inherited from Object.prototype.
  const remaining = new Map();
  for (const w of sa) {
    remaining.set(w, (remaining.get(w) || 0) + 1);
  }

  let count = 0;
  for (const w of sb) {
    const left = remaining.get(w) || 0;
    if (left > 0) {
      // Consume one occurrence so it cannot be matched twice.
      remaining.set(w, left - 1);
      count += 1;
    }
  }
  return count;
}
// Print the match count of every example pair, numbered from 1.
for (const [index, [first, second]] of examples.entries()) {
  console.log(`ex ${index + 1}: ${countWords(first, second)}`);
}
<script src="https://cdn.jsdelivr.net/gh/kristopolous/Porter-Stemmer/PorterStemmer1980.js"></script>
我认为通过比较两个句子的词元(token)就可以得到最初的解决方案。但我看到两个需要注意的地方:
- 应在主 IF 子句中使用 OR 运算符,从两个方向比较两个句子的词元;
- 可以把匹配项放入 SET 集合中,以避免重复。
您可以使用以下函数获取两个句子/一组字符串之间所有匹配单词的计数。
/**
 * Counts the distinct words of `str1` that occur in `str2`.
 *
 * A word counts when `str2` contains it as a substring; every distinct word
 * of `str1` is counted at most once.
 *
 * @param {string} str1 - Sentence whose distinct words are looked up.
 * @param {string} str2 - Sentence that is searched.
 * @returns {number} Number of distinct words of `str1` found in `str2`.
 */
function matchWords(str1, str2) {
  // Empty tokens (from an empty string or repeated whitespace) are dropped:
  // `includes('')` is always true, so they would be counted as matches.
  const uniqueWords = new Set(str1.split(/\s+/).filter(Boolean));
  let countMatches = 0;
  uniqueWords.forEach((word) => {
    if (str2.includes(word)) {
      countMatches += 1;
    }
  });
  return countMatches;
}
// Both sentences are lower-cased first so the comparison ignores case.
console.log("Count:", matchWords("Test Match Words".toLowerCase(), "Result Match Words".toLowerCase()));
上面的代码已经过测试并正常工作。