JavaScript 模式比较



我正在使用 Node.js 研究一个小型的机器学习理论算法。我的目标是将许多数组模式与一个源模式进行比较,然后以百分比的形式返回它们的相似程度。例如,模式 1 可能与源模式有 80% 的相似度。

确定一个数组与另一个数组相似度的最佳方法是什么?

到目前为止我做了什么..

// Reference sequence that every candidate is measured against.
var soureSequence = [
  0.53, 0.55, 0.50, 0.40,
  0.50, 0.52, 0.58, 0.60,
];

// Candidate sequences to compare with the reference.
var sequence1 = [
  0.53, 0.54, 0.49, 0.40,
  0.50, 0.52, 0.58, 0.60,
];
var sequence2 = [
  0.53, 0.55, 0.50, 0.42,
  0.50, 0.53, 0.57, 0.62,
];

由于我选择了基于百分比的结果,我认为我应该基于数组中从第一个值到第二个值的百分比变化来建立我的源模式。

/**
 * Signed percent change from `firstVal` to `secondVal`, measured relative to
 * the absolute value of `firstVal`.
 *
 * @param {number} firstVal - Baseline value.
 * @param {number|string} secondVal - Value compared with the baseline; it is
 *   passed through `parseFloat`, so numeric strings are accepted.
 * @returns {number} The percent change, or the tiny placeholder `0.00000001`
 *   when the result would be zero, NaN or infinite.
 */
const percentChange = (firstVal, secondVal) => {
  const pChange =
    ((parseFloat(secondVal) - firstVal) / Math.abs(firstVal)) * 100;

  // A zero baseline yields +/-Infinity (or NaN for 0/0) and a missing operand
  // yields NaN. None of those, nor an exact zero, may reach the caller, so a
  // small non-zero placeholder is returned instead.
  if (!Number.isFinite(pChange) || pChange === 0) {
    return 0.00000001;
  }
  return pChange;
};

在这里,我将从我的源序列生成我的源模式

/**
 * Turns a numeric sequence into a pattern of percent changes between each
 * pair of adjacent values.
 *
 * A sequence of n values has n - 1 adjacent pairs, so the returned pattern has
 * n - 1 entries (none for sequences shorter than two values). The loop stops
 * before the last element so that no comparison is made against a value past
 * the end of the sequence.
 *
 * @param {number[]} sequence - Values to derive the pattern from.
 * @returns {number[]} One `percentChange` result per adjacent pair, in order.
 */
const storePattern = function (sequence) {
  const pattern = [];
  for (let i = 0; i + 1 < sequence.length; i++) {
    pattern.push(percentChange(sequence[i], sequence[i + 1]));
  }
  return pattern;
};

// Build the reference pattern of percent changes from the source sequence.
var sourcePattern = storePattern(soureSequence);

现在我将创建更多要比较的模式

// Derive the percent-change patterns of the two candidate sequences.
var testPattern1 = storePattern(sequence1);
var testPattern2 = storePattern(sequence2);

以下是我的比较函数

/**
 * Rates how closely `target` follows `source`, as an average percentage.
 *
 * Each position present in both patterns scores `100 - |percentChange|`
 * between the target entry and the source entry. The scores are summed and
 * divided by the length of `source`, so positions missing from a shorter
 * `target` count as 0, and surplus entries of a longer `target` are ignored
 * rather than inflating the rating above 100.
 *
 * @param {number[]} source - Reference pattern.
 * @param {number[]} target - Pattern being rated against the reference.
 * @returns {number} Average similarity; 0 when `source` is empty.
 */
const processPattern = function (source, target) {
  // Nothing to compare against: avoid dividing by zero.
  if (source.length === 0) {
    return 0;
  }

  const comparable = Math.min(source.length, target.length);
  let total = 0;
  for (let i = 0; i < comparable; i++) {
    // Percent change of the source entry relative to the target entry.
    const change = Math.abs(percentChange(target[i], source[i]));
    total += 100 - change;
  }
  return total / source.length;
};

现在我可以尝试估计相似性

// Rate the first candidate pattern against the source pattern.
var similarityOfTest1 = processPattern(sourcePattern , testPattern1)

我的问题是这仅适用于相同值范围内的序列..例如0.50 , 0.52.. 这些值的百分比变化对于 0.20 , 0.22 是不一样的,但值差异是相同的,即 ->0.02

我想过改为基于数值之差(而不是百分比变化)来建立模式,但到了这一步我就没有头绪了。

所有答案都将被考虑。感谢您的帮助!

可以使用 reduce 计算对应元素的差值以及这些差值的平均值。

// Candidate sequences that will be compared with the reference sequence.
var sequence1 = [
  0.53, 0.54, 0.49, 0.40,
  0.50, 0.52, 0.58, 0.60,
];
var sequence2 = [
  0.53, 0.55, 0.50, 0.42,
  0.50, 0.53, 0.57, 0.62,
];
/**
 * Compares `sequence` with a fixed built-in reference sequence.
 *
 * Only positions 1 and onward are compared; position 0 is left out.
 *
 * @param {number[]} sequence - Candidate values, indexed like the reference.
 * @returns {{delta: number[], average: number}} The signed differences
 *   (reference minus candidate) and their arithmetic mean.
 */
function diff(sequence) {
  const reference = [0.53, 0.55, 0.50, 0.40, 0.50, 0.52, 0.58, 0.60];

  // Signed difference per position, starting at index 1.
  const delta = [];
  for (let i = 1; i < reference.length; i++) {
    delta.push(reference[i] - sequence[i]);
  }

  // Sum left to right, seeded with the first difference.
  let total = delta[0];
  for (let k = 1; k < delta.length; k++) {
    total = total + delta[k];
  }
  const average = total / delta.length;

  return { delta, average };
}
// Print the differences and their average for each candidate, labelled by name.
for (const [label, candidate] of [['sequence1', sequence1], ['sequence2', sequence2]]) {
  console.log(label, diff(candidate));
}

根据我的经验,两个向量(数组)的相似性通常用点积来衡量。正如链接中所述,做法是:将两个数组中对应位置的元素相乘,把这些乘积相加,然后除以两个数组各自的模(即各分量平方和的平方根)的乘积,也就是 $\cos\theta = \frac{a \cdot b}{\lVert a \rVert \, \lVert b \rVert}$。Rosetta Code 上有一个用 JavaScript 实现点积的示例,复制如下:

// dotProduct :: [Int] -> [Int] -> Int
/**
 * Dot product of two equally long numeric arrays.
 *
 * @param {number[]} xs - First vector.
 * @param {number[]} ys - Second vector.
 * @returns {number|undefined} Sum of the element-wise products, or
 *   `undefined` when the two arrays differ in length.
 */
const dotProduct = (xs, ys) => {
  // Vectors of different lengths have no dot product.
  if (xs.length !== ys.length) {
    return undefined;
  }

  const products = zipWith((x, y) => x * y, xs, ys);
  return products
    ? products.reduce((total, term) => total + term, 0)
    : undefined;
};
// zipWith :: (a -> b -> c) -> [a] -> [b] -> [c]
/**
 * Combines two arrays position by position using `f`.
 *
 * @param {Function} f - Called as `f(x, y)` for each aligned pair.
 * @param {Array} xs - Left-hand values.
 * @param {Array} ys - Right-hand values.
 * @returns {Array} Results of `f`; as long as the shorter of the two inputs.
 */
const zipWith = (f, xs, ys) => {
  const limit = ys.length;

  // Trim the left-hand array when it is longer than the right-hand one.
  let paired = xs;
  if (xs.length > limit) {
    paired = xs.slice(0, limit);
  }

  return paired.map((x, i) => f(x, ys[i]));
};

所以,你可以这样调用:

// Score each candidate against the reference sequence with a raw dot product.
// The reference variable is spelled `soureSequence` where it is declared, so
// that spelling is used here; `sourceSequence` is never defined.
const score1 = dotProduct(soureSequence, sequence1);
const score2 = dotProduct(soureSequence, sequence2);

得分越大的序列越接近源序列。

我不确定你需要机器学习。你有一个源模式,你有一些输入,你基本上想要执行模式的差异。

机器学习可以用来找到模式,假设你有一些启发式的方法来测量误差(如果你使用无监督学习技术),或者你有样本集来训练网络。

但是,如果您只是想测量一种模式和另一种模式之间的差异,那么只需执行比较操作即可。您需要做的是确定测量结果的差异以及如何对结果进行归一化。

我不知道你想如何衡量相似性。我的做法是:计算对应元素之差的绝对值并累加,再看这个累计偏差占源数组各元素总和的比例。您可以按照自己喜欢的方式进行计算。

/**
 * Reports how similar a candidate sequence is to a source sequence.
 *
 * Walks the source values in order, adding each one to `state.sumSource` and
 * the absolute difference from the candidate value at the same position to
 * `state.sumDiff`. The result is 100 minus the accumulated difference
 * expressed as a percentage of the accumulated source total.
 *
 * @param {Iterable<number>} source - Reference values (first parameter).
 * @param {Iterable<number>} candidate - Values compared with them (second parameter).
 * @param {{sumSource: number, sumDiff: number}} [state] - Running totals;
 *   updated in place.
 * @returns {string} The similarity with four decimals, followed by "% similarity".
 */
function check([x, ...xs], [y, ...ys], state = { sumSource: 0, sumDiff: 0 }) {
  const sourceItems = [x, ...xs];
  const candidateItems = [y, ...ys];

  // At least one step always runs, even when the source is empty.
  for (let i = 0; i < sourceItems.length; i++) {
    const sourceItem = sourceItems[i];
    state.sumSource += sourceItem;
    state.sumDiff += Math.abs(sourceItem - candidateItems[i]);
  }

  const percent = 100 - (100 * state.sumDiff) / state.sumSource;
  return `${percent.toFixed(4)}% similarity`;
}
// Reference sequence and the two candidates to score against it.
var soureSequence = [0.53, 0.55, 0.50, 0.40, 0.50, 0.52, 0.58, 0.60];
var sequence1 = [0.53, 0.54, 0.49, 0.40, 0.50, 0.52, 0.58, 0.60];
var sequence2 = [0.53, 0.55, 0.50, 0.42, 0.50, 0.53, 0.57, 0.62];

// Print the similarity of each candidate to the reference.
for (const candidate of [sequence1, sequence2]) {
  console.log(check(soureSequence, candidate));
}

最新更新