在 JavaScript 中将第 0–100 百分位数"左联接"(left join)到统计值上



我们遇到了一个似乎很有挑战性的数据处理问题,涉及两个 JavaScript 对象数组。第一个数组 stats 存储篮球统计数据,第二个数组 pctiles 存储这些统计数据对应的百分位数。下面是 stats 和 pctiles 的一个小示例:

// Sample stat rows: each row carries its group label plus one value per metric.
let stats = [
  { group: 'a', fgPct: 0.44, fg2Pct: 0.48 },
  { group: 'a', fgPct: 0.41, fg2Pct: 0.46 },
  { group: 'b', fgPct: 0.40, fg2Pct: 0.34 },
  { group: 'b', fgPct: 0.38, fg2Pct: 0.31 },
];

// Percentile lookup rows: for a given group and percentile rank (`pctile`),
// each metric key holds that metric's value at that percentile.
let pctiles = [
  // group 'a'
  { group: 'a', pctile: 0, fgPct: 0.385, fg2Pct: 0.425 },
  { group: 'a', pctile: 1, fgPct: 0.398, fg2Pct: 0.436 },
  { group: 'a', pctile: 2, fgPct: 0.412, fg2Pct: 0.458 },
  { group: 'a', pctile: 3, fgPct: 0.431, fg2Pct: 0.465 },
  { group: 'a', pctile: 4, fgPct: 0.452, fg2Pct: 0.475 },
  { group: 'a', pctile: 5, fgPct: 0.468, fg2Pct: 0.491 },
  // group 'b'
  { group: 'b', pctile: 0, fgPct: 0.371, fg2Pct: 0.291 },
  { group: 'b', pctile: 1, fgPct: 0.382, fg2Pct: 0.301 },
  { group: 'b', pctile: 2, fgPct: 0.392, fg2Pct: 0.306 },
  { group: 'b', pctile: 3, fgPct: 0.412, fg2Pct: 0.323 },
  { group: 'b', pctile: 4, fgPct: 0.425, fg2Pct: 0.342 },
  { group: 'b', pctile: 5, fgPct: 0.434, fg2Pct: 0.346 },
];

对于 a、b 每个组,完整版的 pctiles 包含第 0 到第 100 百分位数,而 fgPct、fg2Pct 的值对应于我们更大的 stats 数组中这些度量的第 0、第 1、第 2 等百分位数。

我们需要将 pctiles "左联接"到 stats 上。我给左联接加了引号,因为从技术上讲它并不是真正的左联接。所需的输出见下文:

// Desired result: every stat row keeps its own fields and gains one
// `<metric>Pctile` field per metric.
let statsWithPctiles = [
  // group 'a'
  { group: 'a', fgPct: 0.44, fg2Pct: 0.48, fgPctPctile: 4, fg2PctPctile: 5 },
  { group: 'a', fgPct: 0.41, fg2Pct: 0.46, fgPctPctile: 2, fg2PctPctile: 3 },
  // group 'b'
  { group: 'b', fgPct: 0.40, fg2Pct: 0.34, fgPctPctile: 3, fg2PctPctile: 4 },
  { group: 'b', fgPct: 0.38, fg2Pct: 0.31, fgPctPctile: 1, fg2PctPctile: 3 },
];

澄清第一个对象的输出:

  • 对于组 a,fgPct 为 0.44,介于第 3 百分位(.431)和第 4 百分位(.452)之间。始终将 fgPctPctile 设置为较高的那个百分位(4)
  • 对于组 a,fg2Pct 为 0.48,介于第 4 百分位(.475)和第 5 百分位(.491)之间。始终将 fg2PctPctile 设置为较高的那个百分位(5)

我们需要对 stats 中每个对象的每个度量执行此操作:查看 stats 值落在哪两个百分位数之间,并将 metricPctile 值设置为其中较高的百分位数。这需要有一定的执行效率,因为我们实际的 stats 对象数组可能有几百个对象,每个对象都有 50 个或更多的度量。

编辑:我正在为此制定解决方案,并将很快发布,尽管我担心它的性能不太好。

编辑 2:更新——我们当前的慢速方法:

// Slow approach: for every stats row, rescan that row's group of percentile
// rows once per metric and record the lowest percentile rank whose metric
// value is >= the row's own value (i.e. the "upper" percentile).
//
// NOTE(review): rows may be plain objects or documents exposing toObject()
// (presumably fetched ODM documents -- confirm); both shapes are accepted, and
// the group is read from `_id.group` when an `_id` is present, else `group`.
let statsWithPctiles = stats.map((statRow) => {
  const hasId = statRow._id !== undefined && statRow._id !== null;
  const statGroup = hasId ? statRow._id.group : statRow.group;

  // Unwrap once per stats row (not once per metric), keep only this row's
  // group, and order by percentile rank so the first match below is the
  // lowest qualifying percentile. `map` yields a fresh array, so the sort
  // does not reorder `pctiles` itself.
  const groupPctiles = pctiles
    .map((row) => (typeof row.toObject === 'function' ? row.toObject() : row))
    .filter((row) => row.group === statGroup)
    .sort((a, b) => a.pctile - b.pctile);

  // Work on a shallow copy so the rows in `stats` are left untouched.
  const result = Object.assign({}, statRow);

  Object.keys(statRow).forEach((metric) => {
    if (metric === '_id' || metric === 'group') {
      return;
    }
    const upper = groupPctiles.find((row) => row[metric] >= statRow[metric]);
    // When no percentile row reaches the value, the key is left unset rather
    // than filled with a sentinel such as -Infinity.
    if (upper !== undefined) {
      result[`${metric}Pctile`] = upper.pctile;
    }
  });

  // The callback must return the row; otherwise map() produces undefined entries.
  return result;
});

这段代码目前还不能正常工作,因为内部 forEach() 里的逻辑我还没有写对。

你可以试试这个。

// Sample stat rows: each row carries its group label plus one value per metric.
const stats = [
  { group: 'a', fgPct: 0.44, fg2Pct: 0.48 },
  { group: 'a', fgPct: 0.41, fg2Pct: 0.46 },
  { group: 'b', fgPct: 0.40, fg2Pct: 0.34 },
  { group: 'b', fgPct: 0.38, fg2Pct: 0.31 },
];

// Percentile lookup rows: for a given group and percentile rank (`pctile`),
// each metric key holds that metric's value at that percentile.
const pctiles = [
  { group: 'a', pctile: 0, fgPct: 0.385, fg2Pct: 0.425 },
  { group: 'a', pctile: 1, fgPct: 0.398, fg2Pct: 0.436 },
  { group: 'a', pctile: 2, fgPct: 0.412, fg2Pct: 0.458 },
  { group: 'a', pctile: 3, fgPct: 0.431, fg2Pct: 0.465 },
  { group: 'a', pctile: 4, fgPct: 0.452, fg2Pct: 0.475 },
  { group: 'a', pctile: 5, fgPct: 0.468, fg2Pct: 0.491 },
  { group: 'b', pctile: 0, fgPct: 0.371, fg2Pct: 0.291 },
  { group: 'b', pctile: 1, fgPct: 0.382, fg2Pct: 0.301 },
  { group: 'b', pctile: 2, fgPct: 0.392, fg2Pct: 0.306 },
  { group: 'b', pctile: 3, fgPct: 0.412, fg2Pct: 0.323 },
  { group: 'b', pctile: 4, fgPct: 0.425, fg2Pct: 0.342 },
  { group: 'b', pctile: 5, fgPct: 0.434, fg2Pct: 0.346 },
];

/**
 * Bucket percentile rows by group, each bucket ordered by ascending `pctile`.
 * Done once up front so the rows are not re-filtered and re-sorted per stat row.
 *
 * @param {Array<Object>} pctileRows - rows with `group`, `pctile` and metric keys
 * @returns {Map<*, Array<Object>>} group -> that group's rows, lowest pctile first
 */
function indexPctilesByGroup(pctileRows) {
  const byGroup = new Map();
  for (const row of pctileRows) {
    if (!byGroup.has(row.group)) {
      byGroup.set(row.group, []);
    }
    byGroup.get(row.group).push(row);
  }
  // The buckets are new arrays, so sorting them leaves `pctileRows` in its original order.
  for (const rows of byGroup.values()) {
    rows.sort((a, b) => a.pctile - b.pctile);
  }
  return byGroup;
}

/**
 * Return copies of the stat rows, each extended with one `<metric>Pctile` key
 * per metric: the lowest percentile rank in the row's group whose metric value
 * is >= the row's value (the "upper" percentile). Every key except `group` is
 * treated as a metric. A key is left unset when no percentile row of the group
 * reaches the value (or the group has no percentile rows at all).
 *
 * @param {Array<Object>} statRows - rows with `group` and numeric metric keys
 * @param {Array<Object>} pctileRows - rows with `group`, `pctile` and metric keys
 * @returns {Array<Object>} new row objects; the input rows are not modified
 */
function attachPctiles(statRows, pctileRows) {
  const byGroup = indexPctilesByGroup(pctileRows);
  return statRows.map((stat) => {
    const groupPctiles = byGroup.get(stat.group) || [];
    const result = Object.assign({}, stat);
    for (const metric of Object.keys(stat)) {
      if (metric === 'group') {
        continue;
      }
      // Rows are in ascending pctile order, so the first hit is the upper percentile.
      const upper = groupPctiles.find((row) => stat[metric] <= row[metric]);
      if (upper !== undefined) {
        result[`${metric}Pctile`] = upper.pctile;
      }
    }
    return result;
  });
}

const statsWithPctiles = attachPctiles(stats, pctiles);
console.log(statsWithPctiles);

最新更新