以下是我的Map、Reduce和finalize函数。有4条记录与键匹配,当用较少的数据调用mapreduce(100秒)时,结果正确,但当用较多的数据调用mapreduce时(只有1000秒),结果始终为2。我检查了Reduce函数,在我看来它是正确的,即使它在内部被多次调用以获取更大的数据。这越来越奇怪了,我花了很长时间,仍然没能把它做好。
var map1 = function(){
var mapPosCnt = 0, mapPosSum = 0, mapZeroCnt = 0;
if (isNumber(this.val1)){
if(this.val1.toPrecision(10) > 0.0000000000){
mapPosCnt = 1;
mapPosSum = this.val1;
}else{
mapZeroCnt = 1;
}
}else{
mapPosCnt = 0, mapPosSum = 0, mapZeroCnt = 0;
}
emit({key1: this.key1, key2: this.key2+'', val1: 'val1'}
,{key1: this.key1, key2: this.key2+'', posCnt: mapPosCnt, posSum: mapPosSum, posAvg: 0, zeroCnt: mapZeroCnt, val1: this.val1});
}
var reduce1 = function(key, values){
var retval = {key1: key.CE, key2: key.key2, posCnt: 0, posSum: 0, posAvg: 0, zeroCnt: 0, val1: 0};
values.forEach(function(value){
if (isNumber(value.val1)){
if(value.val1.toPrecision(10) > 0.0000000000){
retval.posCnt += 1;
retval.posSum += value.val1;
}else{
retval.zeroCnt += 1;
}
}
})
return retval;
}
var finalize1 = function(key, value){
value.key2 = value.key2.toString();
if(value.posCnt > 0){
value.posSum = Math.round(value.posSum * Math.pow(10, 6)) / Math.pow(10, 6);
value.posAvg = Math.round((value.posSum/value.posCnt) * Math.pow(10, 6)) / Math.pow(10, 6);
}
return value;
}
collection1.mapReduce(map1, reduce1, {out: {merge: 'collection2'}, finalize: finalize1}, function(err, collection){});
如果没有文档来测试它,我猜有点,但我可以看到几个问题:
- 发射/减少的值不应包括关键字段。因此,您的发射值应该是这样的:
{ posCnt: mapPosCnt, posSum: mapPosSum, zeroCnt: mapZeroCnt }
- reduce函数不应该像您所做的那样尝试重新应用发射逻辑,而是应该通过对值求和来聚合具有相同键的值
所以reduce1
应该看起来像:
var reduce1 = function(key, values){
var retval = { posCnt: 0, posSum: 0, zeroCnt: 0 };
values.forEach(function(value){
retval.posCnt += value.posCnt;
retval.posSum += value.posSum;
retval.zeroCnt += value.zeroCnt;
});
return retval;
};