我尝试使用两次MapReduce聚合来获取每月的唯一用户数。
第一个 MR 函数计算出一个mr_buyer_payment集合,如下所示:
{ "_id" : { "u" : "01329f19-27b0-435b-9ca1-450984024a31", "tid" : ISODate("2013-09-01T00:00:00Z") }, "value" : { "payment" : 38, "count_pay" : 1 } }
{ "_id" : { "u" : "264dd104-b934-490b-988e-5822fd7970f6", "tid" : ISODate("2013-09-01T00:00:00Z") }, "value" : { "payment" : 4.99, "count_pay" : 1 } }
{ "_id" : { "u" : "27bb8f72-a13e-4676-862c-02f41fea1bc0", "tid" : ISODate("2013-09-01T00:00:00Z") }, "value" : { "payment" : 11.98, "count_pay" : 2 } }
第二个 MR 函数在小数据集上工作正常,但当输入数据超过 100 条记录时,它会得到错误的结果,其中一些值是 NaN。
调试日志显示,在 reduce 函数中,v.payment、v.count_user 等一些值变成了 undefined。
date:Sun Jun 30 2013 17:00:00 GMT-0700 (PDT) value:undefined / 162 / undefined
MR 的结果信息也很奇怪:
{
"result" : "mr_buyer_all",
"timeMillis" : 29,
"counts" : {
"input" : 167,
"emit" : 167,
"reduce" : 6, // it should be 3, as same as "output" number
"output" : 3
},
"ok" : 1,
}
这是第二个 MR 函数:
// Second map-reduce pass over mr_buyer_payment: groups the documents by their
// _id.tid date and replaces the mr_buyer_all collection with the result.
db.mr_buyer_payment.mapReduce(
    // Map: emits one value per input document, keyed by the document's date.
    function () {
        var groupKey = this._id.tid;
        var emitted = {
            payment: this.value.payment,
            count_pay: this.value.count_pay,
            count_user: 1 // every emitted document counts once
        };
        // Documents whose count_pay is not greater than zero are not emitted.
        if (!(emitted.count_pay > 0)) {
            return;
        }
        print("date:" + groupKey + " u:" + this._id.u + "value:" + emitted.payment + " / " + emitted.count_pay + " / " + emitted.count_user);
        emit(groupKey, emitted);
    },
    // Reduce: sums the values that share a key.
    // NOTE(review): the object returned here uses the fields revenue/user,
    // while map emits payment/count_user. MongoDB may call reduce again with a
    // previous reduce result among `values`; such an entry has no count_user,
    // so it takes the debug-print branch below and is left out of the totals.
    function (key, values) {
        var totals = {revenue: 0, count_pay: 0, user: 0};
        for (var i = 0; i < values.length; i++) {
            var v = values[i];
            if (v.count_user) {
                totals.revenue += v.payment;
                totals.count_pay += v.count_pay;
                totals.user += v.count_user;
                continue;
            }
            // Debug output for entries without a truthy count_user.
            print("date:" + key + " " + "value:" + v.payment + " / " + v.count_pay + " / " + v.count_user);
        }
        return totals;
    },
    {
        // Overwrite the output collection on every run.
        out: {replace: "mr_buyer_all"}
    }
)
reduce 函数返回的文档必须与 map 函数 emit 的值采用相同的格式,因为 MongoDB 可能会对同一个键多次调用 reduce,并把上一次 reduce 的输出作为下一次的输入。所以解决方案是:
function(key,values){
// the following key must be as same as the object in map
var r = {payment:0,count_pay:0,count_user:0}
values.forEach(function(v){
r.payment += v.payment;
r.count_pay += v.count_pay;
r.count_user += v.count_user;
});
return r;
},