在我的 Rails 3.2 项目中,我通过 Mongoid 使用 MongoDB 的 map/reduce 对一些结果进行分组,例如:
# Counts documents grouped by the distinct values of a single attribute,
# using a map/reduce job whose output is returned inline.
#
# @param context [String, Symbol] attribute to group by; must name one of
#   "action", "browser" or "country"
# @return [Hash] distinct attribute value => document count (coerced with to_i)
# @raise [RuntimeError] if +context+ is not one of the allowed attributes
def count_and_group_by(context)
  # Normalise first so that symbols (:action) pass the whitelist as well.
  field = context.to_s
  # The name is interpolated into JavaScript below, so it has to be checked
  # against the whitelist before the map function is built.
  raise "No #{context} attribute" unless %w(action browser country).include?(field)

  # `var` keeps key/value local to the function instead of creating implicit
  # globals in the JavaScript scope.
  map = %Q{
    function() {
      var key = this.#{field};
      var value = {count: 1};
      emit(key, value);
    }
  }

  # Sums the per-document counts emitted for one key.
  reduce = %Q{
    function(key, values) {
      var reducedValue = {count: 0};
      values.forEach(function(value) {
        reducedValue.count += value.count;
      });
      return reducedValue;
    }
  }

  rows = map_reduce(map, reduce).out(inline: true)
  # Each row is read as { "_id" => <value>, "value" => { "count" => <n> } }.
  Hash[rows.map { |row| [row["_id"], row["value"]["count"].to_i] }]
end
当我像 MyClass.count_and_group_by("action") 这样调用该方法时,
得到的结果格式如下:
{"change_password"=>31, "invalid_ip"=>32, "login_failure"=>74, "login_success"=>63, "logout"=>34}
目前我的做法是按单个属性对结果分组,例如分别按动作(action)、浏览器(browser)和城市(city)属性统计,并且每个属性都要单独调用一次,如:MyClass.count_and_group_by("action")
、MyClass.count_and_group_by("browser")
、MyClass.count_and_group_by("city")
。
是否可以一次发出多个键,这样我就可以一次对结果进行分组,并获得如下结果:
{"action" => {
"change_password"=>31,
"invalid_ip"=>32,
"login_failure"=>74,
"login_success"=>63,
"logout"=>34},
"browser" => {},
"city" => {}}
非常感谢任何帮助。
谢谢!
这通常是可以做到的,但对于这类操作,使用聚合框架(aggregation framework)可以获得更好的性能。目前用 Mongoid 定义的类上还没有 "aggregate" 方法,但有一个 .collection
访问器可以暴露底层驱动程序对象,所以你可以通过它调用 .aggregate()
:
# Counts documents per distinct value of "action", "browser" and "country" in
# a single aggregation call. The final stage relies on $setDifference.
# The result is expected to hold one document with "_id" => nil and, per field,
# an array of { "value" => ..., "count" => ... } entries.
# NOTE(review): assumes this runs where `collection` resolves to the Mongoid
# model's collection accessor (e.g. inside a class method) — confirm.
result = collection.aggregate([
  # Include each field and an array for "type" in all documents
  { "$project" => {
    "action" => 1,
    "browser" => 1,
    "country" => 1,
    "type" => { "$const" => ["action", "browser", "country"] }
  } },

  # Unwind that "type" array, producing one copy of each document per type
  { "$unwind" => "$type" },

  # Group by "type" and the value of the field that matches the type
  { "$group" => {
    "_id" => {
      "type" => "$type",
      "value" => {
        "$cond" => [
          { "$eq" => ["$type", "action"] },
          "$action",
          { "$cond" => [
            { "$eq" => ["$type", "browser"] },
            "$browser",
            "$country"
          ] }
        ]
      }
    },
    "count" => { "$sum" => 1 }
  } },

  # Just in case all fields were not present in all documents
  { "$match" => { "_id.value" => { "$ne" => nil } } },

  # Group to a single document with each "type" as the keys. Rows belonging
  # to another type contribute nil, which is removed in the last stage.
  { "$group" => {
    "_id" => nil,
    "action" => {
      "$addToSet" => {
        "$cond" => [
          { "$eq" => ["$_id.type", "action"] },
          { "value" => "$_id.value", "count" => "$count" },
          nil
        ]
      }
    },
    "browser" => {
      "$addToSet" => {
        "$cond" => [
          { "$eq" => ["$_id.type", "browser"] },
          { "value" => "$_id.value", "count" => "$count" },
          nil
        ]
      }
    },
    "country" => {
      "$addToSet" => {
        "$cond" => [
          { "$eq" => ["$_id.type", "country"] },
          { "value" => "$_id.value", "count" => "$count" },
          nil
        ]
      }
    }
  } },

  # Filter out any nil values from the conditional allocation
  { "$project" => {
    "action" => { "$setDifference" => ["$action", [nil]] },
    "browser" => { "$setDifference" => ["$browser", [nil]] },
    "country" => { "$setDifference" => ["$country", [nil]] }
  } }
])
这使用了较新的MongoDB 2.6引入的 $setDifference
操作符,以便从结果数组中过滤任何空值。以前的版本也可以做同样的事情,对处理几乎没有影响,只需要更多的步骤:
# Counts documents per distinct value of "action", "browser" and "country" in
# a single aggregation call, without using $setDifference: the nil placeholders
# are stripped with one $unwind / $match / $group round per field instead.
# The result is expected to hold one document with "_id" => nil and, per field,
# an array of { "value" => ..., "count" => ... } entries.
# NOTE(review): assumes this runs where `collection` resolves to the Mongoid
# model's collection accessor (e.g. inside a class method) — confirm.
result = collection.aggregate([
  # Include each field and an array for "type" in all documents
  { "$project" => {
    "action" => 1,
    "browser" => 1,
    "country" => 1,
    "type" => { "$const" => ["action", "browser", "country"] }
  } },

  # Unwind that "type" array, producing one copy of each document per type
  { "$unwind" => "$type" },

  # Group by "type" and the value of the field that matches the type
  { "$group" => {
    "_id" => {
      "type" => "$type",
      "value" => {
        "$cond" => [
          { "$eq" => ["$type", "action"] },
          "$action",
          { "$cond" => [
            { "$eq" => ["$type", "browser"] },
            "$browser",
            "$country"
          ] }
        ]
      }
    },
    "count" => { "$sum" => 1 }
  } },

  # Just in case all fields were not present in all documents
  { "$match" => { "_id.value" => { "$ne" => nil } } },

  # Group to a single document with each "type" as the keys. Rows belonging
  # to another type contribute nil, which is removed in the stages below.
  { "$group" => {
    "_id" => nil,
    "action" => {
      "$addToSet" => {
        "$cond" => [
          { "$eq" => ["$_id.type", "action"] },
          { "value" => "$_id.value", "count" => "$count" },
          nil
        ]
      }
    },
    "browser" => {
      "$addToSet" => {
        "$cond" => [
          { "$eq" => ["$_id.type", "browser"] },
          { "value" => "$_id.value", "count" => "$count" },
          nil
        ]
      }
    },
    "country" => {
      "$addToSet" => {
        "$cond" => [
          { "$eq" => ["$_id.type", "country"] },
          { "value" => "$_id.value", "count" => "$count" },
          nil
        ]
      }
    }
  } },

  # Filter out any nil values from the conditional allocation.
  # Round 1: rebuild "country" without nil, carrying the other arrays along.
  { "$unwind" => "$country" },
  { "$match" => { "country" => { "$ne" => nil } } },
  { "$group" => {
    "_id" => "$_id",
    "action" => { "$first" => "$action" },
    "browser" => { "$first" => "$browser" },
    "country" => { "$push" => "$country" }
  } },

  # Round 2: rebuild "browser" without nil.
  { "$unwind" => "$browser" },
  { "$match" => { "browser" => { "$ne" => nil } } },
  { "$group" => {
    "_id" => "$_id",
    "action" => { "$first" => "$action" },
    "browser" => { "$push" => "$browser" },
    "country" => { "$first" => "$country" }
  } },

  # Round 3: rebuild "action" without nil.
  { "$unwind" => "$action" },
  { "$match" => { "action" => { "$ne" => nil } } },
  { "$group" => {
    "_id" => "$_id",
    "action" => { "$push" => "$action" },
    "browser" => { "$first" => "$browser" },
    "country" => { "$first" => "$country" }
  } }
])
输出与您想要的键/值形式略有不同,但只需像您现在那样做一点后处理,就能很容易地转换成所需的格式。假设输入如下:
{ "action" : "change_password", "browser" : "ie", "country" : "US" }
{ "action" : "change_password", "browser" : "ie", "country" : "UK" }
{ "action" : "change_password", "browser" : "chrome", "country" : "AU" }
得到的结果如下:
{
"_id" : null,
"action" : [
{
"value" : "change_password",
"count" : 3
}
],
"browser" : [
{
"value" : "ie",
"count" : 2
},
{
"value" : "chrome",
"count" : 1
}
],
"country" : [
{
"value" : "US",
"count" : 1
},
{
"value" : "UK",
"count" : 1
},
{
"value" : "AU",
"count" : 1
}
]
}
所以这与 mapReduce 的输出略有不同,不过 mapReduce 的输出本来也"不完全"是您想要的格式。由于聚合框架是用原生代码实现的,它的运行速度要快得多。