我有这个示例项目集合:
{
"_id": "1",
"field1": "value1",
"field2": "value2",
"category": "phones",
"user": "1",
"tags": [
"tag1",
"tag3"
]
},
{
"_id": "2",
"field1": "value1",
"field2": "value2",
"category": "phones",
"user": "1",
"tags": [
"tag2",
"tag3"
]
},
{
"_id": "3",
"field1": "value1",
"field2": "value2",
"category": "bikes",
"user": "1",
"tags": [
"tag3",
"tag4"
]
},
{
"_id": "4",
"field1": "value1",
"field2": "value2",
"category": "cars",
"user": "2",
"tags": [
"tag1",
"tag2"
]
}
我想搜索由特定用户(例如 user 为 "1")创建的项目,并按 category 字段分组显示。期望的结果:
{
"phones": [
{
"_id": "1",
"field1": "value1",
"field2": "value2",
"tags": [
"tag1",
"tag3"
]
},
{
"_id": "2",
"field1": "value1",
"field2": "value2",
"tags": [
"tag2",
"tag3"
]
}
],
"bikes" : [
{
"_id": "3",
"field1": "value1",
"field2": "value2",
"tags": [
"tag3",
"tag4"
]
}
]
}
是否可以通过聚合框架的 $group 功能得到这种结构?谢谢。
可以按类别进行分组,但无法得到您所展示的那种形式。这其实是一件好事,因为您的"类别"实际上是数据,您不应该在存储或输出中将"数据"表示为"键"。
因此,建议您这样转换:
// Group the items created by user "1" by category, then gather every
// category bucket into a single result document.
db.collection.aggregate([
    // "user" is stored as a string in the sample documents, so the filter
    // must use the string "1"; the number 1 would match nothing.
    { "$match": { "user": "1" } },
    // One document per category; "items" holds the fields to display.
    // "_id" is pushed as well so each item stays identifiable in the output.
    { "$group": {
        "_id": "$category",
        "items": {
            "$push": {
                "_id": "$_id",
                "field1": "$field1",
                "field2": "$field2",
                "tags": "$tags"
            }
        }
    }},
    // Collapse all category documents into one "categories" array.
    { "$group": {
        "_id": null,
        "categories": {
            "$push": {
                "_id": "$_id",
                "items": "$items"
            }
        }
    }}
])
你会得到这样的输出:
{
"_id" : null,
"categories" : [
{
"_id" : "bikes",
"items" : [
{
"_id": "3",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag3",
"tag4"
]
}
]
},
{
"_id" : "phones",
"items" : [
{
"_id": "1",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag1",
"tag3"
]
},
{
"_id": "2",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag2",
"tag3"
]
}
]
}
]
}
实际上,最好是具有不随数据变化而变化的通用键名称。这实际上就是面向对象的模式。
如果您确实认为这里需要"以数据作为键",那么对于聚合框架,您要么事先知道预期会出现哪些"类别",要么准备好用代码生成管道阶段:
// Produce one fixed key per known category ("phones", "bikes") for the items
// of user "1". Works without $setDifference: the false placeholders are
// removed with $unwind / $match / $group round trips.
db.collection.aggregate([
    { "$match": { "user": "1" } },
    // Single group: every document pushes either its item fields or `false`
    // into each category array, depending on its own category.
    { "$group": {
        "_id": null,
        "phones": {
            "$push": {
                "$cond": [
                    { "$eq": ["$category","phones"] },
                    {
                        "_id": "$_id",
                        "field1": "$field1",
                        "field2": "$field2",
                        "tags": "$tags"
                    },
                    false
                ]
            }
        },
        "bikes": {
            "$push": {
                "$cond": [
                    { "$eq": ["$category","bikes"] },
                    {
                        "_id": "$_id",
                        "field1": "$field1",
                        "field2": "$field2",
                        "tags": "$tags"
                    },
                    false
                ]
            }
        }
    }},
    // Strip the `false` placeholders from "phones", carrying "bikes" along.
    // NOTE: if no document is in a category, this $match removes every
    // unwound row and the pipeline returns nothing.
    { "$unwind": "$phones" },
    { "$match": { "phones": { "$ne": false } }},
    { "$group": {
        "_id": "$_id",
        "phones": { "$push": "$phones" },
        "bikes": { "$first": "$bikes" }
    }},
    // Same clean-up for "bikes", carrying the already-clean "phones" along.
    { "$unwind": "$bikes" },
    { "$match": { "bikes": { "$ne": false } }},
    { "$group": {
        "_id": "$_id",
        "phones": { "$first": "$phones" },
        "bikes": { "$push": "$bikes" }
    }},
    // Drop the null grouping key from the final document.
    { "$project": {
        "_id": 0,
        "phones": 1,
        "bikes": 1
    }}
])
在 MongoDB 2.6 中可以将其缩短一些,因为您可以使用 $setDifference 运算符过滤掉 false 值:
// MongoDB 2.6+ variant: build one fixed key per known category for the items
// of user "1", then remove the `false` placeholders with $setDifference.
db.collection.aggregate([
    { "$match": { "user": "1" } },
    // Every document contributes either its item fields or `false` to each
    // category array, depending on its own category.
    { "$group": {
        "_id": null,
        "phones": {
            "$push": {
                "$cond": {
                    "if": { "$eq": ["$category", "phones"] },
                    "then": {
                        "_id": "$_id",
                        "field1": "$field1",
                        "field2": "$field2",
                        "tags": "$tags"
                    },
                    "else": false
                }
            }
        },
        "bikes": {
            "$push": {
                "$cond": {
                    "if": { "$eq": ["$category", "bikes"] },
                    "then": {
                        "_id": "$_id",
                        "field1": "$field1",
                        "field2": "$field2",
                        "tags": "$tags"
                    },
                    "else": false
                }
            }
        }
    }},
    // Subtracting the one-element set [false] drops the placeholders.
    // $setDifference treats the arrays as sets, so element order is not
    // guaranteed; items are distinct here because each carries its own _id.
    { "$project": {
        "_id": 0,
        "phones": { "$setDifference": ["$phones", [false]] },
        "bikes": { "$setDifference": ["$bikes", [false]] }
    }}
])
两者都能按照您的意愿产生输出:
{
"phones" : [
{
"_id" : "1",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag1",
"tag3"
]
},
{
"_id" : "2",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag2",
"tag3"
]
}
],
"bikes" : [
{
"_id" : "3",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag3",
"tag4"
]
}
]
}
这里的一般情况是,聚合框架不允许字段数据用作键,因此您需要仅对数据进行分组,或者自己指定键名。
获得"动态"键名的唯一方法是使用 mapReduce:
// Build an object keyed by category name for the items of user "1".
db.collection.mapReduce(
    // map: emit { <category>: [document] } under a single null key so that
    // every document ends up in the same reduce call. "user" and "category"
    // are removed from the emitted document.
    function () {
        var doc = this;
        var bucket = doc.category;
        delete doc.user;
        delete doc.category;

        var grouped = {};
        grouped[bucket] = [doc];
        emit(null, grouped);
    },
    // reduce: merge the per-document objects by appending each category's
    // array onto the accumulated array of the same name. The result has the
    // same shape as a mapped value, so it can be reduced again.
    function (key, values) {
        var merged = {};
        for (var i = 0; i < values.length; i++) {
            var partial = values[i];
            var names = Object.keys(partial);
            for (var j = 0; j < names.length; j++) {
                var name = names[j];
                if (!merged.hasOwnProperty(name)) {
                    merged[name] = [];
                }
                Array.prototype.push.apply(merged[name], partial[name]);
            }
        }
        return merged;
    },
    {
        "query": { "user": "1" },
        "out": { "inline": 1 }
    }
)
这样一来,键的生成就是动态的了,但输出是以典型的 mapReduce 形式给出的:
{
"_id" : null,
"value" : {
"phones" : [
{
"_id" : "1",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag1",
"tag3"
]
},
{
"_id" : "2",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag2",
"tag3"
]
}
],
"bikes" : [
{
"_id" : "3",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag3",
"tag4"
]
}
]
}
}
因此,输出形式受限于 mapReduce 组织输出的方式,而且此处 JavaScript 的执行速度会比聚合框架的原生操作慢。它的可操控性更强,但这是一种权衡。
总之,如果您遵循通用键名的模式,那么使用聚合框架的第一种方法是最快、最好的做法,而且您始终可以在结果从服务器返回后再对其重新整理。如果您坚持打破这种模式,并且需要由服务器生成动态键,那么在聚合框架的其他做法不切实际的情况下,mapReduce 可以做到这一点。