按多个范围查询文档计数,返回具有匹配元素计数的范围开始/结束



我一直在尝试对以下文档创建查询:

[
{
"timestamp": new ISODate('2020-01-01T00:00:00'),
"objectId": "Id_A",
"locationId": "Location_A"
},
{
"timestamp": new ISODate('2021-01-01T00:00:00'),
"objectId": "Id_A",
"locationId": "Location_A"
},
{
"timestamp": new ISODate('2022-01-01T00:00:00'),
"objectId": "Id_A",
"locationId": "Location_B"
},
{
"timestamp": new ISODate('2021-01-01T00:00:00'),
"objectId": "Id_B",
"locationId": "Location_B"
},
{
"timestamp": new ISODate('2022-01-01T00:00:00'),
"objectId": "Id_A",
"locationId": "Location_A"
}
]

给定倍数";以及";查询,我想计算每个范围的匹配文档

[$or: [
{ $and: [{
"timestamp": {$gte: new ISODate('2020-01-01T00:00:00'),
$lt: new ISODate('2020-12-31T00:00:00'),
},
"objectId": "Id_A",
"locationId": "Location_A"}]},
{ $and: [{
"timestamp": {$gte: new ISODate('2020-01-01T00:00:00'),
$lt: new ISODate('2022-12-31T00:00:00'),
},
"objectId": "Id_A",
"locationId": "Location_A"}]},
{ $and: [{
"timestamp": {$gte: new ISODate('2022-01-01T00:00:00'),
$lt: new ISODate('2022-12-31T00:00:00'),
},
"objectId": "Id_A",
"locationId": "Location_B"}]}
]
]

我想将计数映射到一个看起来像的结果结构

[
{"objectId": "Id_A", "locationId": "Location_A", "rangeStart:": new ISODate('2020-01-01T00:00:00'), "rangeEnd": new ISODate('2020-12-31T00:00:00'), "count": 1},
{"objectId": "Id_A", "locationId": "Location_A", "rangeStart:": new ISODate('2020-01-01T00:00:00'), "rangeEnd": new ISODate('2022-12-31T00:00:00'), "count": 3},
{"objectId": "Id_A", "locationId": "Location_B", "rangeStart:": new ISODate('2022-01-01T00:00:00'), "rangeEnd": new ISODate('2022-12-31T00:00:00'), "count": 1},
]

到目前为止我查看的汇总:

  • 铲斗
  • 刻面

但我还是想不通。你的方法是什么?

更新1rangeKeys没有正确分配给我从@Takis建议中得出的解决方案:

我得到以下结果:

{ 
"_id" : {
"objectId" : "objectA", 
"locationId" : "locationA", 
"rangeKey" : "UUID2", 
"count" : 1.0
}
}

而我期待以下结果:

{ 
"_id" : {
"objectId" : "objectA", 
"locationId" : "locationA", 
"rangeKey" : "UUID1",
"count" : 1.0
}
}, {

"_id" : {
"objectId" : "objectB", 
"locationId" : "locationA", 
"rangeKey" : "UUID2",
"count" : 0.0
}
}

这是我构建的查询

db.createCollection("object_location_tracking")
db.getCollection("object_location_tracking").insertMany([
{
_id: "1",
locationId: "locationA",
objectId: "objectA",
timestamp: ISODate("2020-01-01T00:00:00Z")
},
{
_id: "2",
locationId: "locationB",
objectId: "objectA",
timestamp: ISODate("2020-01-01T00:00:00Z")
},
{
_id: "3",
locationId: "locationA",
objectId: "objectB",
timestamp: ISODate("2019-01-01T00:00:00Z")
},
{
_id: "4",
locationId: "locationB",
objectId: "objectB",
timestamp: ISODate("2020-01-01T00:00:00Z")
}
]);
db.getCollection("object_location_tracking").aggregate(
[
{ 
"$match" : { 
"locationId" : "locationA", 
"$or" : [
{ 
"$and" : [
{ 
"objectId" : "objectA"
}, 
{ 
"timestamp" : { 
"$gte" : ISODate("2020-01-01T00:00:00.000+0000")
}
}, 
{ 
"timestamp" : { 
"$lt" : ISODate("2022-01-01T00:00:00.000+0000")
}
}
]
}, 
{ 
"$and" : [
{ 
"objectId" : "objectB"
}, 
{ 
"timestamp" : { 
"$gte" : ISODate("2020-01-01T00:00:00.000+0000")
}
}, 
{ 
"timestamp" : { 
"$lt" : ISODate("2022-01-01T00:00:00.000+0000")
}
}
]
}
]
}
}, 
{ 
"$group" : { 
"_id" : { 
"objectId" : "$objectId", 
"locationId" : "$locationId", 
"rangeKey" : { 
"$switch" : { 
"branches" : [
{ 
"case" : { 
"$and" : [
{ 
"$gte" : [
"$timestamp", 
ISODate("2020-01-01T00:00:00.000+0000")
]
}, 
{ 
"$lt" : [
"$timestamp", 
ISODate("2022-01-01T00:00:00.000+0000")
]
}, 
{ 
"objectId" : "objectB"
}, 
{ 
"locationId" : "locationA"
}
]
}, 
"then" : "UUDI2"
}, 
{ 
"case" : { 
"$and" : [
{ 
"$gte" : [
"$timestamp", 
ISODate("2020-01-01T00:00:00.000+0000")
]
}, 
{ 
"$lt" : [
"$timestamp", 
ISODate("2022-01-01T00:00:00.000+0000")
]
}, 
{ 
"objectId" : "objectA"
}, 
{ 
"locationId" : "locationA"
}
]
}, 
"then" : "UUID2"
}
], 
"default" : "0"
}
}, 
"count" : { 
"$sum" : 1.0
}
}
}
}
], 
{ 
"allowDiskUse" : true
}
);

正如评论中所暗示的那样,$facet将发挥作用。请注意,为了简单起见,ISODate使用了仅限年份的构造函数。$project$unwind不是绝对必要的,因为它们只是每个OP的格式化便利。$facet中只有一个文档将被转换为三个,并且对于范围只有三个离散文档,因此不会影响性能。

db.foo.aggregate([
{$facet: {
"first_bucket": [
{$match: {"objectId":"Id_A",
"locationId":"Location_A",
"timestamp": {$gte: new ISODate('2020-01-01'),
$lt: new ISODate('2020-12-31')}
}},
{$count: "N"}
],
"second_bucket": [
{$match: {"objectId":"Id_A",
"locationId":"Location_A",
"timestamp": {$gte: new ISODate('2020-01-01'),
$lt: new ISODate('2022-12-31')}
}},
{$count: "N"}
],
"third_bucket": [
{$match: {"objectId":"Id_A",
"locationId":"Location_B",
"timestamp": {$gte: new ISODate('2022-01-01'),
$lt: new ISODate('2022-12-31')}
}},
{$count: "N"}
]
}},
{$project: {X: [
{"objectId":"Id_A",
"locationId":"Location_A",
"rangeStart": new ISODate('2020-01-01'),
"rangeEnd": new ISODate('2020-12-31'),
"count": {$first: '$first_bucket.N'}
},
{"objectId":"Id_A",
"locationId":"Location_A",
"rangeStart": new ISODate('2020-01-01'),
"rangeEnd": new ISODate('2022-12-31'),
"count": {$first: '$second_bucket.N'}
},
{"objectId":"Id_A",
"locationId":"Location_B",
"rangeStart": new ISODate('2022-01-01'),
"rangeEnd": new ISODate('2022-12-31'),
"count": {$first: '$third_bucket.N'}
}
]
}},
{$unwind: '$X'},
{$replaceRoot: {newRoot: '$X'}}
]);

更新

CCD_ 6是在v>4.4.为了使该解决方案起作用,v<4.4,将$project中的count表达从$first改为:

"count": {$arrayElemAt:['$the_bucket.N',0]}

一个稍微有趣的变化是在$facet表达中使用$group。这将在bucket中生成更多条目,但其优点是只对日期范围进行硬编码。

db.foo.aggregate([
{$facet: {
"first_bucket": [
{$match: {"timestamp": {$gte: new ISODate('2020-01-01'),
$lt: new ISODate('2020-12-31')}
}},
{$group: {_id: {objectId: "$objectId", locationId:"$locationId"},
N: {$sum:1}}}
],
"second_bucket": [
{$match: {"timestamp": {$gte: new ISODate('2020-01-01'),
$lt: new ISODate('2022-12-31')}
}},
{$group: {_id: {objectId: "$objectId", locationId:"$locationId"},
N: {$sum:1}}}
],
"third_bucket": [
{$match: {"timestamp": {$gte: new ISODate('2022-01-01'),
$lt: new ISODate('2022-12-31')}
}},
{$group: {_id: {objectId: "$objectId", locationId:"$locationId"},
N: {$sum:1}}}
]
}}
]);

查询

  • 您可以添加一个$match作为第一阶段,以仅保留有效范围(这也可以使用索引(
  • objectId、locationId和条件范围上的组
  • 我没有测试下面的查询,因为我没有样本数据,如果不起作用,你会被卡住,如果你可以添加样本数据和预期输出

*$facet可以使用,但facet有这些问题(测试它,看看什么对您的查询更好(

  • 不使用索引(即使匹配是第一阶段(
  • 多次运行管道,每个字段1次
aggregate(
[{"$group":
{"_id":
{"objectId":"$objectId",
"locationId":"$locationId",
"range":
{"$switch":
{"branches":
[{"case":
{"$and":
[{"$gte":["$timestamp", ISODate("2020-01-01T00:00:00Z")]},
{"$lt":["$timestamp", ISODate("2020-12-31T00:00:00Z")]}]},
"then":
{"rangeStart":ISODate("2020-01-01T00:00:00Z"),
"rangeEnd":ISODate("2020-12-31T00:00:00Z")}},
{"case":
{"$and":
[{"$gte":["$timestamp", ISODate("2020-01-01T00:00:00Z")]},
{"$lt":["$timestamp", ISODate("2020-12-31T00:00:00Z")]}]},
"then":
{"rangeStart":ISODate("2020-01-01T00:00:00Z"),
"rangeEnd":ISODate("2020-12-31T00:00:00Z")}},
{"case":
{"$and":
[{"$gte":["$timestamp", ISODate("2020-01-01T00:00:00Z")]},
{"$lt":["$timestamp", ISODate("2020-12-31T00:00:00Z")]}]},
"then":
{"rangeStart":ISODate("2020-01-01T00:00:00Z"),
"rangeEnd":ISODate("2020-12-31T00:00:00Z")}}],
"default":"out-of-range"}}},
"count":{"$sum":1}}},
{"$project":
{"_id":0,
"count":1,
"objectId":"$_id.objectId",
"locationId":"$_id.locationId",
"range":"$_id.range"}}])

最新更新