如何改进 MongoDB 中使用 $group 和 $accumulator 阶段的聚合查询?



下面的查询:

  • 用 $match 匹配选定的文档
  • 按日期(年、月、日)分组,然后使用 $accumulator 向返回的数据集添加每个 url 及其计数

问题是这个查询非常复杂,而且随着数据量的增长,性能似乎并不理想。在 MongoDB 中是否有更简单的方法来实现同样的效果?

输出形状如下:

{
"results": [
{
"_id": {
"month": 2,
"day": 2,
"year": 2021
},
"urls": [
{
"url": "https://shop.mydomain.com/product/golden-axe",
"count": 20
},
{
"url": "https://shop.mydomain.com/product/phantasy-star",
"count": 218
},
{
"url": "https://shop.mydomain.com/product/sega-arcades-retro",
"count": 30
}
],
"count": 268
},
{
"_id": {
"month": 2,
"day": 3,
"year": 2021
},
"urls": [
{
"url": "https://shop.mydomain.com/product/golden-axe",
"count": 109
},
{
"url": "https://shop.mydomain.com/product/phantasy-star",
"count": 416
},
{
"url": "https://shop.mydomain.com/product/sega-arcades-retro",
"count": 109
}
],
"count": 634
},
// Aggregation pipeline: counts 'itemLineView' documents per calendar day and,
// within each day, per url.
//   1. $match  - keeps documents in the requested date range whose url starts
//                with query.url.
//   2. $group  - groups by (month, day, year) of createdAt; `urls` is built by a
//                custom JavaScript $accumulator, `count` is the day's total.
//   3. $sort   - orders the days chronologically.
const aggregate = [
  {
    $match: {
      source: 'itemLineView',
      createdAt: {
        $gte: new Date(query.dateGT),
        $lte: new Date(query.dateLT)
      },
      // NOTE(review): query.url is interpolated into the pattern unescaped, so
      // regex metacharacters in it (e.g. '.', '?') are treated as regex syntax.
      // Confirm this is intended, or escape the value before building the regex.
      url: { $regex: `^${query.url}` },
    }
  },
  {
    $group: {
      _id: {
        month: {
          $month: '$createdAt'
        },
        day: {
          $dayOfMonth: '$createdAt'
        },
        year: {
          $year: '$createdAt'
        }
      },
      urls: {
        // https://docs.mongodb.com/manual/reference/operator/aggregation/accumulator/#grp._S_accumulator
        $accumulator: {
          // Initial state: no urls seen yet.
          init: function (): AccumulatorSourceStats {
            return {
              origins: []
            };
          },
          // initArgs: [], // Argument arr to pass to the init function
          // Called once per document: bump the counter of the matching url,
          // or start a new entry at 1.
          accumulate: function (state: AccumulatorSourceStats, url: string) {
            const index = state.origins.findIndex(function (origin) {
              return origin.url === url;
            });
            if (index === -1) {
              state.origins.push({
                url: url,
                count: 1
              });
            } else {
              ++state.origins[index].count;
            }
            return state;
          },
          accumulateArgs: ['$url'], // Argument(s) passed to the accumulate function
          // Combines two partial states. Counts are summed per url so that a url
          // present in both states ends up as a single entry; plain
          // concatenation would leave duplicate url entries with split counts.
          merge: function (state1: AccumulatorSourceStats, state2: AccumulatorSourceStats) {
            state2.origins.forEach(function (incoming) {
              const index = state1.origins.findIndex(function (origin) {
                return origin.url === incoming.url;
              });
              if (index === -1) {
                state1.origins.push(incoming);
              } else {
                state1.origins[index].count += incoming.count;
              }
            });
            return state1;
          },
          // Returns only the array of { url, count } entries, sorted by url.
          finalize: function (state: AccumulatorSourceStats) {
            const sortByUrl = function (a: AccumulatorSourceStatsOrigin, b: AccumulatorSourceStatsOrigin) {
              if (a.url < b.url) {
                return -1;
              }
              if (a.url > b.url) {
                return 1;
              }
              return 0;
            };
            return state.origins.sort(sortByUrl);
          },
          lang: 'js'
        }
      },
      count: { $sum: 1 }
    }
  },
  // Sort on the individual date parts. Sorting on the whole `_id` sub-document
  // compares its fields in declaration order (month, day, year), which puts
  // days from different years in the wrong order.
  { $sort: { '_id.year': 1, '_id.month': 1, '_id.day': 1 } }
];
return this.model.aggregate(aggregate);

引自官方文档:

在聚合操作符内执行JavaScript可能会降低性能。只有当提供的管道操作符不能满足应用程序的需要时,才使用$accumulator操作符。

一般认为,应当避免在 Mongo 聚合管道中使用 JavaScript 代码,它只应作为最后的手段。在这种情况下,我们可以用两次 $group 来避免使用它:第一次按"日期 + url"分组,统计每个 url 每天的数量;第二次仅按日期分组,把各 url 的结果汇总到同一天。像这样:

// Same per-day / per-url counts as the $accumulator version, using only
// built-in pipeline operators (no server-side JavaScript).
db.collection.aggregate([
  // Keep only the relevant documents: right source, date range and url prefix.
  {
    $match: {
      source: "itemLineView",
      createdAt: {
        $gte: new Date(query.dateGT),
        $lte: new Date(query.dateLT)
      },
      url: {
        $regex: `^${query.url}`
      }
    }
  },
  // First pass: one bucket per (day, url) pair, counting its documents.
  {
    $group: {
      _id: {
        month: {
          $month: "$createdAt"
        },
        day: {
          $dayOfMonth: "$createdAt"
        },
        year: {
          $year: "$createdAt"
        },
        url: "$url"
      },
      count: {
        $sum: 1
      }
    }
  },
  // Order the buckets by url so the $push below emits each day's `urls`
  // array sorted by url.
  {
    $sort: {
      "_id.url": 1
    }
  },
  // Second pass: collapse the (day, url) buckets into one document per day,
  // collecting the per-url counts and summing them into the day's total.
  {
    $group: {
      _id: {
        month: "$_id.month",
        day: "$_id.day",
        year: "$_id.year"
      },
      urls: {
        $push: {
          url: "$_id.url",
          count: "$count"
        }
      },
      count: {
        $sum: "$count"
      }
    }
  },
  // $group does not guarantee output order; return the days chronologically.
  {
    $sort: {
      "_id.year": 1,
      "_id.month": 1,
      "_id.day": 1
    }
  }
])

Mongo Playground(在线示例)

最新更新