$sort中的Mongo和索引问题



我问如何在mongo中启用$sort阶段的索引使用。因为它总是执行COLLSCAN,而一个简单的查询需要很长时间(在我的例子中是1'(才能执行。

例如:如果我运行db.data.distinct("doc.field"),它需要不到一秒钟的时间,但如果我运行

{$sort:{"doc.field":1}} //this should use index and ease the $group stage
,{$group:{"_id":"$doc.field", "lbl":1}}
,{$group:{"lbl":1, "out":{$push:"$_id"} }} //slow also without this last grouping stage

这相当于一分钟多的时间,我的数据库有大约5GiB的数据;稀疏的";以及";正常的";"文档字段"上的索引

第一个查询中的explain给出DISTINCT_SCAN并使用索引,第二个查询中使用COLLSCAN并不使用索引,即使根据文档,它应该使用索引。

我的问题是:

db.getCollection('data').aggregate( 
[
//{$match:{'events.hi2.ClientId':{$exists:false}}},
{$sort:{'events.hi2.ClientId':1}},
{
"$group": {
"_id": "$events.hi2.ClientId"
}
}]
,{allowDiskUse:true })

查询计划(带有allPlansExecution(是这样的:

{
"stages" : [ 
{
"$cursor" : {
"query" : {},
"fields" : {
"events.hi2.ClientId" : 1,
"_id" : 0
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "db.data",
"indexFilterSet" : false,
"parsedQuery" : {},
"queryHash" : "8B3D4AB8",
"planCacheKey" : "8B3D4AB8",
"winningPlan" : {
"stage" : "COLLSCAN",
"direction" : "forward"
},
"rejectedPlans" : []
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 5147612,
"executionTimeMillis" : 54159,
"totalKeysExamined" : 0,
"totalDocsExamined" : 5147612,
"executionStages" : {
"stage" : "COLLSCAN",
"nReturned" : 5147612,
"executionTimeMillisEstimate" : 404,
"works" : 5147614,
"advanced" : 5147612,
"needTime" : 1,
"needYield" : 0,
"saveState" : 42826,
"restoreState" : 42826,
"isEOF" : 1,
"direction" : "forward",
"docsExamined" : 5147612
},
"allPlansExecution" : []
}
}
}, 
{
"$sort" : {
"sortKey" : {
"events.hi2.ClientId" : 1
}
}
}, 
{
"$group" : {
"_id" : "$events.hi2.ClientId"
}
}
],
"ok" : 1.0
}

另一个查询是:

db.data.distinct('events.hi2.ClientId');

它的查询计划(与allPlansExecution(是这样的:

{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "db.data",
"indexFilterSet" : false,
"parsedQuery" : {},
"queryHash" : "03E96EE9",
"planCacheKey" : "03E96EE9",
"winningPlan" : {
"stage" : "PROJECTION_DEFAULT",
"transformBy" : {
"_id" : 0,
"events.hi2.ClientId" : 1
},
"inputStage" : {
"stage" : "DISTINCT_SCAN",
"keyPattern" : {
"events.hi2.ClientId" : 1
},
"indexName" : "ClientId",
"isMultiKey" : true,
"multiKeyPaths" : {
"events.hi2.ClientId" : [ 
"events"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"events.hi2.ClientId" : [ 
"[MinKey, MaxKey]"
]
}
}
},
"rejectedPlans" : []
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 201,
"executionTimeMillis" : 3,
"totalKeysExamined" : 201,
"totalDocsExamined" : 0,
"executionStages" : {
"stage" : "PROJECTION_DEFAULT",
"nReturned" : 201,
"executionTimeMillisEstimate" : 0,
"works" : 202,
"advanced" : 201,
"needTime" : 0,
"needYield" : 0,
"saveState" : 1,
"restoreState" : 1,
"isEOF" : 1,
"transformBy" : {
"_id" : 0,
"events.hi2.ClientId" : 1
},
"inputStage" : {
"stage" : "DISTINCT_SCAN",
"nReturned" : 201,
"executionTimeMillisEstimate" : 0,
"works" : 202,
"advanced" : 201,
"needTime" : 0,
"needYield" : 0,
"saveState" : 1,
"restoreState" : 1,
"isEOF" : 1,
"keyPattern" : {
"events.hi2.ClientId" : 1
},
"indexName" : "ClientId",
"isMultiKey" : true,
"multiKeyPaths" : {
"events.hi2.ClientId" : [ 
"events"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"events.hi2.ClientId" : [ 
"[MinKey, MaxKey]"
]
},
"keysExamined" : 201
}
},
"allPlansExecution" : []
},

"ok" : 1.0
}

从MongoDBv4.2:开始

如果管道按同一字段进行排序和分组。。。CCD_ 8级可以使用索引。。。https://docs.mongodb.com/manual/reference/operator/aggregation/group/#optimization-返回每组的第一个文档

db.getCollection('data').explain("allPlansExecution").aggregate([
{"$sort":{'events.hi2.ClientId':1}}, // If you dont use `$first` operator,
// you don't need to sort at all
{
"$group": {
"_id": "$events.hi2.ClientId"
}
}
],{"allowDiskUse":true })

---输出---

{
"stages" : [ 
{
"$cursor" : {
"query" : {},
"sort" : {
"events.hi2.ClientId" : 1
},
"fields" : {
"events.hi2.ClientId" : 1,
"_id" : 0
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test.data",
"indexFilterSet" : false,
"parsedQuery" : {},
"queryHash" : "14D2D195",
"planCacheKey" : "14D2D195",
"winningPlan" : {
"stage" : "PROJECTION_DEFAULT",
"transformBy" : {
"events.hi2.ClientId" : 1,
"_id" : 0
},
"inputStage" : {
"stage" : "DISTINCT_SCAN",
"keyPattern" : {
"events.hi2.ClientId" : 1.0
},
"indexName" : "events.hi2.ClientId_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"events.hi2.ClientId" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"events.hi2.ClientId" : [ 
"[MinKey, MaxKey]"
]
}
}
},
"rejectedPlans" : []
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 100000,
"executionTimeMillis" : 739,
"totalKeysExamined" : 100000,
"totalDocsExamined" : 0,
"executionStages" : {
"stage" : "PROJECTION_DEFAULT",
"nReturned" : 100000,
"executionTimeMillisEstimate" : 28,
"works" : 100001,
"advanced" : 100000,
"needTime" : 0,
"needYield" : 0,
"saveState" : 799,
"restoreState" : 799,
"isEOF" : 1,
"transformBy" : {
"events.hi2.ClientId" : 1,
"_id" : 0
},
"inputStage" : {
"stage" : "DISTINCT_SCAN",
"nReturned" : 100000,
"executionTimeMillisEstimate" : 15,
"works" : 100001,
"advanced" : 100000,
"needTime" : 0,
"needYield" : 0,
"saveState" : 799,
"restoreState" : 799,
"isEOF" : 1,
"keyPattern" : {
"events.hi2.ClientId" : 1.0
},
"indexName" : "events.hi2.ClientId_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"events.hi2.ClientId" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"events.hi2.ClientId" : [ 
"[MinKey, MaxKey]"
]
},
"keysExamined" : 100000
}
},
"allPlansExecution" : []
}
}
}, 
{
"$groupByDistinctScan" : {
"newRoot" : {
"_id" : "$events.hi2.ClientId"
}
}
}
],
"serverInfo" : {
...
},
"ok" : 1.0
}

最新更新