如何在 Elasticsearch 中编写按一个键分组并返回多个值的 GROUP BY 查询



我们在 Elasticsearch 中有一个用户索引,其中的用户文档具有以下结构。我们想从 skills 数组中提取出各个 skill 对象。

{
"first_name":"xxxx",
"last_name":"xxxx",
"skills":[
{
"years_of_experience": 1,
"id": "1",
"skill": { 
"id":"1",
"label":"java",
"category":"Coding"
}
}, 
{
"years_of_experience": 2,
"id": "2",
"skill":{ 
"id":"2",
"label":"Python",
"category":"Coding"
}
}, 
{
"years_of_experience": 1,
"id": "1",
"skill": { 
"id":"3",
"label":"Wix",
"category":"CMS"
}
}
]
}

我如何返回所有不同的标签(label)及其对应的 id 和类别(category)?

我尝试过这个查询:

{
"from":0,
"size":0,
"aggs": {
"by_code": {
"terms": {
"field": "skills.skill.label.keyword"
}
}
}
}

但它只返回标签和文档计数:

{
"key": "Data Factory",
"doc_count": 2
},
{
"key": "Databricks",
"doc_count": 2
},
{
"key": "Pyspark",
"doc_count": 2
},
{
"key": "AWS",
"doc_count": 1
},
{
"key": "Wix",
"doc_count": 1
}

您需要将 nested 聚合与 terms 聚合和 top_hits 聚合结合使用,才能得到所需的结果。

下面是一个包含索引映射、索引数据、搜索查询和搜索结果的可运行示例。

索引映射:

{
"mappings": {
"properties": {
"skills": {
"type": "nested",
"properties": {
"skill": {
"type": "nested"
}
}
}
}
}
}

索引数据:

{
"first_name": "xxxx",
"last_name": "xxxx",
"skills": [
{
"years_of_experience": 1,
"id": "1",
"skill": {
"id": "1",
"label": "java",
"category": "Coding"
}
},
{
"years_of_experience": 2,
"id": "2",
"skill": {
"id": "2",
"label": "Python",
"category": "Coding"
}
},
{
"years_of_experience": 1,
"id": "1",
"skill": {
"id": "3",
"label": "Wix",
"category": "CMS"
}
}
]
}

搜索查询:

{
"size": 0,
"aggs": {
"nested_skills": {
"nested": {
"path": "skills.skill"
},
"aggs": {
"by_code": {
"terms": {
"field": "skills.skill.label.keyword",
"size": 10
},
"aggs": {
"top_skills_hits": {
"top_hits": {
"size": 1,
"_source": {
"includes": [
"skills.skill.id",
"skills.skill.category"
]
}
}
}
}
}
}
}
}
}

搜索结果:

"aggregations": {
"nested_skills": {
"doc_count": 3,
"by_code": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Python",     // note this
"doc_count": 1,
"top_skills_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "65544482",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "skills",
"offset": 1,
"_nested": {
"field": "skill",
"offset": 0
}
},
"_score": 1.0,
"_source": {
"id": "2",                  // note this
"category": "Coding"
}
}
]
}
}
},
{
"key": "Wix",
"doc_count": 1,
"top_skills_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "65544482",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "skills",
"offset": 2,
"_nested": {
"field": "skill",
"offset": 0
}
},
"_score": 1.0,
"_source": {
"id": "3",
"category": "CMS"
}
}
]
}
}
},
{
"key": "java",
"doc_count": 1,
"top_skills_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "65544482",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "skills",
"offset": 0,
"_nested": {
"field": "skill",
"offset": 0
}
},
"_score": 1.0,
"_source": {
"id": "1",
"category": "Coding"
}
}
]
}
}
}
]
}
}
}

最新更新