Elasticsearch - 简单请求和聚合请求(NEST)的结果排序不一致



我有一个搜索页面,其中包含两个搜索结果类型:摘要结果和具体结果。

  • 摘要结果页面包含每个类别的前3个结果(最佳命中)。
  • 具体结果页面包含选定类别的所有结果。

要获得摘要页面,我使用请求:

// Summary-page request: a multi_match query over Content1/Content2/Content3 combined with a
// LanguageId restriction (1 or 0), plus a terms aggregation on Category whose buckets each
// carry a top_hits sub-aggregation of size 3.
var searchDescriptor = new SearchDescriptor<ElasticType>();
searchDescriptor.Index("index_name")
    .Query(q =>
      q.MultiMatch(m => m
        // Per-field boosts; they appear as "content1^3", "content2^2", "content3^1" in the native query.
        .Fields(fs => fs
          .Field(f => f.Content1, 3)
          .Field(f => f.Content2, 2)
          .Field(f => f.Content3, 1))
        .Fuzziness(Fuzziness.EditDistance(1))
        .Query(query)
        .Boost(1.1)
        .Slop(2)
        .PrefixLength(1)
        .MaxExpansions(100)
        .Operator(Operator.Or)
        .MinimumShouldMatch(2)
        .FuzzyRewrite(RewriteMultiTerm.ConstantScoreBoolean)
        .TieBreaker(1.0)
        .CutoffFrequency(0.5)
        .Lenient()
        .ZeroTermsQuery(ZeroTermsQuery.All))
    // The && / || operators combine the queries: in the native query this becomes a bool "must"
    // holding the multi_match and a nested bool "should" with the two languageId terms queries.
    && (q.Terms(t => t.Field(f => f.LanguageId).Terms(1)) || q.Terms(t => t.Field(f => f.LanguageId).Terms(0))))
    // Up to 10 Category buckets, each returning its 3 top hits. No sort is specified on top_hits here.
    .Aggregations(a => a
      .Terms("category", tagd => tagd
        .Field(f => f.Category)
        .Size(10)
        .Aggregations(aggs => aggs.TopHits("top_tag_hits", t => t.Size(3)))))
    // NOTE(review): the boosts passed here are serialized into "fielddata_fields" as "content1^3" etc.
    // It is unclear whether a boost is meaningful for fielddata fields; confirm this is intended.
    .FielddataFields(fs => fs
      .Field(p => p.Content1, 3)
      .Field(p => p.Content2, 2)
      .Field(p => p.Content3, 1));
// The selector lambda ignores its argument and returns the descriptor built above.
var elasticResult = _elasticClient.Search<ElasticType>(_ => searchDescriptor);

我得到结果,例如

{
    "aggregations": {
        "category": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [{
                "key": "category1",
                "doc_count": 40,
                "top_tag_hits": {
                    "hits": {
                        "total": 40,
                        "max_score": 5.4,
                        "hits": [{
                            "_index": "...",
                            "_type": "...",
                            "_id": "...",
                            "_score": 5.4,
                            "_source": {
                                "id": 1
                            }
                        },
                        {
                            "_index": "...",
                            "_type": "...",
                            "_id": "...",
                            "_score": 4.3,
                            "_source": {
                                "id": 3 // FAIL!
                            }
                        },
                        {
                            "_index": "...",
                            "_type": "...",
                            "_id": "...",
                            "_score": 4.3,
                            "_source": {
                                "id": 2
                            }
                        }]
                    }
                }
            }]
        }
    }
}

所以我得到了几个 _score 相同的命中。

为了获得具体结果(按类别),我使用以下请求:

// Per-category ("concrete") request: a paged multi_match query over Content1/Content2/Content3
// and Category, restricted to one Category value and to LanguageId 1 or 0, with a plain terms
// aggregation on Category (no top_hits sub-aggregation).
var searchDescriptor = new SearchDescriptor<ElasticType>();
searchDescriptor.Index("index_name")
    // Page size falls back to 100 when perPage is not positive.
    .Size(perPage <= 0 ? 100 : perPage)
    // NOTE(review): the offset uses the raw perPage, not the 100 fallback applied to Size above;
    // with perPage <= 0 the offset is zero or negative. Confirm this is intended.
    .From(page * perPage)
    .Query(q => q
      .MultiMatch(m => m
         // Same boosted content fields as the summary request, plus the unboosted Category field.
         .Fields(fs => fs
           .Field(f => f.Content1, 3)
           .Field(f => f.Content2, 2)
           .Field(f => f.Content3, 1)
           .Field(f => f.Category))
         .Fuzziness(Fuzziness.EditDistance(1))
         .Query(searchRequest.Query)
         .Boost(1.1)
         .Slop(2)
         .PrefixLength(1)
         .MaxExpansions(100)
         .Operator(Operator.Or)
         .MinimumShouldMatch(2)
         .FuzzyRewrite(RewriteMultiTerm.ConstantScoreBoolean)
         .TieBreaker(1.0)
         .CutoffFrequency(0.5)
         .Lenient()
         .ZeroTermsQuery(ZeroTermsQuery.All))
      // In the native query the multi_match and this term query sit together in a bool "must",
      // so the Category term clause is part of the scored query, unlike in the summary request.
      && q.Term(t => t.Field(f => f.Category).Value(searchRequest.Category))
      && (q.Terms(t => t.Field(f => f.LanguageId).Terms(1)) || q.Terms(t => t.Field(f => f.LanguageId).Terms(0))))
    // NOTE(review): the boosts passed here are serialized into "fielddata_fields" as "content1^3" etc.
    // It is unclear whether a boost is meaningful for fielddata fields; confirm this is intended.
    .FielddataFields(fs => fs
      .Field(p => p.Content1, 3)
      .Field(p => p.Content2, 2)
      .Field(p => p.Content3, 1))
    // Terms aggregation on Category with no explicit size and no sub-aggregations.
    .Aggregations(a => a
      .Terms("category", tagd => tagd
        .Field(f => f.Category)));

和这样的结果:

{
    "hits": {
        "total": 40,
        "max_score": 7.816723,
        "hits": [{
            "_index": "...",
            "_type": "...",
            "_id": "...",
            "_score": 7.816723,
            "_source": {
                "id": 1
            }
        },
        {
            "_index": "...",
            "_type": "...",
            "_id": "...",
            "_score": 6.514713,
            "_source": {
                "id": 2
            }
        },
        {
            "_index": "...",
            "_type": "...",
            "_id": "...",
            "_score": 6.514709,
            "_source": {
                "id": 3
            }
        }]
    }
}

等等。在第二种情况下,对于特定类别,我得到的 _score 非常精确,Elasticsearch 可以轻松地对结果进行正确排序。但是在聚合的情况下,存在 _score 相同的结果,此时排序是如何进行的并不清楚。

有人可以为我指明方向,告诉我该如何解决这个问题吗?或者如何让两种结果的顺序保持一致?也许我可以提高聚合结果的评分精度?

我使用的 Elasticsearch 服务器版本为 "5.3.0",NEST 库版本为 "5.0.0"。

更新:聚合请求的原生查询:

{
    "fielddata_fields": [
        "content1^3",
        "content2^2",
        "content3^1"
    ],
    "aggs": {
        "category": {
            "terms": {
                "field": "category",
                "size": 10
            },
            "aggs": {
                "top_tag_hits": {
                    "top_hits": {
                        "size": 3
                    }
                }
            }
        }
    },
    "query": {
        "bool": {
            "must": [
                {
                    "multi_match": {
                        "boost": 1.1,
                        "query": "sparta",
                        "fuzzy_rewrite": "constant_score_boolean",
                        "fuzziness": 1,
                        "cutoff_frequency": 0.5,
                        "prefix_length": 1,
                        "max_expansions": 100,
                        "slop": 2,
                        "lenient": true,
                        "tie_breaker": 1.0,
                        "minimum_should_match": 2,
                        "operator": "or",
                        "fields": [
                            "content1^3",
                            "content2^2",
                            "content3^1"
                        ],
                        "zero_terms_query": "all"
                    }
                },
                {
                    "bool": {
                        "should": [
                            {
                                "terms": {
                                    "languageId": [
                                        1
                                    ]
                                }
                            },
                            {
                                "terms": {
                                    "languageId": [
                                        0
                                    ]
                                }
                            }
                        ]
                    }
                }
            ]
        }
    }
}

具体请求的原生查询:

{
    "from": 0,
    "size": 100,
    "fielddata_fields": [
        "content1^3",
        "content2^2",
        "content3^1"
    ],
    "aggs": {
        "category": {
            "terms": {
                "field": "category"
            }
        }
    },
    "query": {
        "bool": {
            "must": [
                {
                    "bool": {
                        "must": [
                            {
                                "multi_match": {
                                    "boost": 1.1,
                                    "query": ".....",
                                    "fuzzy_rewrite": "constant_score_boolean",
                                    "fuzziness": 1,
                                    "cutoff_frequency": 0.5,
                                    "prefix_length": 1,
                                    "max_expansions": 100,
                                    "slop": 2,
                                    "lenient": true,
                                    "tie_breaker": 1.0,
                                    "minimum_should_match": 2,
                                    "operator": "or",
                                    "fields": [
                                        "content1^3",
                                        "content2^2",
                                        "content3^1",
                                        "category"
                                    ],
                                    "zero_terms_query": "all"
                                }
                            },
                            {
                                "term": {
                                    "category": {
                                        "value": "category1"
                                    }
                                }
                            }
                        ]
                    }
                },
                {
                    "bool": {
                        "should": [
                            {
                                "terms": {
                                    "languageId": [
                                        1
                                    ]
                                }
                            },
                            {
                                "terms": {
                                    "languageId": [
                                        0
                                    ]
                                }
                            }
                        ]
                    }
                }
            ]
        }
    }
}

我还使用以下映射来创建索引:

// Index creation: defines the mapping for ElasticType and creates the index named indexName.
var descriptor = new CreateIndexDescriptor(indexName)
    .Mappings(ms => ms
     .Map<ElasticType>(m => m
       .Properties(ps => ps
         // Title and Category are mapped as keyword fields; Content1..Content3 as text fields.
         .Keyword(s => s.Name(ecp => ecp.Title))
         .Text(s => s.Name(ecp => ecp.Content1))
         .Text(s => s.Name(ecp => ecp.Content2))
         .Text(s => s.Name(ecp => ecp.Content3))
         .Date(s => s.Name(ecp => ecp.Date))
         .Number(s => s.Name(ecp => ecp.LanguageId).Type(NumberType.Integer))
         .Keyword(s => s.Name(ecp => ecp.Category))
         // The remaining text fields are declared with Index(false).
         .Text(s => s.Name(ecp => ecp.PreviewImageUrl).Index(false))
         .Text(s => s.Name(ecp => ecp.OptionalContent).Index(false))
         .Text(s => s.Name(ecp => ecp.Url).Index(false)))));
    // The selector lambda ignores its argument and returns the descriptor built above.
    _elasticClient.CreateIndex(indexName, _ => descriptor);

您的查询有问题。

  1. 您在 bool 查询中使用了 must、must 和 should 的组合。

    如果您在此链接中阅读更多信息,可以看到关于 must 的说明:

    该子句(查询)必须出现在匹配的文档中,并将为分数做出贡献。

    因此,它会给所有与条件匹配的文档相同的评分。任何不匹配该条件的文档甚至不会在结果中被评分。

    您应该做的是使用should查询,但在must查询之外,因此Elasticsearch将能够正确评分您的文档

  2. 关于此问题中的以下部分,补充一些信息:

    有人可以将我带到正确的道路如何解决这个问题?

    您应该在查询中传递'explain': true。您可以在此链接中阅读有关解释查询以及如何解释结果的更多信息。

  3. 对于您的以下问题,回答是:

    如何在结果中达到相同的顺序?

    由于每个分数都相同,Elasticsearch 可能按照从各节点收到响应的任意顺序对结果排序。

可能的解决方案:

您应该重新组织查询,以真正利用should查询及其提升功能。您可以在此处阅读有关提升的更多信息。

我尝试了两个与您的查询类似、但正确使用了 should 的查询,它们如预期那样给出了相同的顺序。您的两个查询应如下:

{
  "from": 0,
  "size": 10,
  "_source": [
    "content1^3",
    "content2^2",
    "content3^1"
  ],
  "query": {
    "bool": {
      "should": [
        {
          "match": {
            "languageId": 1
          }
        },
        {
          "match": {
            "languageId": 0
          }
        }
      ],
      "must": [
        {
          "multi_match": {
            "boost": 1.1,
            "query": ".....",
            "fuzzy_rewrite": "constant_score_boolean",
            "fuzziness": 1,
            "cutoff_frequency": 0.5,
            "prefix_length": 1,
            "max_expansions": 100,
            "slop": 2,
            "lenient": true,
            "tie_breaker": 1,
            "minimum_should_match": 2,
            "operator": "or",
            "fields": [
              "content1^3",
              "content2^2",
              "content3^1",
              "category"
            ],
            "zero_terms_query": "all"
          }
        }
      ]
    }
  }
}

第二个查询为:

{
  "size": 0,
  "query": {
    "bool": {
      "should": [
        {
          "match": {
            "languageId": 1
          }
        },
        {
          "match": {
            "languageId": 0
          }
        }
      ],
      "must": [
        {
          "multi_match": {
            "boost": 1.1,
            "query": ".....",
            "fuzzy_rewrite": "constant_score_boolean",
            "fuzziness": 1,
            "cutoff_frequency": 0.5,
            "prefix_length": 1,
            "max_expansions": 100,
            "slop": 2,
            "lenient": true,
            "tie_breaker": 1,
            "minimum_should_match": 2,
            "operator": "or",
            "fields": [
              "content1^3",
              "content2^2",
              "content3^1",
              "category"
            ],
            "zero_terms_query": "all"
          }
        }
      ]
    }
  },
  "aggs": {
    "categories": {
      "terms": {
        "field": "category",
        "size": 10
      },
      "aggs": {
        "produdtcs": {
          "top_hits": {
            "_source": [
              "content1^3",
              "content2^2",
              "content3^1"
            ],
            "size": 3
          }
        }
      }
    }
  }
}

最新更新