Elasticsearch 在使用多个 must_not 条件时行为不一致



我正在开发一个基于 NodeJS 的 API,它使用 Elasticsearch 和 NodeJS 构建。我需要提供一项功能,让 API 用户可以根据分类(taxonomy)术语排除某些内容。

freeform:多值字段 - 类型:string

games:多值字段 - 类型:string

players:多值字段 - 类型:string

teams:多值字段 - 类型:string

writer:多值字段 - 类型:string

channels:多值字段 - 类型:string

API网址的格式与下面类似。https://api.test.net/2/xxxxx/article/?exclude=term1+term2

以上请求应返回未同时标记 term1 和 term2 的内容。

假设我们有 3 篇文章:第一篇文章标记为 term1,第二篇文章标记为 term2,第三篇文章同时标记为 term1 和 term2。

预期结果:https://api.test.net/2/xxxxx/article/?exclude=term1+term2 应返回第一篇文章(标记为 term1)和第二篇文章(标记为 term2),而不返回第三篇文章(同时标记为 term1 和 term2)。

实际结果:有时 https://api.test.net/2/xxxxx/article/?exclude=term1+term2 返回全部三篇文章,有时只返回第一篇(标记为 term1)和第二篇(标记为 term2)。

更新:下面是正确的文档,我也添加了查询,但我仍然看到不一致。

文档:

{
"team":{
"mappings":{
"article":{
"properties":{
"body":{
"properties":{
"content":{
"type":"string"
},
"html":{
"type":"string"
},
"type":{
"type":"string"
}
}
},
"brand":{
"type":"string"
},
"changed":{
"type":"date",
"format":"dateOptionalTime"
},
"changedUtc":{
"type":"long"
},
"content":{
"type":"string"
},
"created":{
"type":"date",
"format":"dateOptionalTime"
},
"createdUtc":{
"type":"long"
},
"domain":{
"type":"string"
},
"headline":{
"type":"string"
},
"langcode":{
"type":"string",
"index":"not_analyzed"
},
"nid":{
"type":"string"
},
"published":{
"type":"date",
"format":"dateOptionalTime"
},
"publishedUtc":{
"type":"long"
},
"raw":{
"type":"string",
"index":"no"
},
"revisionTimestamp":{
"type":"date",
"format":"dateOptionalTime"
},
"revisionTimestampUtc":{
"type":"long"
},
"status":{
"type":"string"
},
"subheadline":{
"type":"string"
},
"syndication":{
"properties":{
"brand":{
"type":"string"
},
"options":{
"properties":{
"actionFrom":{
"type":"string"
},
"publish":{
"type":"string"
},
"status":{
"type":"string"
}
}
},
"type":{
"type":"string"
}
}
},
"taxonomy":{
"properties":{
"coaches":{
"properties":{
"value":{
"type":"string"
}
}
},
"freeform":{
"properties":{
"value":{
"type":"string"
}
}
},
"games":{
"properties":{
"id":{
"type":"string"
},
"value":{
"type":"string"
}
}
},
"players":{
"properties":{
"id":{
"type":"string"
},
"value":{
"type":"string"
}
}
},
"section":{
"properties":{
"value":{
"type":"string"
}
}
},
"teams":{
"properties":{
"city":{
"type":"string"
},
"id":{
"type":"string"
},
"nickname":{
"type":"string"
},
"tricode":{
"type":"string"
},
"urlName":{
"type":"string"
},
"value":{
"type":"string"
}
}
},
"writer":{
"properties":{
"emailAddress":{
"type":"string"
},
"id":{
"type":"string"
},
"responsive_web":{
"type":"string"
},
"title":{
"type":"string"
},
"value":{
"type":"string"
}
}
}
}
},
"teaser":{
"type":"string"
},
"title":{
"type":"string"
},
"type":{
"type":"string"
},
"url":{
"type":"string",
"index":"not_analyzed"
},
"uuid":{
"type":"string",
"index":"not_analyzed"
},
"vid":{
"type":"string"
}
}
}
}
}
}

查询:

{
"filter":{
"bool":{
"should":[
[
{
"term":{
"status":1
}
}
]
],
"must":[
]
}
},
"from":0,
"size":10,
"_source":{
"include":[
"uuid",
"nid",
"type",
"title",
"headline",
"shortHeadline",
"teaser",
"url",
"published",
"changed",
"subheadline",
"listImage",
"brand",
"videoId",
"videoSource",
"duration",
"taxonomy",
"includeTerms",
"excludeTerms",
"media",
"credit",
"caption",
"description",
"videoCaptions",
"franchiseId",
"showAirTime",
"slateImage",
"mainImage",
"thumbnailImage",
"showTitle",
"status",
"pubstatus"
]
},
"query":{
"bool":{
"must":[
{
"query_string":{
"query":"langcode:\"en\""
}
},
{
"bool":{
"must_not":{
"query_string":{
"query":"(taxonomy.freeform.value:\"sociosqu\" AND taxonomy.freeform.value:\"nullam\") OR (taxonomy.games.id:\"sociosqu\" AND taxonomy.games.id:\"nullam\") OR (taxonomy.players.id:\"sociosqu\" AND taxonomy.players.id:\"nullam\") OR (taxonomy.teams.id:\"sociosqu\" AND taxonomy.teams.id:\"nullam\") OR (taxonomy.writer.value:\"sociosqu\" AND taxonomy.writer.value:\"nullam\") OR (taxonomy.channels.value:\"sociosqu\" AND taxonomy.channels.value:\"nullam\") OR (taxonomy.section.value:\"sociosqu\" AND taxonomy.section.value:\"nullam\") "
}
}
}
}
]
}
},
"sort":[
{
"publishedUtc":{
"order":"desc"
}
},
{
"_score":{
"order":"desc"
}
}
]
}

我假设您有类似的映射:

{
"mappings": {
"_doc": {
"properties": {
"taxonomy": {
"properties": {
"game": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"team": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
}
}

所需查询为:

{
"query": {
"bool": {
"filter": [
{
"query_string": {
"query": "(t1 OR t2) NOT (t1 AND t2)",
"fields": [
"taxonomy.game.keyword",
"taxonomy.team.keyword"
]
}
}
]
}
}
}

根据您提供的文件,您的字段列表将为:

"fields":[
"taxonomy.teams.id",
"taxonomy.freeform.value",
"taxonomy.games.id",
"taxonomy.players.id",
"taxonomy.writer.value",
"taxonomy.coaches.value"
]

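请注意,我假设 `id` 和 `value` 字段是 `keyword` 类型。如果不是,请修改映射。此外,如果映射是动态创建的,则 `id` 和 `value` 下会有一个名为 `keyword` 的子字段,就像上面示例映射中 `game` 和 `team` 下的 `keyword` 子字段一样;这种情况下,请在字段列表中使用带 `.keyword` 后缀的字段名(例如 `taxonomy.teams.id.keyword`)。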

以上适用于 Elasticsearch 6.x。

根据添加到问题的查询进行更新:

查询中的must_not部分在逻辑上应该是:

(f1=t1 AND f1=t2) OR (f2=t1 AND f2=t2) OR (f3=t1 AND f3=t2) ....

其中 fi 表示第 i 个字段,ti 表示第 i 个词项(term)。

所以must_not部分应该是:

{
"must_not": {
"query_string": {
"query": "(taxonomy.freeform.value:\"sociosqu\" AND taxonomy.freeform.value:\"nullam\") OR (taxonomy.games.id:\"sociosqu\" AND taxonomy.games.id:\"nullam\") OR (taxonomy.players.id:\"sociosqu\" AND taxonomy.players.id:\"nullam\") OR (taxonomy.teams.id:\"sociosqu\" AND taxonomy.teams.id:\"nullam\") OR (taxonomy.writer.value:\"sociosqu\" AND taxonomy.writer.value:\"nullam\") OR (taxonomy.channels.value:\"sociosqu\" AND taxonomy.channels.value:\"nullam\") OR (taxonomy.section.value:\"sociosqu\" AND taxonomy.section.value:\"nullam\") "
}
}
}

相关内容

最新更新