需要在 Elasticsearch 中按单词内部的子串进行搜索,请建议如何实现



嗨,我正在寻找一种搜索功能:我们需要在 Elasticsearch 中按单词内部的子串进行搜索。目前我们的搜索是这样工作的:如果想搜到"company name",只能用"c"、"n"、"co"、"comp"、"na"、"nam"这类前缀来搜索;但需求是,用"mp"、"any"、"ame"、"me"、"p"搜索时,结果也应该返回"company name"。请建议我们该如何处理这个问题,是否有此类搜索功能?我尝试过通配符(wildcard)查询,但它不适用于多个字段。如果我遗漏了什么,请指出,或建议我该如何实现。

您可以使用 N-gram 分词器(ngram tokenizer):它首先在遇到指定字符列表中的任一字符时将文本拆分为单词,然后为每个单词输出指定长度范围内的所有 N-gram。

下面给出一个包含索引数据、映射、搜索查询和搜索结果的可运行示例。

索引映射:

{
"settings": {
"analysis": {
"analyzer": {
"my_analyzer": {
"tokenizer": "my_tokenizer"
}
},
"tokenizer": {
"my_tokenizer": {
"type": "ngram",
"min_gram": 1,
"max_gram": 20,
"token_chars": [
"letter",
"digit"
]
}
}
},
"max_ngram_diff": 50
},
"mappings": {
"properties": {
"body": {
"type": "text",
"analyzer": "my_analyzer"
}
}
}
}

分析API

GET /64975316/_analyze
{
"analyzer" : "my_analyzer",
"text" : "company name"
}

生成以下词元(token):

{
"tokens": [
{
"token": "c",
"start_offset": 0,
"end_offset": 1,
"type": "word",
"position": 0
},
{
"token": "co",
"start_offset": 0,
"end_offset": 2,
"type": "word",
"position": 1
},
{
"token": "com",
"start_offset": 0,
"end_offset": 3,
"type": "word",
"position": 2
},
{
"token": "comp",
"start_offset": 0,
"end_offset": 4,
"type": "word",
"position": 3
},
{
"token": "compa",
"start_offset": 0,
"end_offset": 5,
"type": "word",
"position": 4
},
{
"token": "compan",
"start_offset": 0,
"end_offset": 6,
"type": "word",
"position": 5
},
{
"token": "company",
"start_offset": 0,
"end_offset": 7,
"type": "word",
"position": 6
},
{
"token": "o",
"start_offset": 1,
"end_offset": 2,
"type": "word",
"position": 7
},
{
"token": "om",
"start_offset": 1,
"end_offset": 3,
"type": "word",
"position": 8
},
{
"token": "omp",
"start_offset": 1,
"end_offset": 4,
"type": "word",
"position": 9
},
{
"token": "ompa",
"start_offset": 1,
"end_offset": 5,
"type": "word",
"position": 10
},
{
"token": "ompan",
"start_offset": 1,
"end_offset": 6,
"type": "word",
"position": 11
},
{
"token": "ompany",
"start_offset": 1,
"end_offset": 7,
"type": "word",
"position": 12
},
{
"token": "m",
"start_offset": 2,
"end_offset": 3,
"type": "word",
"position": 13
},
{
"token": "mp",
"start_offset": 2,
"end_offset": 4,
"type": "word",
"position": 14
},
{
"token": "mpa",
"start_offset": 2,
"end_offset": 5,
"type": "word",
"position": 15
},
{
"token": "mpan",
"start_offset": 2,
"end_offset": 6,
"type": "word",
"position": 16
},
{
"token": "mpany",
"start_offset": 2,
"end_offset": 7,
"type": "word",
"position": 17
},
{
"token": "p",
"start_offset": 3,
"end_offset": 4,
"type": "word",
"position": 18
},
{
"token": "pa",
"start_offset": 3,
"end_offset": 5,
"type": "word",
"position": 19
},
{
"token": "pan",
"start_offset": 3,
"end_offset": 6,
"type": "word",
"position": 20
},
{
"token": "pany",
"start_offset": 3,
"end_offset": 7,
"type": "word",
"position": 21
},
{
"token": "a",
"start_offset": 4,
"end_offset": 5,
"type": "word",
"position": 22
},
{
"token": "an",
"start_offset": 4,
"end_offset": 6,
"type": "word",
"position": 23
},
{
"token": "any",
"start_offset": 4,
"end_offset": 7,
"type": "word",
"position": 24
},
{
"token": "n",
"start_offset": 5,
"end_offset": 6,
"type": "word",
"position": 25
},
{
"token": "ny",
"start_offset": 5,
"end_offset": 7,
"type": "word",
"position": 26
},
{
"token": "y",
"start_offset": 6,
"end_offset": 7,
"type": "word",
"position": 27
},
{
"token": "n",
"start_offset": 8,
"end_offset": 9,
"type": "word",
"position": 28
},
{
"token": "na",
"start_offset": 8,
"end_offset": 10,
"type": "word",
"position": 29
},
{
"token": "nam",
"start_offset": 8,
"end_offset": 11,
"type": "word",
"position": 30
},
{
"token": "name",
"start_offset": 8,
"end_offset": 12,
"type": "word",
"position": 31
},
{
"token": "a",
"start_offset": 9,
"end_offset": 10,
"type": "word",
"position": 32
},
{
"token": "am",
"start_offset": 9,
"end_offset": 11,
"type": "word",
"position": 33
},
{
"token": "ame",
"start_offset": 9,
"end_offset": 12,
"type": "word",
"position": 34
},
{
"token": "m",
"start_offset": 10,
"end_offset": 11,
"type": "word",
"position": 35
},
{
"token": "me",
"start_offset": 10,
"end_offset": 12,
"type": "word",
"position": 36
},
{
"token": "e",
"start_offset": 11,
"end_offset": 12,
"type": "word",
"position": 37
}
]
}

索引数据:

{
"body": "company name"
}

搜索查询:

{
"query": {
"match": {
"body": "ame"
}
}
}

搜索结果:

"hits": [
{
"_index": "64975316",
"_type": "_doc",
"_id": "1",
"_score": 1.941854,
"_source": {
"body": "company name"
}
}
]

最新更新