我想执行一个simple_query_string在Elasticsearch中搜索,同时具有子词匹配。例如,如果a将有一个文件名:"C:UsersSven OnderbekeDocumentsArduino"如果我的搜索词是"ocumen",我会希望这个文件名被列出。
这个线程建议使用ngram来匹配单词的部分。我试图实现它如下(在Python中),但我得到零结果,而我期望一个:
test_mapping = {
"properties": {
"filename": {
"type": "text",
"analyzer": "my_index_analyzer"
},
}
}
def create_index(index_name, mapping):
created = False
# index settings
settings = {
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0,
},
"analysis": {
"index_analyzer": {
"my_index_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"mynGram"
]
}
},
"search_analyzer": {
"my_search_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"standard",
"lowercase",
"mynGram"
]
}
},
"filter": {
"mynGram": {
"type": "nGram",
"min_gram": 2,
"max_gram": 50
}
}
},
"mappings": mapping
}
try:
if not es.indices.exists(index_name):
# Ignore 400 means to ignore "Index Already Exist" error.
es.indices.create(index=index_name, ignore=400, body=settings)
print(f'Created Index: {index_name}')
created = True
except Exception as ex:
print(str(ex))
finally:
return created
create_index("test", test_mapping)
doc = {
'filename': r"C:UsersSven OnderbekeDocumentsArduino",
}
es.index(index="test", document=doc)
needle = "ocumen"
q = {
"simple_query_string": {
"query": needle,
"default_operator": "and"
}
}
res = es.search(index="test", query=q)
print(res)
for hit in res['hits']['hits']:
print(hit)
您的解决方案不起作用的原因是因为您在定义映射时没有为命名为field
的属性提供分析器。更新映射如下,然后重新索引所有文档。
test_mapping = {
"properties": {
"filename": {
"type": "text",
"analyzer": "my_index_analyzer"
},
}
}