Elasticsearch:使用快速矢量高亮器(Fast Vector Highlighter)时的多个 pre_tags/post_tags



文档包括以下关于pre_tags/post_tags设置的隐晦注释,该设置能够包含多对前/后标签:

使用快速矢量高亮器时可以指定多个标签,这些标签按"重要性"排序。

有人知道这句话的确切含义吗?

这花了一段时间,但通过使用ES 1.7和_head插件尝试不同的查询,我能够弄清楚多个pre和post标签如何影响高亮显示。

使用快速矢量高亮显示,您可以按照"重要性"的顺序指定标签,这似乎意味着它们的顺序和搜索词的顺序应该匹配。要让多个pre或post标签产生效果,查询中需要包含多个搜索词(如下例中针对同一字段的多个match子句)。

给定索引

{
  "myindex": {
    "mappings": {
      "corpdocument": {
        "properties": {
          "createddate": {
            "type": "date",
            "format": "dateOptionalTime"
          },
          "docbody": {
            "type": "string",
            "analyzer": "text_analyzer",
            "fields": {
              "exact": {
                "type": "string",
                "analyzer": "text_analyzer_exact"
              }
            }
          },
          "modifieddate": {
            "type": "date",
            "format": "dateOptionalTime"
          },
          "title": {
            "type": "string"
          }
        }
      }
    }
  }
}

和搜索

POST localhost:9200/myindex/corpdocument/_search
{
  "highlight": {
    "pre_tags": [
      "|primary-highlight|",
      "|secondary-highlight|"
    ],
    "post_tags": [
      "|/primary-highlight|",
      "|/secondary-highlight|"
    ],
    "fields": {
      "docbody.exact": {
        "fragment_size": 150,
        "number_of_fragments": 3
      }
    }
  },
  "_source": {
    "exclude": ["docbody"]
  },
  "query": {
    "bool": {
      "should": [
        {
          "match": {
            "docbody.exact": {
              "query": "foo"
            }
          }
        },
        {
          "match": {
            "docbody.exact": {
              "query": "bar"
            }
          }
        }
      ]
    }
  }
}

你可以得到这样的结果

{
  "took": 14,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 97,
    "max_score": 0.48895144,
    "hits": [
      {
        "_index": "myindex",
        "_type": "corpdocument",
        "_id": "XFxxZWR0ZXN0ZG9jc1xTYW5kYm94XFNhbmRib3hBbGxcRGV4dGVyX2xpdFw3NS5kb2M=",
        "_score": 0.48895144,
        "_source": {
          "createddate": "2010-11-02T00:00:00-05:00",
          "modifieddate": "2007-09-04T00:00:00-05:00",
          "_id": "XFxxZWR0ZXN0ZG9jc1xTYW5kYm94XFNhbmRib3hBbGxcRGV4dGVyX2xpdFw3NS5kb2M="
        },
        "highlight": {
          "docbody.exact": [
            "Lorem ipsum dolor sit amet, consectetur adipiscing elit |primary-highlight|foo|/primary-highlight|Lorem ipsum dolor sit amet, consectetur adipiscing elit",
            "Lorem ipsum dolor sit amet, consectetur adipiscing elit |secondary-highlight|bar|/secondary-highlight|TOTHE|primary-highlight|foo|/primary-highlight|Lorem ipsum dolor sit amet, consectetur adipiscing elit",
            "Lorem ipsum dolor sit amet, consectetur adipiscing elit |secondary-highlight|bar|/secondary-highlight| Lorem ipsum dolor sit amet, consectetur adipiscing elit |primary-highlight|foo|/primary-highlight| Lorem ipsum dolor sit amet, consectetur adipiscing elit"
          ]
        }
      },
      ...
    ]
  }
}

哪个标签包装哪个命中取决于标签和搜索词的顺序。切换"foo"和"bar"的顺序,同时保持其他所有内容不变,将导致bar被包装在主标记中,foo被包装在辅助标记中。

从一些使用3个搜索项和2个标签的初步实验来看,第三个搜索项似乎被包裹在第一个标签中,而不是第二个标签中。添加第三个标签可以解决这个问题,但需要重复第二个标签n次才能覆盖所有搜索词。

"highlight": {
  "pre_tags": [
    "|primary-highlight|",
    "|secondary-highlight|",
    "|secondary-highlight|"
  ],
  "post_tags": [
    "|/primary-highlight|",
    "|/secondary-highlight|",
    "|/secondary-highlight|"
  ],
  "fields": {
    "docbody.exact": {
      "fragment_size": 150,
      "number_of_fragments": 3
    }
  }
},
..."query": {
  "bool": {
    "should": [
      {
        "match": {
          "docbody.exact": {
            "query": "foo"
          }
        }
      },
      {
        "match": {
          "docbody.exact": {
            "query": "bar"
          }
        }
      },
      {
        "match": {
          "docbody.exact": {
            "query": "baz"
          }
        }
      }
    ]
  }
}

最新更新