我在umbraco中设置Lucene搜索引擎时遇到了一个问题。我正试图对存储在Umbraco创建的默认索引中的数据进行搜索。搜索方法如下:
private DictionaryResult GetRowContent(
Lucene.Net.Highlight.Highlighter highlighter,
Lucene.Net.Analysis.Standard.StandardAnalyzer analyzer
,Lucene.Net.Documents.Document doc1, string criteria)
{
JavaScriptSerializer jsScriptSerializer = new JavaScriptSerializer();
DictionaryResult controls = new DictionaryResult();
Lucene.Net.Analysis.TokenStream stream = analyzer.TokenStream("", new StringReader(doc1.Get("bodyContent")));
dynamic rowContentHtmlDocument = JObject.Parse(((JValue)doc1.Get("bodyContent")).ToString(CultureInfo.CurrentCulture));
foreach (dynamic section in rowContentHtmlDocument.sections)
{
foreach (var row in section.rows)
{
foreach (var area in row.areas)
{
foreach (var control in area.controls)
{
if (control != null && control.editor != null) // && control.editor.view != null)
{
JObject rowContentHtml = null;
try
{
rowContentHtml = JObject.Parse(((JContainer)control)["value"].ToString());
}
catch (Exception e)
{
}
if (rowContentHtml != null)
{
try
{
var macroParamsDictionary = JObject.Parse(((JContainer)rowContentHtml)["macroParamsDictionary"].ToString());
var documentText = macroParamsDictionary.GetValue("dokument");
if (documentText != null)
{
var document = documentText.ToString().Replace(""", """);
dynamic documents = jsScriptSerializer.Deserialize<dynamic>(document);
foreach (Dictionary<string, object> doc in documents)
{
if (doc.ContainsKey("FileName") && doc.ContainsKey("DocumentId"))
{
if (doc["FileName"].ToString().Length > 0 &&
doc["FileName"].ToString().ToLower().Contains(criteria.ToLower()))
{
controls.Add(new RowResult()
{
Type = 0,
Object = new Document()
{
DocumentName = doc["FileName"].ToString(),//highlighter.GetBestFragments(stream, doc["FileName"].ToString(), 1, "..."),
DocId = Guid.Parse(doc["DocumentId"].ToString())
} // StringBuilder(@"<a href=" + Url.Action("DownloadDocument", "Document", new { DocumentId = doc["DocumentId"] }) + "> " + @doc["FileName"] + "</a>").ToString()
}
);
}
}
}
}
}
catch (Exception e)
{
}
}
else
{
var text = HtmlRemoval.StripTagsRegex(((JContainer)control)["value"].ToString()).Replace("ë", "e").Replace("ç", "c");
var textResultFiltered = highlighter.GetBestFragments(stream,doc1.Get("bodyContent"), 5, "...");
controls.Add(new RowResult()
{
Type = 1,
Object = textResultFiltered
});
}
}
}
}
}
}
return controls;
}
在这里,我试图从简单的html内容过滤宏文档并以不同的方式呈现。但是在这部分的最后
var text = HtmlRemoval.StripTagsRegex(((JContainer)control)["value"].ToString()).Replace("ë", "e").Replace("ç", "c");
var textResultFiltered = highlighter.GetBestFragments(stream,doc1.Get("bodyContent"), 5, "...");
controls.Add(new RowResult()
{
Type = 1,
Object = textResultFiltered
});
表示在搜索中包含宏。结果,我得到了文档属性,但突出显示的html内容具有宏内容,如下所示:
6th Edition V413HAV.pdf","FileContent"... Framework 6th Edition V413HAV.pdf","... with Java 8 - 1st Edition (2015) - Copy.pdf"... 4.5 Framework 6th Edition V413HAV.pdf","... And The NET 4.5 Framework 6th Edition V413HAV.pdf" which is coming from Json data of the macro. Any idea how to exclude the macros from searching or to customize the hmtl content not to search on specific macro ? Thanks in advance.
我引用这个链接来创建荧光笔等…链接到Lucene示例
是否知道如何防止搜索宏或将它们从突出显示的内容中排除?
这看起来太复杂了,如果你只是做一个常规的搜索。你知道Umbraco有自己的Lucene"版本"吗?它内置在Umbraco中,不需要太多设置就可以运行标准搜索:https://our.umbraco.org/documentation/reference/searching/examine/
我从来没有看到宏或JSON标记在我的搜索结果使用检查,所以也许尝试一下?
您可以轻松使用检查。您只需要选择您想要的搜索提供程序(config/examinessettings .config),它允许您选择是否要避免未发布和受保护的内容。然后,您只需要做一些类似于下一段代码的事情,例如,您可以选择要搜索的字段或不想避免的数据类型。
string term = "test"
var criteria = ExamineManager.Instance.SearchProviderCollection["ExternalSearcher"].CreateSearchCriteria();
var crawl = criteria.GroupedOr(new string[] { "nodeName", "pageTitle", "metaDescription", "metaKeywords" }, term)
.Not().Field("nodeTypeAlias", "GlobalSettings")
.Not().Field("nodeTypeAlias", "Error")
.Not().Field("nodeTypeAlias", "File")
.Not().Field("nodeTypeAlias", "Folder")
.Not().Field("nodeTypeAlias", "Image")
.Not().Field("excludeFromSearch", "1")
.Compile();
ISearchResults SearchResults = ExamineManager.Instance
.SearchProviderCollection["ExternalSearcher"]
.Search(crawl);
IList<JsonSearchResult> results = new List<JsonSearchResult>();
我尝试使用下面的检查:
SearchQuery = string.Format("+{0}:{1}~", SearchField, criteria);
var Criteria = ExamineManager.Instance
.SearchProviderCollection["ExternalSearcher"]
.CreateSearchCriteria();
var crawl = Criteria.GroupedOr(new string[] { "bodyContent", "nodeName" }, criteria)
.Not()
.Field("umbracoNaviHide", "1")
.Not()
.Field("nodeTypeAlias", "Image")
.Compile();
IEnumerable<Examine.SearchResult> SearchResults1 = ExamineManager.Instance
.SearchProviderCollection["ExternalSearcher"]
.Search(crawl);
我使用了下面两种方法来突出显示文本,但是这些方法不是很有效!!我有一些链接没有突出显示任何文本。
public string GetHighlight(string value, string highlightField, BaseLuceneSearcher searcher, string luceneRawQuery)
{
var query = GetQueryParser(highlightField).Parse(luceneRawQuery);
var scorer = new QueryScorer(searcher.GetSearcher().Rewrite(query));
var highlighter = new Highlighter(HighlightFormatter, scorer);
var tokenStream = HighlightAnalyzer.TokenStream(highlightField, new StringReader(value));
return highlighter.GetBestFragments(tokenStream, value, MaxNumHighlights, Separator);
}
protected QueryParser GetQueryParser(string highlightField)
{
if (!QueryParsers.ToString().Contains(highlightField))
{
var temp = new QueryParser(_luceneVersion, highlightField, HighlightAnalyzer);
return temp;
}
return null;
}
如果你有任何在检查中高亮显示的样本,我将非常感谢。