-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
多路召回融合排序 #23
多路召回融合排序 #23
Changes from all commits
4439ae0
300aa85
c999b13
3ab68b4
0db788b
8645312
9ef5ee4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,17 +34,20 @@ | |
import org.elasticsearch.search.builder.SearchSourceBuilder; | ||
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; | ||
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField; | ||
import org.elasticsearch.search.sort.SortOrder; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
import org.springframework.util.StringUtils; | ||
import org.springframework.web.util.HtmlUtils; | ||
import com.search.docsearch.utils.Trie; | ||
import com.search.docsearch.config.EsfunctionScoreConfig; | ||
import com.search.docsearch.constant.Constants; | ||
import com.search.docsearch.entity.vo.SearchCondition; | ||
import com.search.docsearch.except.ServiceImplException; | ||
import com.search.docsearch.multirecall.composite.Component; | ||
import com.search.docsearch.multirecall.composite.cdata.EsRecallData; | ||
import com.search.docsearch.multirecall.recall.SearchStrategy; | ||
import com.search.docsearch.properties.FusionSortProperties; | ||
import com.search.docsearch.utils.General; | ||
import org.elasticsearch.client.RestHighLevelClient; | ||
|
||
|
@@ -79,6 +82,11 @@ public class EsSearchStrategy implements SearchStrategy { | |
*/ | ||
private EsfunctionScoreConfig esfunctionScoreConfig; | ||
|
||
/** | ||
* insert fusion sort properties | ||
*/ | ||
private FusionSortProperties fuProperties; | ||
|
||
/** | ||
* roughly filter the recalled results | ||
* | ||
|
@@ -87,11 +95,12 @@ public class EsSearchStrategy implements SearchStrategy { | |
* @param paratire the algorithm toolkit | ||
* @param config the boost score config which is used to rank the result list | ||
*/ | ||
public EsSearchStrategy(RestHighLevelClient pararestHighLevelClient, String paraindex, Trie paratire,EsfunctionScoreConfig config){ | ||
public EsSearchStrategy(RestHighLevelClient pararestHighLevelClient, String paraindex, Trie paratire,
        EsfunctionScoreConfig config, FusionSortProperties fuProperties) {
    // The collaborators are stored exactly as passed in; nothing is validated or copied here.
    this.fuProperties = fuProperties;
    this.esfunctionScoreConfig = config;
    this.trie = paratire;
    this.index = paraindex;
    this.restHighLevelClient = pararestHighLevelClient;
}
|
||
/** | ||
|
@@ -167,7 +176,8 @@ private Component searchByCondition(SearchCondition condition) throws ServiceImp | |
if (highlightFields.containsKey("title")) { | ||
map.put("title", highlightFields.get("title").getFragments()[0].toString()); | ||
} | ||
|
||
reCaculateScore(map); | ||
map.put("recallType", "E"); | ||
data.add(map); | ||
} | ||
if (data.isEmpty()) { | ||
|
@@ -185,6 +195,28 @@ private Component searchByCondition(SearchCondition condition) throws ServiceImp | |
return resData; | ||
} | ||
|
||
/** | ||
* caculate the es recall data by using the date | ||
* | ||
* @param entity the map entity of search result | ||
*/ | ||
public void reCaculateScore(Map<String, Object> entity) { | ||
double score = (double) entity.get("score"); | ||
try { | ||
if (entity.containsKey("date")) { | ||
String[] parts = entity.get("date").toString().split("-"); | ||
int year = Integer.parseInt(parts[0]); | ||
int month = Integer.parseInt(parts[1]); | ||
int day = Integer.parseInt(parts[2]); | ||
List<Double> dateWeight = fuProperties.getDateWeight(); | ||
score += (year * dateWeight.get(0) + month * dateWeight.get(1) + day * dateWeight.get(2)); | ||
Comment on lines
+208
to
+212
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 出错后设置默认分数 |
||
entity.put("score", score); | ||
} | ||
} catch (Exception e) { | ||
LOGGER.error("es recall score caculate error: {}", e.getMessage()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 服务器中不打印具体错误信息 |
||
} | ||
} | ||
|
||
/** | ||
* build the es query from the search condition | ||
* | ||
|
@@ -290,6 +322,9 @@ private SearchRequest BuildSearchRequest(SearchCondition condition, String index | |
sourceBuilder.highlighter(highlightBuilder); | ||
sourceBuilder.from(startIndex).size(condition.getPageSize()); | ||
sourceBuilder.timeout(TimeValue.timeValueMinutes(1L)); | ||
if ("desc".equals(condition.getSort())) { | ||
sourceBuilder.sort("date", SortOrder.DESC); | ||
} | ||
request.source(sourceBuilder); | ||
return request; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,6 +31,7 @@ | |
import com.fasterxml.jackson.databind.JsonNode; | ||
import com.fasterxml.jackson.databind.ObjectMapper; | ||
import com.huaban.analysis.jieba.JiebaSegmenter; | ||
import com.search.docsearch.constant.Constants; | ||
import com.search.docsearch.entity.vo.GoogleSearchParams; | ||
import com.search.docsearch.entity.vo.SearchCondition; | ||
import com.search.docsearch.except.ServiceImplException; | ||
|
@@ -94,6 +95,9 @@ public Component search(SearchCondition condition) { | |
* @throws IOException | ||
*/ | ||
private Component searchByCondition(SearchCondition condition) throws ServiceImplException, IOException { | ||
if (!"".equals(condition.getType())) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 添加注释 (add a comment) |
||
return null; | ||
} | ||
// google search 处理无效字符 | ||
condition.setKeyword(condition.getKeyword().replace(" ", "")); | ||
condition.setKeyword(condition.getKeyword().replace(".", "")); | ||
|
@@ -103,7 +107,7 @@ private Component searchByCondition(SearchCondition condition) throws ServiceImp | |
googleSearchParams.setLr("lang_en"); | ||
} | ||
int start = (condition.getPage() - 1) * condition.getPageSize() + 1; | ||
int num = Math.min(10, condition.getPageSize()); | ||
int num = Constants.GOOGLE_NUM; | ||
if(start + num > 100) { | ||
return null; | ||
} else { | ||
|
@@ -154,6 +158,7 @@ private Component searchByCondition(SearchCondition condition) throws ServiceImp | |
map.put("lang", "zh"); | ||
} | ||
map.put("score", (double) (5000 - (count + start) * 50)); | ||
map.put("recallType","G"); | ||
count++; | ||
data.add(map); | ||
} | ||
|
@@ -185,12 +190,12 @@ public String highLightContent(String searchkey, String content){ | |
List<String> segments = this.segmenter.sentenceProcess(searchkey); | ||
String lightContent = content; | ||
for (String keyword : segments){ | ||
Pattern pattern = Pattern.compile(Pattern.quote(keyword)); | ||
Pattern pattern = Pattern.compile(Pattern.quote(keyword), Pattern.CASE_INSENSITIVE); | ||
Matcher matcher = pattern.matcher(lightContent); | ||
StringBuffer result = new StringBuffer(); | ||
while (matcher.find()) { | ||
matcher.appendReplacement(result, "<span>" + matcher.group() + "</span>"); | ||
} | ||
} | ||
matcher.appendTail(result); | ||
lightContent = result.toString(); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
/* Copyright (c) 2024 openEuler Community | ||
EasySoftware is licensed under the Mulan PSL v2. | ||
You can use this software according to the terms and conditions of the Mulan PSL v2. | ||
You may obtain a copy of Mulan PSL v2 at: | ||
http://license.coscl.org.cn/MulanPSL2 | ||
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, | ||
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, | ||
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. | ||
See the Mulan PSL v2 for more details. | ||
*/ | ||
package com.search.docsearch.properties; | ||
|
||
import java.util.List; | ||
|
||
import org.springframework.boot.context.properties.ConfigurationProperties; | ||
import org.springframework.stereotype.Component; | ||
|
||
import lombok.Getter; | ||
import lombok.Setter; | ||
|
||
@Component | ||
@Getter | ||
@Setter | ||
@ConfigurationProperties(prefix = "fusion-sort") | ||
public class FusionSortProperties { | ||
|
||
/** | ||
* Date weight in fusion sort | ||
*/ | ||
private List<Double> dateWeight; | ||
|
||
/** | ||
* The weight of es recall data; | ||
*/ | ||
private double esRecallWeight; | ||
|
||
/** | ||
* The weight of google recall data; | ||
*/ | ||
private double gRecallWeight; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,7 @@ | |
import com.search.docsearch.multirecall.recall.MultiSearchContext; | ||
import com.search.docsearch.multirecall.recall.cstrategy.EsSearchStrategy; | ||
import com.search.docsearch.multirecall.recall.cstrategy.GSearchStrategy; | ||
import com.search.docsearch.properties.FusionSortProperties; | ||
import com.search.docsearch.properties.GoogleSearchProperties; | ||
import com.search.docsearch.service.SearchService; | ||
import com.search.docsearch.utils.General; | ||
|
@@ -124,6 +125,12 @@ public class SearchServiceImpl implements SearchService { | |
*/ | ||
@Autowired | ||
private HttpConnectFactory httpConnectFactory; | ||
|
||
/** | ||
* insert fusion sort properties | ||
*/ | ||
@Autowired | ||
private FusionSortProperties fuProperties; | ||
|
||
@Autowired | ||
private EsfunctionScoreConfig esfunctionScoreConfig; | ||
|
@@ -222,14 +229,18 @@ public Map<String, Object> getSuggestion(String keyword, String lang) throws Ser | |
@Override | ||
public Map<String, Object> searchByCondition(SearchCondition condition) throws ServiceImplException { | ||
//create es search strategy | ||
EsSearchStrategy esRecall = new EsSearchStrategy(restHighLevelClient,mySystem.index,trie,esfunctionScoreConfig); | ||
EsSearchStrategy esRecall = new EsSearchStrategy(restHighLevelClient,mySystem.index,trie,esfunctionScoreConfig,fuProperties); | ||
GSearchStrategy gRecall = new GSearchStrategy(gProperties, httpConnectFactory); | ||
MultiSearchContext multirecall = new MultiSearchContext(); | ||
//set es search into search contex | ||
multirecall.setSearchStrategy(esRecall); | ||
multirecall.setSearchStrategy(gRecall); | ||
//do recall and fetch the result | ||
DataComposite multiRecallRes = multirecall.executeMultiSearch(condition); | ||
if ("desc".equals(condition.getSort())) { | ||
return multiRecallRes.getChild(0).getResList(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 注意下标 (mind the index) |
||
} | ||
multiRecallRes.setFuProperties(fuProperties); | ||
// multiRecallRes.filter("policy") filtering data here | ||
return multiRecallRes.mergeResult(); | ||
//return multiRecallRes.getChild(1).getResList(); | ||
|
@@ -334,6 +345,9 @@ public SearchRequest BuildSearchRequest(SearchCondition condition, String index) | |
sourceBuilder.highlighter(highlightBuilder); | ||
sourceBuilder.from(startIndex).size(condition.getPageSize()); | ||
sourceBuilder.timeout(TimeValue.timeValueMinutes(1L)); | ||
if ("desc".equals(condition.getSort())) { | ||
sourceBuilder.sort("date", SortOrder.DESC); | ||
} | ||
request.source(sourceBuilder); | ||
return request; | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
try catch异常,避免split(-) 出现错误,导致下标越界 (wrap this in try/catch so that a failing split("-") cannot cause an index-out-of-bounds error)