diff --git a/pom.xml b/pom.xml index 5cb9dd6..9438ea4 100644 --- a/pom.xml +++ b/pom.xml @@ -174,6 +174,11 @@ true + + com.huaban + jieba-analysis + 1.0.2 + org.commonmark diff --git a/src/main/java/com/search/docsearch/multirecall/composite/DataComposite.java b/src/main/java/com/search/docsearch/multirecall/composite/DataComposite.java index b26fbac..352b12c 100644 --- a/src/main/java/com/search/docsearch/multirecall/composite/DataComposite.java +++ b/src/main/java/com/search/docsearch/multirecall/composite/DataComposite.java @@ -97,6 +97,19 @@ public int getSize(){ * */ public Map mergeResult(){ + Map baseRes = this.getChild(0).getResList(); + int pageSize = (int) baseRes.get("pageSize"); + List> mergedList = weightedMerge(pageSize); + baseRes.put("records", mergedList); + return baseRes; + } + + /** + * merge the other recall results into one way, based one the index 0 of children + * + * @return the merged result lists + */ + private Map postionMerge(){ if (children.size() <= 1) return this.getChild(0).getResList(); Component a = children.get(0); @@ -114,10 +127,9 @@ public Map mergeResult(){ aresList.add(pos, bresList.get(pos)); } } - ares.put("records", aresList); return ares; - } + } /** * merge the other recall results into one way, based one the index 0 of children diff --git a/src/main/java/com/search/docsearch/multirecall/recall/cstrategy/EsSearchStrategy.java b/src/main/java/com/search/docsearch/multirecall/recall/cstrategy/EsSearchStrategy.java index c2d523e..ad886e7 100644 --- a/src/main/java/com/search/docsearch/multirecall/recall/cstrategy/EsSearchStrategy.java +++ b/src/main/java/com/search/docsearch/multirecall/recall/cstrategy/EsSearchStrategy.java @@ -160,6 +160,9 @@ private Component searchByCondition(SearchCondition condition) throws ServiceImp else { map.put("score", score*1.0); } + } else { + Double score = (double) hit.getScore(); + map.put("score", score); } if (highlightFields.containsKey("title")) { map.put("title", highlightFields.get("title").getFragments()[0].toString()); diff --git a/src/main/java/com/search/docsearch/multirecall/recall/cstrategy/GSearchStrategy.java b/src/main/java/com/search/docsearch/multirecall/recall/cstrategy/GSearchStrategy.java index 05248a1..c66d2d5 100644 --- a/src/main/java/com/search/docsearch/multirecall/recall/cstrategy/GSearchStrategy.java +++ b/src/main/java/com/search/docsearch/multirecall/recall/cstrategy/GSearchStrategy.java @@ -14,20 +14,23 @@ import java.io.IOException; import java.io.InputStreamReader; import java.net.HttpURLConnection; -import java.net.URL; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.web.util.HtmlUtils; - +import java.util.regex.Matcher; +import java.util.regex.Pattern; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; +import com.huaban.analysis.jieba.JiebaSegmenter; import com.search.docsearch.entity.vo.GoogleSearchParams; import com.search.docsearch.entity.vo.SearchCondition; import com.search.docsearch.except.ServiceImplException; @@ -53,9 +56,15 @@ public class GSearchStrategy implements SearchStrategy { */ private HttpConnectFactory httpConnectFactory; + /** + * jieba segmenter + */ + private JiebaSegmenter segmenter; + public GSearchStrategy(GoogleSearchProperties gProperties, HttpConnectFactory httpConnectFactory) { this.gProperties = gProperties; this.httpConnectFactory = httpConnectFactory; + this.segmenter = new JiebaSegmenter(); } /** @@ -130,9 +139,15 @@ private Component searchByCondition(SearchCondition condition) throws ServiceImp if (termsNode.isArray()) { for (JsonNode termNode : termsNode) { Map map = new HashMap<>(); - map.put("title", termNode.get("title").asText()); - map.put("path", termNode.get("link").asText()); - map.put("textContent", termNode.get("snippet").asText()); + String highlightTittle = highLightContent(condition.getKeyword(),termNode.get("title").asText()); + String highlightText = highLightContent(condition.getKeyword(),termNode.get("snippet").asText()); + map.put("title", highlightTittle); + String path = termNode.get("link").asText(); + path = path.replace("http:", "https:"); + map.put("path", path); + String type = parseTypeByPath(path); + map.put("type", type); + map.put("textContent", highlightText); if ("lang_en".equals(googleSearchParams.getLr())) { map.put("lang", "en"); } else { @@ -158,4 +173,54 @@ private Component searchByCondition(SearchCondition condition) throws ServiceImp } return null; } + + /** + * doing the recall according user query + * + * @param searchkey the user query + * @param content the text contnt + * @return text content with highlight + */ + public String highLightContent(String searchkey, String content){ + List segments = this.segmenter.sentenceProcess(searchkey); + String lightContent = content; + for (String keyword : segments){ + Pattern pattern = Pattern.compile(Pattern.quote(keyword)); + Matcher matcher = pattern.matcher(lightContent); + StringBuffer result = new StringBuffer(); + while (matcher.find()) { + matcher.appendReplacement(result, "" + matcher.group() + ""); + } + matcher.appendTail(result); + lightContent = result.toString(); + } + return lightContent; + } + + /** + * parse google path to a type params + * + * @param path the google search link + * @return type content string + */ + public String parseTypeByPath(String path){ + String type = "other"; + HashSet hashSet = new HashSet<>(gProperties.getTypeList()); + String flag = "zh/"; + if (path.indexOf(flag) == -1) { + flag = "en/"; + } + String[] spliteArray = path.split(flag); + if (spliteArray.length < 2) { + return type; + } else { + int index = spliteArray[1].indexOf("/"); + if (index != -1 && hashSet.contains(spliteArray[1].substring(0, index))) { + type = spliteArray[1].substring(0, index); + } else { + return type; + } + } + return type; + } } \ No newline at end of file diff --git a/src/main/java/com/search/docsearch/properties/GoogleSearchProperties.java b/src/main/java/com/search/docsearch/properties/GoogleSearchProperties.java index b021b53..f584156 100644 --- a/src/main/java/com/search/docsearch/properties/GoogleSearchProperties.java +++ b/src/main/java/com/search/docsearch/properties/GoogleSearchProperties.java @@ -10,6 +10,8 @@ */ package com.search.docsearch.properties; +import java.util.List; + import org.springframework.boot.context.properties.ConfigurationProperties; import org.springframework.stereotype.Component; @@ -33,4 +35,8 @@ public class GoogleSearchProperties { * The URL template for the Google Search API. */ private String url; + /** + * The list of parsing google search link to type. + */ + private List typeList; }