Skip to content

Commit

Permalink
[feature] google搜索api接入
Browse files Browse the repository at this point in the history
  • Loading branch information
2511689622 committed Jan 13, 2025
1 parent 5eb9423 commit fd7b3c1
Show file tree
Hide file tree
Showing 7 changed files with 366 additions and 12 deletions.
4 changes: 4 additions & 0 deletions src/main/java/com/search/docsearch/DocSearchApplication.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,16 @@

import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.annotation.EnableScheduling;

@SpringBootApplication
@EnableScheduling
@EnableAsync
@ComponentScan(basePackages = {"com.search.*"})
@EnableConfigurationProperties
public class DocSearchApplication {

public static void main(String[] args) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/* Copyright (c) 2024 openEuler Community
EasySoftware is licensed under the Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
*/
package com.search.docsearch.entity.vo;


import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.Pattern;
import jakarta.validation.constraints.Size;
import lombok.Getter;
import lombok.Setter;


/**
 * Request parameters for one call to the Google search API, plus the logic
 * that renders them into a request URL.
 */
@Getter
@Setter
public class GoogleSearchParams {

    /**
     * The keyword to search for.
     */
    @NotBlank(message = "keyword can not be null")
    @Pattern(regexp = "^[\\u4E00-\\u9FA5A-Za-z0-9.()$\\-_ ]+$", message = "Include only letters, digits, and special characters(_-()$.), Contain 1 to 100 characters.")
    @Size(max = 100)
    private String keyWord;
    /**
     * The starting index for the search results to return.
     */
    @NotBlank(message = "start can not be null")
    @Pattern(regexp = "\\d+", message = "start Must be numeric")
    @Size(max = 100)
    private String start;
    /**
     * The number of search results to return per page.
     * Note that {@code @Size} bounds the length of this string, not its numeric value.
     */
    @Pattern(regexp = "\\d+", message = "num Must be numeric")
    @Size(max = 10)
    private String num;
    /**
     * The language restriction for the search.
     */
    private String lr;

    /**
     * Builds the request URL for this set of parameters.
     *
     * <p>Every value is URL-encoded as UTF-8 before being appended: even keywords that
     * pass the pattern above may contain spaces or CJK characters, and callers that
     * populate this object without running bean validation can pass arbitrary text.
     * Parameters whose value is {@code null} or empty are left out instead of being
     * sent as the literal text {@code "null"}.
     *
     * @param url the API endpoint the query string is appended to
     * @param api the API key, sent as {@code key}
     * @param cx  the search engine id, sent as {@code cx}
     * @return the endpoint followed by the encoded query string
     */
    public String buildUrl(String url, String api, String cx) {
        StringBuilder urlString = new StringBuilder(String.valueOf(url));
        appendParam(urlString, "key", api);
        appendParam(urlString, "q", keyWord);
        appendParam(urlString, "cx", cx);
        appendParam(urlString, "start", start);
        appendParam(urlString, "num", num);
        appendParam(urlString, "lr", lr);
        return urlString.toString();
    }

    /**
     * Appends {@code name=value} to the URL being built, skipping unset values.
     *
     * @param target the URL under construction
     * @param name   the query parameter name
     * @param value  the raw, unencoded value; ignored when {@code null} or empty
     */
    private static void appendParam(StringBuilder target, String name, String value) {
        if (value == null || value.isEmpty()) {
            return;
        }
        // The first parameter is introduced by '?', every later one by '&'.
        target.append(target.indexOf("?") < 0 ? '?' : '&');
        // Fully qualified so that no change to the file's import block is required.
        target.append(name)
                .append('=')
                .append(java.net.URLEncoder.encode(value, java.nio.charset.StandardCharsets.UTF_8));
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ public void filter(String filterPolicy) throws RuntimeException {
*/
@Override
public Map<String, Object> getResList(){
    // Expose the recall data held by this component. The former placeholder
    // "return null;" is removed: it made this statement unreachable.
    return this.recallList;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,27 +10,142 @@
*/
package com.search.docsearch.multirecall.recall.cstrategy;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.web.util.HtmlUtils;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.search.docsearch.entity.vo.GoogleSearchParams;
import com.search.docsearch.entity.vo.SearchCondition;
import com.search.docsearch.except.ServiceImplException;
import com.search.docsearch.multirecall.composite.Component;
import com.search.docsearch.multirecall.composite.cdata.GRecallData;
import com.search.docsearch.multirecall.recall.SearchStrategy;
import com.search.docsearch.properties.GoogleSearchProperties;

/**
 * Search strategy that recalls documents through the Google search API
 * configured in {@link GoogleSearchProperties}.
 */
public class GSearchStrategy implements SearchStrategy {
    /**
     * logger.
     */
    private static final Logger LOGGER = LoggerFactory.getLogger(GSearchStrategy.class);

    /**
     * JSON parser shared by all requests, created once instead of per call.
     */
    private static final ObjectMapper MAPPER = new ObjectMapper();

    /**
     * Upper bound for the number of results requested in one call.
     */
    private static final int MAX_NUM = 10;

    /**
     * Requests whose {@code start + num} exceeds this value are not sent.
     */
    private static final int MAX_WINDOW = 100;

    /**
     * Connect and read timeout in milliseconds (15 seconds).
     */
    private static final int TIMEOUT_MS = 15000;

    /**
     * Google search properties (endpoint, API key, engine id).
     */
    private final GoogleSearchProperties gProperties;

    /**
     * Creates the strategy.
     *
     * @param gProperties the Google search configuration used to build request URLs
     */
    public GSearchStrategy(GoogleSearchProperties gProperties) {
        this.gProperties = gProperties;
    }

    /**
     * Runs the Google recall for the given query and never propagates a failure.
     *
     * @param condition the user query
     * @return the recalled data, or a component wrapping an empty map when nothing
     *         was recalled or the request failed
     */
    @Override
    public Component search(SearchCondition condition) {
        GRecallData emptyRes = new GRecallData(Collections.emptyMap()); // empty fallback
        try {
            Component recalled = searchByCondition(condition);
            // A regular zero-recall outcome is reported as the empty result.
            return recalled == null ? emptyRes : recalled;
        } catch (Exception e) {
            // Pass the exception itself so the stack trace is not lost.
            LOGGER.error("google search result error :{}", e.getMessage(), e);
            return emptyRes;
        }
    }

    /**
     * Performs the recall for the user query.
     *
     * @param condition the user query
     * @return a Component containing the parsed response data from the Search API,
     *         or {@code null} when the page is out of range, the request fails, or
     *         the response carries no {@code items}
     * @throws ServiceImplException declared for callers; not thrown by the visible code
     * @throws IOException if the URL is malformed or the connection cannot be opened
     */
    private Component searchByCondition(SearchCondition condition) throws ServiceImplException, IOException {
        GoogleSearchParams googleSearchParams = new GoogleSearchParams();
        googleSearchParams.setKeyWord(condition.getKeyword());
        if ("en".equals(condition.getLang())) {
            googleSearchParams.setLr("lang_en");
        }
        // Translate the 1-based page number into the 1-based result index of the API.
        int start = (condition.getPage() - 1) * condition.getPageSize() + 1;
        int num = Math.min(MAX_NUM, condition.getPageSize());
        if (start + num > MAX_WINDOW) {
            return null;
        }
        googleSearchParams.setNum(String.valueOf(num));
        googleSearchParams.setStart(String.valueOf(start));

        String urlString = googleSearchParams.buildUrl(gProperties.getUrl(), gProperties.getKey(), gProperties.getCx());
        HttpURLConnection connection = (HttpURLConnection) new URL(urlString).openConnection();
        try {
            connection.setRequestMethod("GET");
            connection.setConnectTimeout(TIMEOUT_MS);
            connection.setReadTimeout(TIMEOUT_MS);
            int responseCode = connection.getResponseCode();
            // Only a plain 200 response is parsed; anything else is logged and dropped.
            if (responseCode != HttpURLConnection.HTTP_OK) {
                LOGGER.error("GET request not worked, response code: {}", responseCode);
                return null;
            }
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
                JsonNode rootNode = MAPPER.readTree(in);
                return toRecallData(rootNode, googleSearchParams, start);
            }
        } catch (Exception e) {
            LOGGER.error("google search request failed", e);
        } finally {
            connection.disconnect();
        }
        return null;
    }

    /**
     * Converts a parsed API response into recall data.
     *
     * @param rootNode the parsed response body, may be {@code null}
     * @param params   the parameters the request was built from
     * @param start    the index of the first requested result, used for scoring
     * @return the recall data, or {@code null} when the response has no {@code items} field
     */
    private Component toRecallData(JsonNode rootNode, GoogleSearchParams params, int start) {
        JsonNode itemsNode = rootNode == null ? null : rootNode.get("items");
        if (itemsNode == null) {
            // No "items" field: treat it as zero recall instead of failing on a null node.
            return null;
        }
        String lang = "lang_en".equals(params.getLr()) ? "en" : "zh";
        List<Map<String, Object>> data = new ArrayList<>();
        if (itemsNode.isArray()) {
            int count = 0;
            for (JsonNode itemNode : itemsNode) {
                Map<String, Object> map = new HashMap<>();
                // path() yields an empty text for an absent field, so one incomplete
                // item no longer discards the whole page of results.
                map.put("title", itemNode.path("title").asText());
                map.put("path", itemNode.path("link").asText());
                map.put("textContent", itemNode.path("snippet").asText());
                map.put("lang", lang);
                // Score decreases with the overall rank of the item.
                map.put("score", 5000 - (count + start) * 50);
                count++;
                data.add(map);
            }
        }
        Map<String, Object> result = new HashMap<>();
        result.put("keyword", HtmlUtils.htmlEscape(params.getKeyWord()));
        result.put("records", data);
        return new GRecallData(result);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/* Copyright (c) 2024 openEuler Community
EasySoftware is licensed under the Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
*/
package com.search.docsearch.properties;

import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;

import lombok.Getter;
import lombok.Setter;

/**
 * Settings for the Google search integration, bound from the configuration
 * properties under the {@code google-search} prefix.
 */
@ConfigurationProperties(prefix = "google-search")
@Component
@Setter
@Getter
public class GoogleSearchProperties {
    /**
     * URL of the Google Search API that requests are sent to.
     */
    private String url;
    /**
     * API key used to authenticate against the Google Search API.
     */
    private String key;
    /**
     * Identifier (cx) of the Google Custom Search Engine to query.
     */
    private String cx;
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
/* Copyright (c) 2024 openEuler Community
EasySoftware is licensed under the Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
*/
package com.search.docsearch.service.impl;

import com.fasterxml.jackson.core.JsonProcessingException;
Expand All @@ -13,6 +23,8 @@
import com.search.docsearch.multirecall.composite.DataComposite;
import com.search.docsearch.multirecall.recall.MultiSearchContext;
import com.search.docsearch.multirecall.recall.cstrategy.EsSearchStrategy;
import com.search.docsearch.multirecall.recall.cstrategy.GSearchStrategy;
import com.search.docsearch.properties.GoogleSearchProperties;
import com.search.docsearch.service.SearchService;
import com.search.docsearch.utils.General;
import com.search.docsearch.utils.ParameterUtil;
Expand Down Expand Up @@ -50,7 +62,6 @@
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import org.springframework.util.StringUtils;
Expand Down Expand Up @@ -85,20 +96,24 @@ public class SearchServiceImpl implements SearchService {
@Qualifier("setConfig")
private MySystem mySystem;

/**
 * Value of the {@code api.allApi} configuration property.
 */
@Value("${api.allApi}")
private String allApi;

/**
 * Value of the {@code api.starsApi} configuration property.
 */
@Value("${api.starsApi}")
private String starsApi;

/**
 * Value of the {@code api.sigNameApi} configuration property.
 */
@Value("${api.sigNameApi}")
private String sigNameApi;

/**
 * Value of the {@code api.repoInfoApi} configuration property.
 */
@Value("${api.repoInfoApi}")
private String repoInfoApi;

/**
 * Value of the {@code api.npsApi} configuration property.
 */
@Value("${api.npsApi}")
private String npsApi;

@Autowired
private GoogleSearchProperties gProperties;

@Autowired
private EsfunctionScoreConfig esfunctionScoreConfig;

Expand Down Expand Up @@ -197,14 +212,16 @@ public Map<String, Object> getSuggestion(String keyword, String lang) throws Ser
public Map<String, Object> searchByCondition(SearchCondition condition) throws ServiceImplException {
    // Create one recall strategy per backend: Elasticsearch and Google search.
    EsSearchStrategy esRecall = new EsSearchStrategy(restHighLevelClient, mySystem.index, trie, esfunctionScoreConfig);
    GSearchStrategy gRecall = new GSearchStrategy(gProperties);
    // Hand both strategies to the multi-recall context.
    // NOTE(review): setSearchStrategy is called once per strategy, so it presumably
    // appends rather than replaces - confirm against MultiSearchContext.
    MultiSearchContext multirecall = new MultiSearchContext();
    multirecall.setSearchStrategy(esRecall);
    multirecall.setSearchStrategy(gRecall);
    // Do the recall and fetch the result.
    DataComposite multiRecallRes = multirecall.executeMultiSearch(condition);
    // Return the merged result. The former "return multiRecallRes.getChild(0).getResList();"
    // is removed: it made this statement unreachable and only looked at the first child.
    return multiRecallRes.mergeResult();
}

public SearchRequest BuildSearchRequest(SearchCondition condition, String index) {
Expand Down
Loading

0 comments on commit fd7b3c1

Please sign in to comment.