欢迎来到尧图网

客户服务 关于我们

您的位置:首页 > 科技 > IT业 > es简单实现文章检索功能

es简单实现文章检索功能

2024/10/26 1:19:38 来源:https://blog.csdn.net/m0_73363097/article/details/142825893  浏览:    关键词:es简单实现文章检索功能

使用的api是:Elasticsearch Java API client 8.0

官网:Package structure and namespace clients | Elasticsearch Java API Client [8.15] | Elastic

 

1.建立索引库

实现搜索功能字段:

  1. title:文章标题
  2. content:文章内容
  3. category:文章分类
PUT /search_test
{"mappings": {"properties": {"title":{"type": "text","analyzer": "ik_max_word","index": true},"content":{"type": "text","analyzer": "ik_max_word","index": true},"category":{"type": "keyword","index": true}}}
}

2.代码实现

返回实体:

@Data
public class ContentSearchVO {@Schema(description = "标题")private String title;@Schema(description = "高亮内容部分,用于首页展示")private String highLightContent;@Schema(description = "内容")private String content;}

请求实体:

@Data
public class SearchDTO {@Schema(description="搜索词")@NotBlank(message = "搜索词不能是空")private String text;@Schema(description="分类")private String category;private Integer pageNo;private Integer pageSize;}

2.1基于模板形式

    @Overridepublic List<ContentSearchVO> search(SearchDTO dto)  {String tags = dto.getCategory();if (tags != null){tags = String.format(SearchConstants.CATEGORY, tags) ;}else {tags = "";}//es内容搜索String query = String.format(SearchConstants.SEARCH_MAPPING, dto.getText(), dto.getText(), tags, dto.getPageNo(), dto.getPageSize());StringReader stringReader = new StringReader(query);SearchRequest searchRequest = SearchRequest.of(builder -> builder.index(SearchConstants.INDEX_NAME).withJson(stringReader));SearchResponse<Object> response = EsSearchUtil.search(searchRequest);List<ContentSearchVO> searchResult = EsSearchUtil.buildSearchResult(response);return searchResult;}

模板常量

public class SearchConstants {//索引库public static final String INDEX_NAME = "search_test";//类别模板public static final String CATEGORY =   "        ,\"term\": {\n" +"          \"category\": \"%s\"\n" +"        },\n";//搜索模板public static final String SEARCH_MAPPING = "{" +"  \"query\": {\n" +"    \"bool\": {\n" +"      \"must\":[ " +"        {\n" +"          \"match\": {\n" +"            \"title\": \"%s\"\n" +"            }\n" +"        },\n" +"        {\n" +"          \"match\": {\n" +"             \"content\": \"%s\"\n" +"             }\n" +"        }\n" +"%s"+"     ]}\n" +"    },\n" +"    \"from\": %s,\n" +"    \"size\": %s,\n" +"  \"highlight\": {\n" +"    \"pre_tags\": \"<em>\",\n" +"    \"post_tags\": \"</em>\",\n" +"    \"fields\": {\n"+"      \"title\": {},\n" +"      \"content\": {}\n" +"      }\n" +"  }\n" +"}";}

EsSearchUtil

@Component
@Slf4j
public class EsSearchUtil {private static ElasticsearchClient client;@Autowiredpublic EsSearchUtil(ElasticsearchClient client) {EsSearchUtil.client = client;}/*** 数据检索*/public static  SearchResponse<Object> search(SearchRequest searchRequest) {SearchResponse<Object> response;try {response = client.search(searchRequest, Object.class);log.debug("搜索返回结果:" + response.toString());} catch (IOException e) {log.error(e.toString());throw new RuntimeException("搜索服务出了点小差,请稍后再试", e);}return response;}/*** 构建搜索结果*/public static <T> List<T> buildSearchResult(SearchResponse<Object> resp, Class<T> clazz) {if (resp.hits() == null || resp.hits().hits() == null) {return Collections.emptyList();}List<Hit<Object>> hits = resp.hits().hits();List<T> list = new ArrayList<>();for(Hit<Object> hit:hits) {//格式转换T t = ConvertUtil.objToObj(hit.source(), clazz);list.add(t);}return list;}public  static  List<ContentSearchVO> buildSearchResult(SearchResponse<Object> resp){List<Hit<Object>> hits = resp.hits().hits();List<ContentSearchVO> list = new ArrayList<>();for(Hit<Object> hit:hits){ContentSearchVO contentSearchVO = ConvertUtil.objToObj(hit.source(), ContentSearchVO.class);Map<String, List<String>> highlight = hit.highlight();if (!CollectionUtils.isEmpty(highlight)) {//标题List<String> highLightTitle = highlight.get("title");//内容List<String> highLightContent = highlight.get("content");//无高亮,就用内容替代if (!CollectionUtils.isEmpty(highLightTitle)){StringBuilder highLightTitleStringBuilder = new StringBuilder();for (String titleSegment : highLightTitle) {highLightTitleStringBuilder.append(titleSegment);}contentSearchVO .setTitle(highLightTitleStringBuilder.toString());}if (!CollectionUtils.isEmpty(highLightContent)){StringBuilder highLightContentStringBuilder = new StringBuilder();for (String contentSegment : highLightContent) {highLightContentStringBuilder.append(contentSegment);}contentSearchVO .setHighLightContent(highLightContentStringBuilder.toString());}else {contentSearchVO .setHighLightContent(contentSearchVO.getContent());}}list.add(contentSearchVO);}return list;}/***  分词解析* @param text 搜索词* @param index 索引库* @return 分词结果*/public static List<String> analyze (String text,String index) {AnalyzeRequest analyzeRequest = new AnalyzeRequest.Builder().index(index).text(text).analyzer("ik_max_word")  // ik_smart.build();AnalyzeResponse analyzeResponse;try {analyzeResponse = client.indices().analyze(analyzeRequest);} catch (IOException e) {throw new RuntimeException(e);}List<AnalyzeToken> tokens = analyzeResponse.tokens();List<String> result = new ArrayList<>();// 分词结果for (AnalyzeToken token : tokens) {result.add(token.token());}return result;}
}

2.2 基于搜索api实现

@Service
@Slf4j
@RequiredArgsConstructor
public class ContentSearchServiceImpl implements ContentSearchService {private final SearchHotWordsService  searchHotWordsService;@Value("${search.highlightWords:500}")private Integer highlightWords;@Value("${search.words:500}")private Integer words;@Value("${search.searchWords:200}")private Integer searchWords;@Overridepublic SearchVO search(SearchDTO dto)  {SearchVO result = new SearchVO();//构建boolBoolQuery boolQuery =  this.buildBoolQuery(dto);//构建高亮字段Highlight highlight = this.buildHighlight();//构建查询SearchRequest searchRequest = SearchRequest.of(s -> s.index(SearchConstants.INDEX_NAME).query(q -> q.bool(boolQuery)).from((dto.getPageNo() - 1) * dto.getPageSize()).size(dto.getPageSize()).highlight(highlight));SearchResponse<SearchDocument> response = EsSearchUtil.search(searchRequest,SearchDocument.class);List<ContentSearchVO> searchResult = this.buildSearchResult(response);if (!CollectionUtils.isEmpty(searchResult)){searchResult = this.sortSearchResult(searchResult);}result.setList(searchResult);return result;}/***  构建bool* @param dto 查询参数* @return BoolQuery*/private BoolQuery buildBoolQuery(SearchDTO dto) {String tags = dto.getTags();//要查询的字段List<String> queryFields = List.of(SearchConstants.FILE_NAME, SearchConstants.CONVERTED_TEXT);//构建bool查询BoolQuery.Builder boolBuilder = new BoolQuery.Builder().should(s -> s.multiMatch(mu -> mu.fields(queryFields).query(dto.getText())));if (tags != null){List<FieldValue> v = new ArrayList<>();String[] split = tags.split(",");for (String s : split) {v.add(FieldValue.of(s));}TermsQuery termsQuery = TermsQuery.of(t -> t.field(SearchConstants.TAGS).terms(tm -> tm.value(v)));boolBuilder.must(m -> m.terms(termsQuery));}return boolBuilder.build();}/*** 构建高亮字段* @return Highlight*/private Highlight buildHighlight() {Map<String, HighlightField> map = new HashMap<>();map.put(SearchConstants.FILE_NAME, HighlightField.of(hf -> hf.preTags(SearchConstants.PRE_TAGS).postTags(SearchConstants.POST_TAGS)));map.put(SearchConstants.CONVERTED_TEXT, HighlightField.of(hf -> hf.preTags(SearchConstants.PRE_TAGS).postTags(SearchConstants.POST_TAGS).numberOfFragments(1).fragmentSize(highlightWords)  //numberOfFragments(1),表示将字段分割为最多1个片段,并设置 fragmentSize(300),表示每个片段的大小为300个字符。));return Highlight.of(h -> h.type(HighlighterType.Unified).order(HighlighterOrder.Score).fields(map));}private List<ContentSearchVO> buildSearchResult(SearchResponse<SearchDocument> resp) {List<Hit<SearchDocument>> hits = resp.hits().hits();List<ContentSearchVO> list = new ArrayList<>();for(Hit<SearchDocument> hit:hits){SearchDocument searchDocument = JsonUtil.objToObj(hit.source(), SearchDocument.class);ContentSearchVO contentSearchVO =  this.searchAdapter(searchDocument);String content = contentSearchVO.getContent();Map<String, List<String>> highlight = hit.highlight();if (!CollectionUtils.isEmpty(highlight)) {//高亮标题List<String> highLightTitle = highlight.get(SearchConstants.FILE_NAME);//高亮内容List<String> highLightContent = highlight.get(SearchConstants.CONVERTED_TEXT);//标题if (!CollectionUtils.isEmpty(highLightTitle)){StringBuilder highLightTitleStringBuilder = new StringBuilder();for (String titleSegment : highLightTitle) {highLightTitleStringBuilder.append(titleSegment);}contentSearchVO .setTitle(highLightTitleStringBuilder.toString());}//内容if (!CollectionUtils.isEmpty(highLightContent)){StringBuilder highLightContentStringBuilder = new StringBuilder();for (String titleSegment : highLightContent) {highLightContentStringBuilder.append(titleSegment);}contentSearchVO .setContent(highlightContent);}else {//无高亮字段---从头开始取一定数量的字String s = this.replaceSymbol(content).replace(SearchConstants.LINE, "");if (s.length() >= words){s = s.substring(0,words);}contentSearchVO .setContent(s);}}else {//无高亮字段---从头开始取一定数量的字String s = this.replaceSymbol(content).replace(SearchConstants.LINE, "");if (s.length() >= words){s = s.substring(0,words);}contentSearchVO .setContent(s);}list.add(contentSearchVO );}return list;}private ContentSearchVO searchAdapter(SearchDocument searchDocument) {ContentSearchVO contentSearchVO = new ContentSearchVO();contentSearchVO .setTitle(searchDocument.getFileName());contentSearchVO .setContent(searchDocument.getDocText());contentSearchVO .setFileId(searchDocument.getFileId());contentSearchVO .setTags(searchDocument.getTags());contentSearchVO .setTimestamp(searchDocument.getTimestamp());return contentSearchVO ;}private String replaceSymbol(String replaceStr) {return replaceStr.replaceAll("\t","").replaceAll(" "," ").replaceAll(" "," ").replaceAll("(\\n\\s*)+", " ") //多重换行只保留一个.replaceAll("\\s+"," ").replaceAll("-\\d+-", "") //去掉 页码;}}

版权声明:

本网仅为发布的内容提供存储空间,不对发表、转载的内容提供任何形式的保证。凡本网注明“来源:XXX网络”的作品,均转载自其它媒体,著作权归作者所有,商业转载请联系作者获得授权,非商业转载请注明出处。

我们尊重并感谢每一位作者,均已注明文章来源和作者。如因作品内容、版权或其它问题,请及时与我们联系,联系邮箱:809451989@qq.com,投稿邮箱:809451989@qq.com