文章目录
- 1.EasyCode生成interview_history的crud
- 1.在模板设置中手动指定逻辑删除的值
- 2.生成代码,进行测试
- 2.PDF识别关键字
- 1.引入依赖
- 2.代码概览
- 3.PDFUtil.java
- 4.keyword
- 1.EndType.java
- 2.FlagIndex.java
- 3.WordType.java
- 4.KeyWordUtil.java
- 3.策略模式实现引擎切换&简历分析
- 1.req和vo
- 1.InterviewReq.java
- 2.InterviewVO.java
- 2.策略模式准备
- 1.引擎策略枚举 EngineEnum.java
- 2.引擎策略能力接口 InterviewEngine.java
- 3.本地引擎具体策略 JiChiInterviewEngine.java
- 3.业务
- 1.InterviewController.java
- 2.InterviewHistoryService.java
- 3.InterviewHistoryServiceImpl.java
- 4.测试
1.EasyCode生成interview_history的crud
1.在模板设置中手动指定逻辑删除的值
2.生成代码,进行测试
2.PDF识别关键字
1.引入依赖
<dependency><groupId>org.apache.pdfbox</groupId><artifactId>pdfbox</artifactId><version>2.0.24</version></dependency>
2.代码概览
3.PDFUtil.java
package com.sunxiansheng.interview.server.util;import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.regex.Pattern;@Slf4j
public class PDFUtil {private static Pattern pattern = Pattern.compile("\\s*|\t|\r|\n");public static String getPdfText(String pdfUrl) {PDDocument document = null;String text = "";try {URL url = new URL(pdfUrl);HttpURLConnection htpcon = (HttpURLConnection) url.openConnection();htpcon.setRequestMethod("GET");htpcon.setDoOutput(true);htpcon.setDoInput(true);htpcon.setUseCaches(false);htpcon.setConnectTimeout(10000);htpcon.setReadTimeout(10000);InputStream in = htpcon.getInputStream();document = PDDocument.load(in);PDFTextStripper stripper = new PDFTextStripper();stripper.setSortByPosition(true);stripper.setStartPage(0);stripper.setEndPage(Integer.MAX_VALUE);text = stripper.getText(document);text = pattern.matcher(text).replaceAll("");if (log.isInfoEnabled()) {log.info("识别到的pdf为{}", text);}} catch (Exception e) {log.error("获取pdf转为文字错误:{}", e.getMessage(), e);} finally {if (document != null) {try {document.close();} catch (Exception e) {log.error("close error", e);}}}return text;}}
4.keyword
1.EndType.java
package com.sunxiansheng.interview.server.util.keyword;
/**
 * Marks whether a node of the keyword tree terminates a word.
 *
 * <p>The declaration order matters: other code in this package stores the
 * ordinal of these constants.
 */
public enum EndType {

    /** The node is an intermediate character; more characters follow. */
    HAS_NEXT,

    /** The node is the last character of a complete word. */
    IS_END
}
2.FlagIndex.java
package com.sunxiansheng.interview.server.util.keyword;import java.util.List;
/**
 * Result holder for a single keyword lookup: whether a word was matched,
 * whether that word is a white-listed word, and the positions of the matched
 * characters.
 */
public class FlagIndex {

    /** {@code true} when a complete word was matched. */
    private boolean flag;

    /** {@code true} when the matched word is flagged as a white word. */
    private boolean isWhiteWord;

    /** Positions of the matched characters. */
    private List<Integer> index;

    /** @return whether a complete word was matched */
    public boolean isFlag() {
        return this.flag;
    }

    /** @param flag whether a complete word was matched */
    public void setFlag(boolean flag) {
        this.flag = flag;
    }

    /** @return whether the matched word is a white word */
    public boolean isWhiteWord() {
        return this.isWhiteWord;
    }

    /** @param whiteWord whether the matched word is a white word */
    public void setWhiteWord(boolean whiteWord) {
        this.isWhiteWord = whiteWord;
    }

    /** @return positions of the matched characters, or {@code null} if never set */
    public List<Integer> getIndex() {
        return this.index;
    }

    /** @param index positions of the matched characters */
    public void setIndex(List<Integer> index) {
        this.index = index;
    }
}
3.WordType.java
package com.sunxiansheng.interview.server.util.keyword;
/**
 * Kind of a dictionary word.
 *
 * <p>The declaration order matters: other code in this package stores the
 * ordinal of these constants.
 */
public enum WordType {

    /** Black-listed word. */
    BLACK,

    /** White-listed word. */
    WHITE
}
4.KeyWordUtil.java
package com.sunxiansheng.interview.server.util.keyword;

import com.baomidou.mybatisplus.core.toolkit.CollectionUtils;

import java.util.*;

/**
 * Keyword matcher backed by a character tree.
 *
 * <p>Words are registered with {@link #addWord(Collection)}; afterwards
 * {@link #buildKeyWordsLists(String)} scans a text and returns the registered
 * words that occur in it.
 */
public class KeyWordUtil {

    /** One node of the character tree. */
    private static final class TrieNode {
        /** Child nodes keyed by the next character. */
        private final Map<Character, TrieNode> children;
        /** {@code true} when the path from the root to this node spells a complete word. */
        private boolean end;
        /** {@code true} when the word ending here is a white word. */
        private boolean whiteWord;

        private TrieNode(int initialCapacity) {
            this.children = new HashMap<>(initialCapacity);
        }
    }

    /** Root of the character tree shared by all callers. */
    private static final TrieNode ROOT = new TrieNode(1024);

    /** Set once {@link #addWord(Collection)} has finished at least one call. */
    private static volatile boolean init = false;

    /**
     * @return {@code true} once {@link #addWord(Collection)} has completed at least once
     */
    public static boolean isInit() {
        return init;
    }

    /**
     * Scans {@code text} and collects every registered word found in it.
     *
     * <p>A lookup is started at every character position. When a white word
     * is matched the scan jumps past it; otherwise the matched word is added
     * to the result and the scan continues at the next character.
     *
     * @param text text to scan; {@code null} or empty yields an empty list
     * @return the matched words in order of appearance (may contain duplicates)
     */
    public static List<String> buildKeyWordsLists(final String text) {
        List<String> wordList = new ArrayList<>();
        if (text == null || text.isEmpty()) {
            return wordList;
        }
        char[] charset = text.toCharArray();
        for (int i = 0; i < charset.length; i++) {
            FlagIndex fi = getFlagIndex(charset, i, 0);
            if (!fi.isFlag()) {
                continue;
            }
            List<Integer> positions = fi.getIndex();
            if (fi.isWhiteWord()) {
                // Skip the remaining characters of the white word.
                i += positions.size() - 1;
                continue;
            }
            StringBuilder builder = new StringBuilder(positions.size());
            for (int position : positions) {
                builder.append(charset[position]);
            }
            wordList.add(builder.toString());
        }
        return wordList;
    }

    /**
     * Walks the tree starting at {@code charset[begin]} and reports the longest
     * complete word found.
     *
     * @param charset characters of the text being scanned
     * @param begin   position at which the lookup starts
     * @param skip    number of consecutive non-matching characters tolerated inside a word
     * @return lookup result; when {@link FlagIndex#isFlag()} is {@code true} the index list
     *         holds exactly the positions of the matched word
     */
    private static FlagIndex getFlagIndex(final char[] charset, final int begin, final int skip) {
        FlagIndex fi = new FlagIndex();
        TrieNode current = ROOT;
        boolean flag = false;
        int count = 0;
        // Number of entries of `index` that belong to the last complete word seen.
        int matchedLength = 0;
        List<Integer> index = new ArrayList<>();
        for (int i = begin; i < charset.length; i++) {
            TrieNode next = current.children.get(charset[i]);
            if (count > skip || (i == begin && next == null)) {
                break;
            }
            if (next != null) {
                current = next;
                count = 0;
                index.add(i);
            } else {
                count++;
                if (flag && count > skip) {
                    break;
                }
            }
            if (current.end) {
                flag = true;
                matchedLength = index.size();
            }
            if (current.whiteWord) {
                fi.setWhiteWord(true);
                break;
            }
        }
        if (flag && matchedLength < index.size()) {
            // The walk continued into a longer word that was never completed
            // (e.g. "SpringBoo" with "Spring" and "SpringBoot" registered):
            // drop the dangling characters so that only the complete word remains.
            index = new ArrayList<>(index.subList(0, matchedLength));
        }
        fi.setFlag(flag);
        fi.setIndex(index);
        return fi;
    }

    /**
     * Registers the given words in the tree. All words are registered as
     * {@link WordType#BLACK}. {@code null} and empty entries are ignored.
     *
     * <p>The initialised flag is raised only after the words have been
     * inserted, so {@link #isInit()} does not report {@code true} while this
     * call is still populating the tree.
     *
     * @param wordList words to register; {@code null} or empty registers nothing
     *                 but still marks the utility as initialised
     */
    public static synchronized void addWord(Collection<String> wordList) {
        if (CollectionUtils.isEmpty(wordList)) {
            init = true;
            return;
        }
        WordType wordType = WordType.BLACK;
        for (String key : wordList) {
            if (key == null || key.isEmpty()) {
                continue;
            }
            TrieNode node = ROOT;
            for (int i = 0; i < key.length(); i++) {
                char keyChar = key.charAt(i);
                TrieNode child = node.children.get(keyChar);
                if (child == null) {
                    child = new TrieNode(4);
                    node.children.put(keyChar, child);
                }
                node = child;
            }
            node.end = true;
            node.whiteWord = (wordType == WordType.WHITE);
        }
        init = true;
    }
}
3.策略模式实现引擎切换&简历分析
1.req和vo
1.InterviewReq.java
package com.sunxiansheng.interview.api.req;import com.sunxiansheng.interview.api.enums.EngineEnum;
import lombok.Getter;
import lombok.Setter;import java.io.Serializable;@Getter
@Setter
/**
 * Request payload for analysing a resume.
 */
public class InterviewReq implements Serializable {

    /** URL of the resume to analyse. */
    private String url;

    /** Name of the engine to use; defaults to the name of {@link EngineEnum#JI_CHI}. */
    private String engine = EngineEnum.JI_CHI.name();
}
2.InterviewVO.java
package com.sunxiansheng.interview.api.vo;import lombok.Data;
import lombok.Getter;
import lombok.Setter;import java.io.Serializable;
import java.util.List;@Getter
@Setter
/**
 * Result of a resume analysis: the list of interview topics derived from the
 * keywords found in the resume.
 */
public class InterviewVO implements Serializable {

    /** Interview topics; {@code null} until set. */
    private List<Interview> questionList;

    /**
     * One interview topic.
     *
     * <p>Implements {@link Serializable} because instances are held by the
     * serializable enclosing class.
     */
    @Data
    public static class Interview implements Serializable {

        /** Display keyword of the topic. */
        private String keyWord;

        /** Identifier of the category the topic belongs to. */
        private Long categoryId;

        /** Identifier of the label the topic was derived from. */
        private Long labelId;
    }
}
2.策略模式准备
1.引擎策略枚举 EngineEnum.java
package com.sunxiansheng.interview.api.enums;import lombok.Getter;
@Getter
/**
 * Identifiers of the available interview analysis engines.
 */
public enum EngineEnum {

    /** The locally implemented engine. */
    JI_CHI,

    /** Second engine identifier; no implementation is visible in this module. */
    ALI_BL,
}
2.引擎策略能力接口 InterviewEngine.java
package com.sunxiansheng.interview.server.service;import com.sunxiansheng.interview.api.enums.EngineEnum;
import com.sunxiansheng.interview.api.vo.InterviewVO;import java.util.List;
/**
 * Strategy contract for an interview analysis engine.
 */
public interface InterviewEngine {

    /**
     * Identifies the engine.
     *
     * @return the enum constant this implementation is registered under
     */
    EngineEnum engineType();

    /**
     * Turns keywords into interview topics.
     *
     * @param KeyWords keywords to analyse
     * @return the analysis result
     */
    InterviewVO analyse(List<String> KeyWords);
}
3.本地引擎具体策略 JiChiInterviewEngine.java
package com.sunxiansheng.interview.server.service.impl;import com.sunxiansheng.interview.api.enums.EngineEnum;
import com.sunxiansheng.interview.api.vo.InterviewVO;
import com.sunxiansheng.interview.server.entity.po.SubjectCategory;
import com.sunxiansheng.interview.server.entity.po.SubjectLabel;
import com.sunxiansheng.interview.server.mapper.SubjectMapper;
import com.sunxiansheng.interview.server.service.InterviewEngine;
import org.springframework.stereotype.Component;
import org.springframework.util.CollectionUtils;import javax.annotation.PostConstruct;
import javax.annotation.Resource;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.function.Function;
import java.util.stream.Collectors;
@Component
public class JiChiInterviewEngine implements InterviewEngine {@Resourceprivate SubjectMapper subjectMapper;private List<SubjectLabel> labels;private Map<Long, SubjectCategory> categoryMap;@PostConstructpublic void init() {labels = subjectMapper.listAllLabel();categoryMap = subjectMapper.listAllCategory().stream().collect(Collectors.toMap(SubjectCategory::getId, Function.identity()));}@Overridepublic EngineEnum engineType() {return EngineEnum.JI_CHI;}@Overridepublic InterviewVO analyse(List<String> KeyWords) {if (CollectionUtils.isEmpty(KeyWords)) {return new InterviewVO();}List<SubjectLabel> includedLabels = labels.stream().filter(item -> {return KeyWords.contains(item.getLabelName());}).collect(Collectors.toList());List<InterviewVO.Interview> collect = includedLabels.stream().map(label -> {InterviewVO.Interview interview = new InterviewVO.Interview();SubjectCategory subjectCategory = categoryMap.get(label.getCategoryId());if (Objects.nonNull(subjectCategory)) {interview.setKeyWord(String.format("%s-%s", subjectCategory.getCategoryName(), label.getLabelName()));} else {interview.setKeyWord(label.getLabelName());}interview.setCategoryId(label.getCategoryId());interview.setLabelId(label.getId());return interview;}).collect(Collectors.toList());InterviewVO interviewVO = new InterviewVO();interviewVO.setQuestionList(collect);return interviewVO;}
}
3.业务
1.InterviewController.java
package com.sunxiansheng.interview.server.controller;import com.alibaba.fastjson.JSON;
import com.google.common.base.Preconditions;
import com.sunxiansheng.interview.api.common.Result;
import com.sunxiansheng.interview.api.req.InterviewReq;
import com.sunxiansheng.interview.api.vo.InterviewVO;
import com.sunxiansheng.interview.server.convert.InterviewHistoryConvert;
import com.sunxiansheng.interview.server.entity.dto.InterviewHistoryDto;
import com.sunxiansheng.interview.server.entity.page.PageResult;
import com.sunxiansheng.interview.server.entity.req.InterviewHistoryReq;
import com.sunxiansheng.interview.server.entity.vo.InterviewHistoryVo;
import com.sunxiansheng.interview.server.service.InterviewHistoryService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.web.bind.annotation.*;import javax.annotation.Resource;
import java.util.Objects;
@Slf4j
@RestController
@RequestMapping("/interview")
public class InterviewController {@Resourceprivate InterviewHistoryService interviewHistoryService;@GetMapping("/queryPage")public Result<PageResult<InterviewHistoryVo>> queryByPage(@RequestBody InterviewHistoryReq req) {try {if (log.isInfoEnabled()) {log.info("分页查询数据入参{}", JSON.toJSONString(req));}InterviewHistoryDto interviewHistoryDto = InterviewHistoryConvert.INSTANCE.convertReqToDto(req);PageResult<InterviewHistoryVo> interviewHistoryVoPageResult = this.interviewHistoryService.queryByPage(interviewHistoryDto);return Result.ok(interviewHistoryVoPageResult);} catch (Exception e) {log.error("分页查询数据!错误原因{}", e.getMessage(), e);return Result.fail(e.getMessage());}}@PostMapping(value = "/analyse")public Result<InterviewVO> analyse(@RequestBody InterviewReq req) {try {if (log.isInfoEnabled()) {log.info("分析简历入参{}", JSON.toJSON(req));}Preconditions.checkArgument(!Objects.isNull(req), "参数不能为空!");Preconditions.checkArgument(!Objects.isNull(req.getEngine()), "引擎不能为空!");Preconditions.checkArgument(!Objects.isNull(req.getUrl()), "简历不能为空!");return Result.ok(interviewHistoryService.analyse(req));} catch (IllegalArgumentException e) {log.error("参数异常!错误原因{}", e.getMessage(), e);return Result.fail(e.getMessage());} catch (Exception e) {log.error("分析简历异常!错误原因{}", e.getMessage(), e);return Result.fail("分析简历异常!");}}}
2.InterviewHistoryService.java
package com.sunxiansheng.interview.server.service;import com.sunxiansheng.interview.api.req.InterviewReq;
import com.sunxiansheng.interview.api.vo.InterviewVO;
import com.sunxiansheng.interview.server.entity.dto.InterviewHistoryDto;
import com.sunxiansheng.interview.server.entity.page.PageResult;
import com.sunxiansheng.interview.server.entity.vo.InterviewHistoryVo;
/**
 * Service contract for interview history queries and resume analysis.
 */
public interface InterviewHistoryService {

    /**
     * Queries one page of interview history records.
     *
     * @param Dto query and paging parameters
     * @return the requested page
     */
    PageResult<InterviewHistoryVo> queryByPage(InterviewHistoryDto Dto);

    /**
     * Analyses the resume referenced by the request.
     *
     * @param req request carrying the resume URL and the engine name
     * @return the analysis result
     */
    InterviewVO analyse(InterviewReq req);
}
3.InterviewHistoryServiceImpl.java
package com.sunxiansheng.interview.server.service.impl;import com.google.common.base.Preconditions;
import com.sunxiansheng.interview.api.req.InterviewReq;
import com.sunxiansheng.interview.api.vo.InterviewVO;
import com.sunxiansheng.interview.server.convert.InterviewHistoryConvert;
import com.sunxiansheng.interview.server.entity.dto.InterviewHistoryDto;
import com.sunxiansheng.interview.server.entity.page.PageResult;
import com.sunxiansheng.interview.server.entity.page.SunPageHelper;
import com.sunxiansheng.interview.server.entity.po.InterviewHistoryPo;
import com.sunxiansheng.interview.server.entity.po.SubjectLabel;
import com.sunxiansheng.interview.server.entity.vo.InterviewHistoryVo;
import com.sunxiansheng.interview.server.mapper.InterviewHistoryMapper;
import com.sunxiansheng.interview.server.mapper.SubjectMapper;
import com.sunxiansheng.interview.server.service.InterviewEngine;
import com.sunxiansheng.interview.server.service.InterviewHistoryService;
import com.sunxiansheng.interview.server.util.PDFUtil;
import com.sunxiansheng.interview.server.util.keyword.KeyWordUtil;
import org.springframework.beans.BeansException;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
import org.springframework.stereotype.Service;import javax.annotation.Resource;
import java.util.*;
import java.util.stream.Collectors;
@Service("interviewHistoryService")
public class InterviewHistoryServiceImpl implements InterviewHistoryService, ApplicationContextAware {@Resourceprivate InterviewHistoryMapper interviewHistoryMapper;@Resourceprivate SubjectMapper subjectMapper;private static final Map<String, InterviewEngine> engineMap = new HashMap<>();@Overridepublic void setApplicationContext(ApplicationContext applicationContext) throws BeansException {Collection<InterviewEngine> engines = applicationContext.getBeansOfType(InterviewEngine.class).values();for (InterviewEngine engine : engines) {engineMap.put(engine.engineType().name(), engine);}}@Overridepublic PageResult<InterviewHistoryVo> queryByPage(InterviewHistoryDto interviewHistoryDto) {InterviewHistoryPo interviewHistoryPo = InterviewHistoryConvert.INSTANCE.convertDtoToPo(interviewHistoryDto);PageResult<InterviewHistoryPo> paginate = SunPageHelper.paginate(interviewHistoryDto.getPageNo(), interviewHistoryDto.getPageSize(),() -> interviewHistoryMapper.count(interviewHistoryPo),(offset, size) -> interviewHistoryMapper.queryPage(interviewHistoryPo, offset, size));PageResult<InterviewHistoryVo> interviewHistoryVoPageResult = InterviewHistoryConvert.INSTANCE.convertPageResult(paginate);return interviewHistoryVoPageResult;}@Overridepublic InterviewVO analyse(InterviewReq req) {List<String> keyWords = buildKeyWords(req.getUrl());InterviewEngine engine = engineMap.get(req.getEngine());Preconditions.checkArgument(!Objects.isNull(engine), "引擎不能为空!");return engine.analyse(keyWords);}private List<String> buildKeyWords(String url) {String pdfText = PDFUtil.getPdfText(url);if (!KeyWordUtil.isInit()) {List<String> list = subjectMapper.listAllLabel().stream().map(SubjectLabel::getLabelName).collect(Collectors.toList());KeyWordUtil.addWord(list);}return KeyWordUtil.buildKeyWordsLists(pdfText);}}
4.测试