From 6e36e9df65156d1e3161eebec9c3b52912cd0fe7 Mon Sep 17 00:00:00 2001 From: suwen <577014284@qq.com> Date: Wed, 3 Feb 2021 16:25:59 +0800 Subject: [PATCH] =?UTF-8?q?try=20to:=20lucene=E6=96=87=E7=AB=A0=E6=90=9C?= =?UTF-8?q?=E7=B4=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - lucene文章搜索分页跳转 - lucene索引创建 --- .gitignore | 3 + .../lucene/api/LuceneSearchController.java | 88 ++++++ .../forest/lucene/api/SearchController.java | 72 ----- .../lucene/lucene/ArticleBeanIndex.java | 61 ++-- .../lucene/mapper/ArticleLuceneMapper.java | 33 ++ .../forest/lucene/model/ArticleLucene.java | 39 +++ .../forest/lucene/service/LuceneService.java | 46 +++ .../forest/lucene/service/SearchService.java | 281 ------------------ .../service/impl/LuceneServiceImpl.java | 182 ++++++++++++ src/main/java/mapper/BaikeMapper.xml | 21 -- .../mapper/lucene/ArticleLuceneMapper.xml | 64 ++++ 11 files changed, 488 insertions(+), 402 deletions(-) create mode 100755 src/main/java/com/rymcu/forest/lucene/api/LuceneSearchController.java delete mode 100755 src/main/java/com/rymcu/forest/lucene/api/SearchController.java create mode 100755 src/main/java/com/rymcu/forest/lucene/mapper/ArticleLuceneMapper.java create mode 100644 src/main/java/com/rymcu/forest/lucene/model/ArticleLucene.java create mode 100644 src/main/java/com/rymcu/forest/lucene/service/LuceneService.java delete mode 100644 src/main/java/com/rymcu/forest/lucene/service/SearchService.java create mode 100644 src/main/java/com/rymcu/forest/lucene/service/impl/LuceneServiceImpl.java delete mode 100755 src/main/java/mapper/BaikeMapper.xml create mode 100755 src/main/java/mapper/lucene/ArticleLuceneMapper.xml diff --git a/.gitignore b/.gitignore index a67cc18..f859859 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,6 @@ build/ ### VS Code ### .vscode/ + +### lucene ### +index diff --git a/src/main/java/com/rymcu/forest/lucene/api/LuceneSearchController.java b/src/main/java/com/rymcu/forest/lucene/api/LuceneSearchController.java new file mode 100755 index 0000000..1c61b44 --- /dev/null +++ b/src/main/java/com/rymcu/forest/lucene/api/LuceneSearchController.java @@ -0,0 +1,88 @@ +package com.rymcu.forest.lucene.api; + +import com.github.pagehelper.Page; +import com.github.pagehelper.PageInfo; +import com.rymcu.forest.core.result.GlobalResult; +import com.rymcu.forest.core.result.GlobalResultGenerator; +import com.rymcu.forest.dto.ArticleDTO; +import com.rymcu.forest.lucene.model.ArticleLucene; +import com.rymcu.forest.lucene.service.LuceneService; +import com.rymcu.forest.util.Utils; +import lombok.extern.log4j.Log4j2; +import org.springframework.web.bind.annotation.*; + +import javax.annotation.Resource; +import java.util.List; + +/** + * LuceneSearchController + * + * @author suwen + * @date 2021/2/3 10:41 + */ +@Log4j2 +@RestController +@RequestMapping("/api/v1/lucene") +public class LuceneSearchController { + + @Resource private LuceneService luceneService; + + @GetMapping("/getArticles") + public GlobalResult createIndex() { + return GlobalResultGenerator.genSuccessResult(luceneService.getAllArticleLucene()); + } + + @GetMapping("/getArticlesByIds") + public GlobalResult getArticlesByIds() { + return GlobalResultGenerator.genSuccessResult( + luceneService.getArticlesByIds(new String[] {"1", "2", "3"})); + } + + @GetMapping("/createIndex") + public GlobalResult createIndex( + @RequestParam(required = false, defaultValue = "0") Integer limit, + @RequestParam(required = false, defaultValue = "1000") Integer offset) { + // 拉取数据 + luceneService.writeArticle(luceneService.getAllArticleLucene()); + return GlobalResultGenerator.genSuccessResult("创建索引成功"); + } + + /** + * 搜索,实现高亮 + * + * @param q + * @return + */ + @GetMapping("/searchArticle/{q}") + public GlobalResult searchArticle( + @PathVariable String q, + @RequestParam(defaultValue = "1") Integer pageNum, + @RequestParam(defaultValue = "10") Integer pageSize) { + // 找出相关文章,相关度倒序 + List resList = luceneService.searchArticle(q); + // 分页组装文章详情 + int total = resList.size(); + if (total == 0) { + return GlobalResultGenerator.genSuccessResult("未找到相关文章"); + } + Page page = new Page<>(pageNum, pageSize); + page.setTotal(total); + int startIndex = (pageNum - 1) * pageSize; + int endIndex = Math.min(startIndex + pageSize, total); + // 分割子列表 + List subList = resList.subList(startIndex, endIndex); + String[] ids = subList.stream().map(ArticleLucene::getIdArticle).toArray(String[]::new); + List articleDTOList = luceneService.getArticlesByIds(ids); + ArticleDTO temp; + // 写入文章关键词信息 + for (int i = 0; i < articleDTOList.size(); i++) { + temp = articleDTOList.get(i); + temp.setArticleTitle(subList.get(i).getArticleTitle()); + temp.setArticlePreviewContent(subList.get(i).getArticleContent()); + articleDTOList.set(i, temp); + } + page.addAll(articleDTOList); + PageInfo pageInfo = new PageInfo<>(page); + return GlobalResultGenerator.genSuccessResult(Utils.getArticlesGlobalResult(pageInfo)); + } +} diff --git a/src/main/java/com/rymcu/forest/lucene/api/SearchController.java b/src/main/java/com/rymcu/forest/lucene/api/SearchController.java deleted file mode 100755 index fe65fa7..0000000 --- a/src/main/java/com/rymcu/forest/lucene/api/SearchController.java +++ /dev/null @@ -1,72 +0,0 @@ -package com.rymcu.forest.lucene.api; - -import com.rymcu.forest.dto.ArticleDTO; -import com.rymcu.forest.dto.ArticleSearchDTO; -import com.rymcu.forest.lucene.mapper.BaikeMapper; -import com.rymcu.forest.lucene.model.Baike; -import com.rymcu.forest.lucene.service.SearchService; -import com.rymcu.forest.service.ArticleService; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.PathVariable; -import org.springframework.web.bind.annotation.RequestMapping; -import org.springframework.web.bind.annotation.RestController; -import org.springframework.web.servlet.ModelAndView; - -import java.util.List; -import java.util.Map; - -@RestController -@RequestMapping("/api/v1/lucene") -public class SearchController { - @Autowired private BaikeMapper baikeMapper; - @Autowired private SearchService searchService; - @Autowired private ArticleService articleService; - - @GetMapping("/index") - public String createIndex(int limit, int offset) { - // 拉取数据 - List baikes = baikeMapper.getAllBaike(limit, offset); - searchService.write(baikes); - return "成功"; - } - - @GetMapping("/indexArticle") - public String createArticleIndex() { - // 拉取数据 - List list = articleService.findArticles(new ArticleSearchDTO()); - searchService.writeArticle(list); - return "成功"; - } - - /** - * 搜索,实现高亮 - * - * @param q - * @return - * @throws Exception - */ - @GetMapping("/search/{q}") - public List> getSearchText(@PathVariable String q) throws Exception { - return searchService.search(q); - } - - /** - * 搜索,实现高亮 - * - * @param q - * @return - * @throws Exception - */ - @GetMapping("/searchArticle/{q}") - public List searchArticle(@PathVariable String q) throws Exception { - - return searchService.searchArticle(q); - } - - @GetMapping(value = "/search") - public ModelAndView test(ModelAndView mv) { - mv.setViewName("/search"); - return mv; - } -} diff --git a/src/main/java/com/rymcu/forest/lucene/lucene/ArticleBeanIndex.java b/src/main/java/com/rymcu/forest/lucene/lucene/ArticleBeanIndex.java index 7d2d582..f586e9b 100644 --- a/src/main/java/com/rymcu/forest/lucene/lucene/ArticleBeanIndex.java +++ b/src/main/java/com/rymcu/forest/lucene/lucene/ArticleBeanIndex.java @@ -1,8 +1,6 @@ package com.rymcu.forest.lucene.lucene; - -import com.rymcu.forest.dto.ArticleDTO; -import com.rymcu.forest.lucene.model.Baike; +import com.rymcu.forest.lucene.model.ArticleLucene; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; @@ -19,32 +17,39 @@ import java.util.concurrent.CountDownLatch; * @author suwen * @date 2021/2/2 14:10 */ -public class ArticleBeanIndex extends BaseIndex{ +public class ArticleBeanIndex extends BaseIndex { - public ArticleBeanIndex(IndexWriter writer, CountDownLatch countDownLatch1, - CountDownLatch countDownLatch2, List list) { - super(writer, countDownLatch1, countDownLatch2, list); - } - public ArticleBeanIndex(String parentIndexPath, int subIndex, CountDownLatch countDownLatch1, - CountDownLatch countDownLatch2, List list) { - super(parentIndexPath, subIndex, countDownLatch1, countDownLatch2, list); - } - @Override - public void indexDoc(IndexWriter writer, ArticleDTO t) throws Exception { - Document doc = new Document(); - Field id = new Field("id", t.getIdArticle()+"", TextField.TYPE_STORED); - Field title = new Field("title", t.getArticleTitle(), TextField.TYPE_STORED); - Field summary = new Field("summary", t.getArticleContent(), TextField.TYPE_STORED); - //添加到Document中 - doc.add(id); - doc.add(title); - doc.add(summary); - if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE){ - writer.addDocument(doc); - }else{ - writer.updateDocument(new Term("id", t.getIdArticle()+""), doc); - } - } + public ArticleBeanIndex( + IndexWriter writer, + CountDownLatch countDownLatch1, + CountDownLatch countDownLatch2, + List list) { + super(writer, countDownLatch1, countDownLatch2, list); + } + public ArticleBeanIndex( + String parentIndexPath, + int subIndex, + CountDownLatch countDownLatch1, + CountDownLatch countDownLatch2, + List list) { + super(parentIndexPath, subIndex, countDownLatch1, countDownLatch2, list); + } + @Override + public void indexDoc(IndexWriter writer, ArticleLucene t) throws Exception { + Document doc = new Document(); + Field id = new Field("id", t.getIdArticle() + "", TextField.TYPE_STORED); + Field title = new Field("title", t.getArticleTitle(), TextField.TYPE_STORED); + Field summary = new Field("summary", t.getArticleContent(), TextField.TYPE_STORED); + // 添加到Document中 + doc.add(id); + doc.add(title); + doc.add(summary); + if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) { + writer.addDocument(doc); + } else { + writer.updateDocument(new Term("id", t.getIdArticle() + ""), doc); + } + } } diff --git a/src/main/java/com/rymcu/forest/lucene/mapper/ArticleLuceneMapper.java b/src/main/java/com/rymcu/forest/lucene/mapper/ArticleLuceneMapper.java new file mode 100755 index 0000000..32422a0 --- /dev/null +++ b/src/main/java/com/rymcu/forest/lucene/mapper/ArticleLuceneMapper.java @@ -0,0 +1,33 @@ +package com.rymcu.forest.lucene.mapper; + +import com.rymcu.forest.dto.ArticleDTO; +import com.rymcu.forest.lucene.model.ArticleLucene; +import org.apache.ibatis.annotations.Mapper; +import org.apache.ibatis.annotations.Param; + +import java.util.List; + +/** + * ArticleLuceneMapper + * + * @author suwen + * @date 2021/2/3 10:00 + */ +@Mapper +public interface ArticleLuceneMapper { + + /** + * 加载所有文章内容 + * + * @return + */ + List getAllArticleLucene(); + + /** + * 加载所有文章内容 + * + * @param ids 文章id(半角逗号分隔) + * @return + */ + List getArticlesByIds(@Param("ids") String[] ids); +} diff --git a/src/main/java/com/rymcu/forest/lucene/model/ArticleLucene.java b/src/main/java/com/rymcu/forest/lucene/model/ArticleLucene.java new file mode 100644 index 0000000..e256580 --- /dev/null +++ b/src/main/java/com/rymcu/forest/lucene/model/ArticleLucene.java @@ -0,0 +1,39 @@ +package com.rymcu.forest.lucene.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * ArticleLucene + * + * @author suwen + * @date 2021/2/3 09:57 + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class ArticleLucene { + + /** + * 文章编号 + */ + private String idArticle; + + /** + * 文章标题 + */ + private String articleTitle; + + /** + * 文章内容 + */ + private String articleContent; + + /** + * 相关度评分 + */ + private String score; +} diff --git a/src/main/java/com/rymcu/forest/lucene/service/LuceneService.java b/src/main/java/com/rymcu/forest/lucene/service/LuceneService.java new file mode 100644 index 0000000..2998cd1 --- /dev/null +++ b/src/main/java/com/rymcu/forest/lucene/service/LuceneService.java @@ -0,0 +1,46 @@ +package com.rymcu.forest.lucene.service; + +import com.rymcu.forest.dto.ArticleDTO; +import com.rymcu.forest.lucene.model.ArticleLucene; + +import java.util.List; + +/** + * LuceneService + * + * @author suwen + * @date 2021/2/3 10:10 + */ +public interface LuceneService { + + /** + * 将文章的数据解析为一个个关键字词存储到索引文件中 + * + * @param list + */ + void writeArticle(List list); + + /** + * 关键词搜索 + * + * @param value + * @return + * @throws Exception + */ + List searchArticle(String value); + + /** + * 加载所有文章内容 + * + * @return + */ + List getAllArticleLucene(); + + /** + * 加载所有文章内容 + * + * @param ids 文章id(半角逗号分隔) + * @return + */ + List getArticlesByIds(String[] ids); +} diff --git a/src/main/java/com/rymcu/forest/lucene/service/SearchService.java b/src/main/java/com/rymcu/forest/lucene/service/SearchService.java deleted file mode 100644 index 903b18c..0000000 --- a/src/main/java/com/rymcu/forest/lucene/service/SearchService.java +++ /dev/null @@ -1,281 +0,0 @@ -package com.rymcu.forest.lucene.service; - -import com.rymcu.forest.dto.ArticleDTO; -import com.rymcu.forest.lucene.lucene.ArticleBeanIndex; -import com.rymcu.forest.lucene.lucene.BaiKeBeanIndex; -import com.rymcu.forest.lucene.lucene.IKAnalyzer; -import com.rymcu.forest.lucene.model.Baike; -import com.rymcu.forest.lucene.util.SearchUtil; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.document.Document; -import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.highlight.*; -import org.springframework.stereotype.Service; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; - -/** - * SearchService - * - * @author suwen - * @date 2021/2/2 14:01 - */ -@Service -public class SearchService { - - /** Lucene索引文件路径 */ - private final String indexPath = System.getProperty("user.dir") + "/index"; - - /** - * 封裝一个方法,用于将数据库中的数据解析为一个个关键字词存储到索引文件中 - * - * @param baikes - */ - public void write(List baikes) { - try { - int totalCount = baikes.size(); - int perThreadCount = 3000; - int threadCount = totalCount / perThreadCount + (totalCount % perThreadCount == 0 ? 0 : 1); - ExecutorService pool = Executors.newFixedThreadPool(threadCount); - CountDownLatch countDownLatch1 = new CountDownLatch(1); - CountDownLatch countDownLatch2 = new CountDownLatch(threadCount); - - for (int i = 0; i < threadCount; i++) { - int start = i * perThreadCount; - int end = Math.min((i + 1) * perThreadCount, totalCount); - List subList = baikes.subList(start, end); - Runnable runnable = - new BaiKeBeanIndex("index", i, countDownLatch1, countDownLatch2, subList); - // 子线程交给线程池管理 - pool.execute(runnable); - } - countDownLatch1.countDown(); - System.out.println("开始创建索引"); - // 等待所有线程都完成 - countDownLatch2.await(); - // 线程全部完成工作 - System.out.println("所有线程都创建索引完毕"); - // 释放线程池资源 - pool.shutdown(); - } catch (Exception e) { - e.printStackTrace(); - } - } - - /** - * 封裝一个方法,用于将数据库中的数据解析为一个个关键字词存储到索引文件中 - * - * @param list - */ - public void writeArticle(List list) { - try { - int totalCount = list.size(); - int perThreadCount = 3000; - int threadCount = totalCount / perThreadCount + (totalCount % perThreadCount == 0 ? 0 : 1); - ExecutorService pool = Executors.newFixedThreadPool(threadCount); - CountDownLatch countDownLatch1 = new CountDownLatch(1); - CountDownLatch countDownLatch2 = new CountDownLatch(threadCount); - - for (int i = 0; i < threadCount; i++) { - int start = i * perThreadCount; - int end = Math.min((i + 1) * perThreadCount, totalCount); - List subList = list.subList(start, end); - Runnable runnable = - new ArticleBeanIndex("articlesIndex", i, countDownLatch1, countDownLatch2, subList); - // 子线程交给线程池管理 - pool.execute(runnable); - } - countDownLatch1.countDown(); - System.out.println("开始创建索引"); - // 等待所有线程都完成 - countDownLatch2.await(); - // 线程全部完成工作 - System.out.println("所有线程都创建索引完毕"); - // 释放线程池资源 - pool.shutdown(); - } catch (Exception e) { - e.printStackTrace(); - } - } - - /** - * 搜索 - * - * @param value - * @return - * @throws Exception - */ - public List> search(String value) throws Exception { - List> list = new ArrayList<>(); - ExecutorService service = Executors.newCachedThreadPool(); - // 定义分词器 - Analyzer analyzer = new IKAnalyzer(); - try { - IndexSearcher searcher = SearchUtil.getIndexSearcherByParentPath(indexPath, service); - String[] fields = {"title", "summary"}; - // 构造Query对象 - MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer); - - BufferedReader in = - new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8)); - String line = value != null ? value : in.readLine(); - Query query = parser.parse(line); - // 最终被分词后添加的前缀和后缀处理器,默认是粗体 - SimpleHTMLFormatter htmlFormatter = - new SimpleHTMLFormatter("", ""); - // 高亮搜索的词添加到高亮处理器中 - Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query)); - - // 获取搜索的结果,指定返回document返回的个数 - // 默认搜索结果为显示第一页,1000 条,可以优化 - TopDocs results = SearchUtil.getScoreDocsByPerPage(1, 100, searcher, query); - ScoreDoc[] hits = results.scoreDocs; - - // 遍历,输出 - for (ScoreDoc hit : hits) { - int id = hit.doc; - float score = hit.score; - Document hitDoc = searcher.doc(hit.doc); - Map map = new HashMap<>(); - map.put("id", hitDoc.get("id")); - - // 获取到summary - String name = hitDoc.get("summary"); - // 将查询的词和搜索词匹配,匹配到添加前缀和后缀 - TokenStream tokenStream = - TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "summary", analyzer); - // 传入的第二个参数是查询的值 - TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, name, false, 10); - StringBuilder baikeValue = new StringBuilder(); - for (TextFragment textFragment : frag) { - if ((textFragment != null) && (textFragment.getScore() > 0)) { - // if ((frag[j] != null)) { - // 获取 summary 的值 - baikeValue.append(textFragment.toString()); - } - } - - // 获取到title - String title = hitDoc.get("title"); - TokenStream titleTokenStream = - TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "title", analyzer); - TextFragment[] titleFrag = - highlighter.getBestTextFragments(titleTokenStream, title, false, 10); - StringBuilder titleValue = new StringBuilder(); - for (int j = 0; j < titleFrag.length; j++) { - if ((frag[j] != null)) { - titleValue.append(titleFrag[j].toString()); - } - } - map.put("title", titleValue.toString()); - map.put("summary", baikeValue.toString()); - map.put("score", String.valueOf(score)); - list.add(map); - } - } catch (IOException e) { - e.printStackTrace(); - } finally { - service.shutdownNow(); - } - return list; - } - - /** - * 搜索 - * - * @param value - * @return - * @throws Exception - */ - public List> searchArticle(String value) throws Exception { - List> list = new ArrayList<>(); - ExecutorService service = Executors.newCachedThreadPool(); - // 定义分词器 - Analyzer analyzer = new IKAnalyzer(); - try { - IndexSearcher searcher = - SearchUtil.getIndexSearcherByParentPath( - System.getProperty("user.dir") + "/articlesIndex", service); - String[] fields = {"title", "summary"}; - // 构造Query对象 - MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer); - - BufferedReader in = - new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8)); - String line = value != null ? value : in.readLine(); - Query query = parser.parse(line); - // 最终被分词后添加的前缀和后缀处理器,默认是粗体 - SimpleHTMLFormatter htmlFormatter = - new SimpleHTMLFormatter("", ""); - // 高亮搜索的词添加到高亮处理器中 - Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query)); - - // 获取搜索的结果,指定返回document返回的个数 - // 默认搜索结果为显示第一页,1000 条,可以优化 - TopDocs results = SearchUtil.getScoreDocsByPerPage(1, 100, searcher, query); - ScoreDoc[] hits = results.scoreDocs; - - // 遍历,输出 - for (ScoreDoc hit : hits) { - int id = hit.doc; - float score = hit.score; - Document hitDoc = searcher.doc(hit.doc); - Map map = new HashMap<>(); - map.put("id", hitDoc.get("id")); - - // 获取到summary - String name = hitDoc.get("summary"); - // 将查询的词和搜索词匹配,匹配到添加前缀和后缀 - TokenStream tokenStream = - TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "summary", analyzer); - // 传入的第二个参数是查询的值 - TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, name, false, 10); - StringBuilder baikeValue = new StringBuilder(); - for (TextFragment textFragment : frag) { - if ((textFragment != null) && (textFragment.getScore() > 0)) { - // if ((frag[j] != null)) { - // 获取 summary 的值 - baikeValue.append(textFragment.toString()); - } - } - - // 获取到title - String title = hitDoc.get("title"); - TokenStream titleTokenStream = - TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "title", analyzer); - TextFragment[] titleFrag = - highlighter.getBestTextFragments(titleTokenStream, title, false, 10); - StringBuilder titleValue = new StringBuilder(); - for (int j = 0; j < titleFrag.length; j++) { - if ((frag[j] != null)) { - titleValue.append(titleFrag[j].toString()); - } - } - map.put("title", titleValue.toString()); - map.put("summary", baikeValue.toString()); - map.put("score", String.valueOf(score)); - list.add(map); - } - } catch (IOException e) { - e.printStackTrace(); - } finally { - service.shutdownNow(); - } - return list; - } -} diff --git a/src/main/java/com/rymcu/forest/lucene/service/impl/LuceneServiceImpl.java b/src/main/java/com/rymcu/forest/lucene/service/impl/LuceneServiceImpl.java new file mode 100644 index 0000000..7e86bf5 --- /dev/null +++ b/src/main/java/com/rymcu/forest/lucene/service/impl/LuceneServiceImpl.java @@ -0,0 +1,182 @@ +package com.rymcu.forest.lucene.service.impl; + +import com.rymcu.forest.dto.ArticleDTO; +import com.rymcu.forest.lucene.lucene.ArticleBeanIndex; +import com.rymcu.forest.lucene.lucene.IKAnalyzer; +import com.rymcu.forest.lucene.mapper.ArticleLuceneMapper; +import com.rymcu.forest.lucene.model.ArticleLucene; +import com.rymcu.forest.lucene.service.LuceneService; +import com.rymcu.forest.lucene.util.SearchUtil; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.Document; +import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.highlight.*; +import org.springframework.stereotype.Service; + +import javax.annotation.Resource; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +/** + * LuceneServiceImpl + * + * @author suwen + * @date 2021/2/3 10:29 + */ +@Service +public class LuceneServiceImpl implements LuceneService { + + @Resource private ArticleLuceneMapper luceneMapper; + + /** Lucene索引文件路径 */ + private final String indexPath = System.getProperty("user.dir") + "/index"; + + /** + * 将文章的数据解析为一个个关键字词存储到索引文件中 + * + * @param list + */ + @Override + public void writeArticle(List list) { + try { + int totalCount = list.size(); + int perThreadCount = 3000; + int threadCount = totalCount / perThreadCount + (totalCount % perThreadCount == 0 ? 0 : 1); + ExecutorService pool = Executors.newFixedThreadPool(threadCount); + CountDownLatch countDownLatch1 = new CountDownLatch(1); + CountDownLatch countDownLatch2 = new CountDownLatch(threadCount); + + for (int i = 0; i < threadCount; i++) { + int start = i * perThreadCount; + int end = Math.min((i + 1) * perThreadCount, totalCount); + List subList = list.subList(start, end); + Runnable runnable = + new ArticleBeanIndex("index", i, countDownLatch1, countDownLatch2, subList); + // 子线程交给线程池管理 + pool.execute(runnable); + } + countDownLatch1.countDown(); + System.out.println("开始创建索引"); + // 等待所有线程都完成 + countDownLatch2.await(); + // 线程全部完成工作 + System.out.println("所有线程都创建索引完毕"); + // 释放线程池资源 + pool.shutdown(); + } catch (Exception e) { + e.printStackTrace(); + } + } + + /** + * 关键词搜索 + * + * @param value + * @return + * @throws Exception + */ + @Override + public List searchArticle(String value) { + List resList = new ArrayList<>(); + ExecutorService service = Executors.newCachedThreadPool(); + // 定义分词器 + Analyzer analyzer = new IKAnalyzer(); + try { + IndexSearcher searcher = SearchUtil.getIndexSearcherByParentPath(indexPath, service); + String[] fields = {"title", "summary"}; + // 构造Query对象 + MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer); + + BufferedReader in = + new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8)); + String line = value != null ? value : in.readLine(); + Query query = parser.parse(line); + // 最终被分词后添加的前缀和后缀处理器,默认是粗体 + SimpleHTMLFormatter htmlFormatter = + new SimpleHTMLFormatter("", ""); + // 高亮搜索的词添加到高亮处理器中 + Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query)); + + // 获取搜索的结果,指定返回document返回的个数 + // 默认搜索结果为显示第一页,1000 条,可以优化 + TopDocs results = SearchUtil.getScoreDocsByPerPage(1, 100, searcher, query); + ScoreDoc[] hits = results.scoreDocs; + + // 遍历,输出 + for (ScoreDoc hit : hits) { + int id = hit.doc; + float score = hit.score; + Document hitDoc = searcher.doc(hit.doc); + Map map = new HashMap<>(); + map.put("id", hitDoc.get("id")); + + // 获取到summary + String name = hitDoc.get("summary"); + // 将查询的词和搜索词匹配,匹配到添加前缀和后缀 + TokenStream tokenStream = + TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "summary", analyzer); + // 传入的第二个参数是查询的值 + TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, name, false, 10); + StringBuilder baikeValue = new StringBuilder(); + for (TextFragment textFragment : frag) { + if ((textFragment != null) && (textFragment.getScore() > 0)) { + // if ((frag[j] != null)) { + // 获取 summary 的值 + baikeValue.append(textFragment.toString()); + } + } + + // 获取到title + String title = hitDoc.get("title"); + TokenStream titleTokenStream = + TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "title", analyzer); + TextFragment[] titleFrag = + highlighter.getBestTextFragments(titleTokenStream, title, false, 10); + StringBuilder titleValue = new StringBuilder(); + for (int j = 0; j < titleFrag.length; j++) { + if ((frag[j] != null)) { + titleValue.append(titleFrag[j].toString()); + } + } + resList.add( + ArticleLucene.builder() + .idArticle(hitDoc.get("id")) + .articleTitle(titleValue.toString()) + .articleContent(baikeValue.toString()) + .score(String.valueOf(score)) + .build()); + } + } catch (IOException | ParseException | InvalidTokenOffsetsException e) { + System.out.println(e.getMessage()); + e.printStackTrace(); + } finally { + service.shutdownNow(); + } + return resList; + } + + @Override + public List getAllArticleLucene() { + return luceneMapper.getAllArticleLucene(); + } + + @Override + public List getArticlesByIds(String[] ids) { + return luceneMapper.getArticlesByIds(ids); + } +} diff --git a/src/main/java/mapper/BaikeMapper.xml b/src/main/java/mapper/BaikeMapper.xml deleted file mode 100755 index 2d519ce..0000000 --- a/src/main/java/mapper/BaikeMapper.xml +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - - - - - - - id, title - - - summary - - - diff --git a/src/main/java/mapper/lucene/ArticleLuceneMapper.xml b/src/main/java/mapper/lucene/ArticleLuceneMapper.xml new file mode 100755 index 0000000..62e97bc --- /dev/null +++ b/src/main/java/mapper/lucene/ArticleLuceneMapper.xml @@ -0,0 +1,64 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + , article_title + + + article_content + + + + +