From 6ccd66fd2fc96511a208c965d2520a9a3bdb1b53 Mon Sep 17 00:00:00 2001 From: Suwen <577014284@qq.com> Date: Sat, 6 Mar 2021 23:04:49 +0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=A7=20=E7=94=A8=E6=88=B7=E7=B4=A2?= =?UTF-8?q?=E5=BC=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../lucene/api/LuceneSearchController.java | 7 +- .../lucene/lucene/ArticleBeanIndex.java | 26 --- .../rymcu/forest/lucene/lucene/BaseIndex.java | 6 - .../forest/lucene/lucene/UserBeanIndex.java | 73 +++++++ .../lucene/mapper/UserLuceneMapper.java | 41 ++++ .../forest/lucene/model/PortfolioLucene.java | 31 +++ .../rymcu/forest/lucene/model/UserLucene.java | 35 ++++ .../lucene/service/UserLuceneService.java | 74 +++++++ .../service/impl/UserLuceneServiceImpl.java | 196 ++++++++++++++++++ .../forest/lucene/util/UserIndexUtil.java | 86 ++++++++ .../java/mapper/lucene/UserLuceneMapper.xml | 42 ++++ 11 files changed, 584 insertions(+), 33 deletions(-) create mode 100644 src/main/java/com/rymcu/forest/lucene/lucene/UserBeanIndex.java create mode 100644 src/main/java/com/rymcu/forest/lucene/mapper/UserLuceneMapper.java create mode 100644 src/main/java/com/rymcu/forest/lucene/model/PortfolioLucene.java create mode 100644 src/main/java/com/rymcu/forest/lucene/model/UserLucene.java create mode 100644 src/main/java/com/rymcu/forest/lucene/service/UserLuceneService.java create mode 100644 src/main/java/com/rymcu/forest/lucene/service/impl/UserLuceneServiceImpl.java create mode 100644 src/main/java/com/rymcu/forest/lucene/util/UserIndexUtil.java create mode 100644 src/main/java/mapper/lucene/UserLuceneMapper.xml diff --git a/src/main/java/com/rymcu/forest/lucene/api/LuceneSearchController.java b/src/main/java/com/rymcu/forest/lucene/api/LuceneSearchController.java index 17985ea..2bc65c6 100755 --- a/src/main/java/com/rymcu/forest/lucene/api/LuceneSearchController.java +++ b/src/main/java/com/rymcu/forest/lucene/api/LuceneSearchController.java @@ -8,7 +8,9 @@ import com.rymcu.forest.dto.ArticleDTO; import com.rymcu.forest.lucene.model.ArticleLucene; import com.rymcu.forest.lucene.service.LuceneService; import com.rymcu.forest.lucene.service.UserDicService; +import com.rymcu.forest.lucene.service.UserLuceneService; import com.rymcu.forest.lucene.util.ArticleIndexUtil; +import com.rymcu.forest.lucene.util.UserIndexUtil; import com.rymcu.forest.util.Utils; import org.springframework.web.bind.annotation.*; @@ -31,18 +33,21 @@ import java.util.concurrent.Executors; public class LuceneSearchController { @Resource private LuceneService luceneService; + @Resource private UserLuceneService userLuceneService; @Resource private UserDicService dicService; @PostConstruct public void createIndex() { // 删除系统运行时保存的索引,重新创建索引 ArticleIndexUtil.deleteAllIndex(); + UserIndexUtil.deleteAllIndex(); ExecutorService executor = Executors.newSingleThreadExecutor(); CompletableFuture future = CompletableFuture.supplyAsync( () -> { System.out.println(">>>>>>>>> 开始创建索引 <<<<<<<<<<<"); - luceneService.writeArticle(luceneService.getAllArticleLucene()); + // luceneService.writeArticle(luceneService.getAllArticleLucene()); + userLuceneService.writeUser(userLuceneService.getAllUserLucene()); System.out.println(">>>>>>>>> 索引创建完毕 <<<<<<<<<<<"); System.out.println("加载用户配置的自定义扩展词典到主词库表"); try { diff --git a/src/main/java/com/rymcu/forest/lucene/lucene/ArticleBeanIndex.java b/src/main/java/com/rymcu/forest/lucene/lucene/ArticleBeanIndex.java index 28feec5..9de61c1 100644 --- a/src/main/java/com/rymcu/forest/lucene/lucene/ArticleBeanIndex.java +++ b/src/main/java/com/rymcu/forest/lucene/lucene/ArticleBeanIndex.java @@ -7,10 +7,7 @@ import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermQuery; -import java.io.IOException; import java.util.List; import java.util.concurrent.CountDownLatch; @@ -22,19 +19,6 @@ import java.util.concurrent.CountDownLatch; */ public class ArticleBeanIndex extends BaseIndex { - public ArticleBeanIndex( - String parentIndexPath,int subIndex) { - super(parentIndexPath, subIndex); - } - - public ArticleBeanIndex( - IndexWriter writer, - CountDownLatch countDownLatch1, - CountDownLatch countDownLatch2, - List list) { - super(writer, countDownLatch1, countDownLatch2, list); - } - public ArticleBeanIndex( String parentIndexPath, int subIndex, @@ -60,14 +44,4 @@ public class ArticleBeanIndex extends BaseIndex { writer.updateDocument(new Term("id", t.getIdArticle() + ""), doc); } } - - public void indexDoc(ArticleLucene t) throws Exception { - indexDoc(getWriter(),t); - } - - @Override - public void deleteDoc( String id) throws IOException { - Query query = new TermQuery(new Term("id", id)); - getWriter().deleteDocuments(query); - } } diff --git a/src/main/java/com/rymcu/forest/lucene/lucene/BaseIndex.java b/src/main/java/com/rymcu/forest/lucene/lucene/BaseIndex.java index c56c200..ecc8911 100644 --- a/src/main/java/com/rymcu/forest/lucene/lucene/BaseIndex.java +++ b/src/main/java/com/rymcu/forest/lucene/lucene/BaseIndex.java @@ -119,12 +119,6 @@ public abstract class BaseIndex implements Runnable { } } - public abstract void deleteDoc(String id) throws IOException; - - public IndexWriter getWriter() { - return writer; - } - @Override public void run() { try { diff --git a/src/main/java/com/rymcu/forest/lucene/lucene/UserBeanIndex.java b/src/main/java/com/rymcu/forest/lucene/lucene/UserBeanIndex.java new file mode 100644 index 0000000..2bf5017 --- /dev/null +++ b/src/main/java/com/rymcu/forest/lucene/lucene/UserBeanIndex.java @@ -0,0 +1,73 @@ +package com.rymcu.forest.lucene.lucene; + +import com.rymcu.forest.lucene.model.UserLucene; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; + +import java.io.IOException; +import java.util.List; +import java.util.concurrent.CountDownLatch; + +/** + * UserBeanIndex + * + * @author suwen + * @date 2021/2/2 14:10 + */ +public class UserBeanIndex extends BaseIndex { + + public UserBeanIndex( + String parentIndexPath,int subIndex) { + super(parentIndexPath, subIndex); + } + + public UserBeanIndex( + IndexWriter writer, + CountDownLatch countDownLatch1, + CountDownLatch countDownLatch2, + List list) { + super(writer, countDownLatch1, countDownLatch2, list); + } + + public UserBeanIndex( + String parentIndexPath, + int subIndex, + CountDownLatch countDownLatch1, + CountDownLatch countDownLatch2, + List list) { + super(parentIndexPath, subIndex, countDownLatch1, countDownLatch2, list); + } + + @Override + public void indexDoc(IndexWriter writer, UserLucene user) throws Exception { + Document doc = new Document(); + Field id = new Field("id", user.getIdUser() + "", TextField.TYPE_STORED); + Field title = new Field("nickname", user.getNickname(), TextField.TYPE_STORED); + Field summary = new Field("signature", user.getSignature(), TextField.TYPE_STORED); + // 添加到Document中 + doc.add(id); + doc.add(title); + doc.add(summary); + if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) { + writer.addDocument(doc); + } else { + writer.updateDocument(new Term("id", user.getIdUser() + ""), doc); + } + } + + public void indexDoc(UserLucene t) throws Exception { + indexDoc(getWriter(),t); + } + + @Override + public void deleteDoc( String id) throws IOException { + Query query = new TermQuery(new Term("id", id)); + getWriter().deleteDocuments(query); + } +} diff --git a/src/main/java/com/rymcu/forest/lucene/mapper/UserLuceneMapper.java b/src/main/java/com/rymcu/forest/lucene/mapper/UserLuceneMapper.java new file mode 100644 index 0000000..f41b6ec --- /dev/null +++ b/src/main/java/com/rymcu/forest/lucene/mapper/UserLuceneMapper.java @@ -0,0 +1,41 @@ +package com.rymcu.forest.lucene.mapper; + +import com.rymcu.forest.dto.UserDTO; +import com.rymcu.forest.lucene.model.UserLucene; +import org.apache.ibatis.annotations.Mapper; +import org.apache.ibatis.annotations.Param; + +import java.util.List; + +/** + * UserLuceneMapper + * + * @author suwen + * @date 2021/3/6 10:00 + */ +@Mapper +public interface UserLuceneMapper { + + /** + * 加载所有用户信息 + * + * @return + */ + List getAllUserLucene(); + + /** + * 加载所有用户信息 + * + * @param ids 用户id(半角逗号分隔) + * @return + */ + List getUsersByIds(@Param("ids") String[] ids); + + /** + * 加载 UserLucene + * + * @param id 用户id + * @return + */ + UserLucene getById(@Param("id") String id); +} diff --git a/src/main/java/com/rymcu/forest/lucene/model/PortfolioLucene.java b/src/main/java/com/rymcu/forest/lucene/model/PortfolioLucene.java new file mode 100644 index 0000000..cfb8ba1 --- /dev/null +++ b/src/main/java/com/rymcu/forest/lucene/model/PortfolioLucene.java @@ -0,0 +1,31 @@ +package com.rymcu.forest.lucene.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * PortfolioLucene + * + * @author suwen + * @date 2021/3/6 09:57 + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class PortfolioLucene { + + /** 作品集编号 */ + private Integer idPortfolio; + + /** 作品集名称 */ + private String portfolioTitle; + + /** 作品集介绍 */ + private String portfolioDescription; + + /** 相关度评分 */ + private String score; +} diff --git a/src/main/java/com/rymcu/forest/lucene/model/UserLucene.java b/src/main/java/com/rymcu/forest/lucene/model/UserLucene.java new file mode 100644 index 0000000..6946e33 --- /dev/null +++ b/src/main/java/com/rymcu/forest/lucene/model/UserLucene.java @@ -0,0 +1,35 @@ +package com.rymcu.forest.lucene.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; +import org.apache.ibatis.type.JdbcType; +import tk.mybatis.mapper.annotation.ColumnType; + +import javax.persistence.Column; + +/** + * UserLucene + * + * @author suwen + * @date 2021/3/6 09:57 + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class UserLucene { + + /** 用户编号 */ + private Integer idUser; + + /** 昵称 */ + private String nickname; + + /** 签名 */ + private String signature; + + /** 相关度评分 */ + private String score; +} diff --git a/src/main/java/com/rymcu/forest/lucene/service/UserLuceneService.java b/src/main/java/com/rymcu/forest/lucene/service/UserLuceneService.java new file mode 100644 index 0000000..4437e70 --- /dev/null +++ b/src/main/java/com/rymcu/forest/lucene/service/UserLuceneService.java @@ -0,0 +1,74 @@ +package com.rymcu.forest.lucene.service; + +import com.rymcu.forest.dto.UserDTO; +import com.rymcu.forest.lucene.model.UserLucene; + +import java.util.List; + +/** + * UserLuceneService + * + * @author suwen + * @date 2021/3/5 10:10 + */ +public interface UserLuceneService { + + /** + * 批量写入用户信息到索引 + * + * @param list + */ + void writeUser(List list); + + /** + * 写入单个用户索引 + * + * @param id + */ + void writeUser(String id); + + /** + * 写入单个用户索引 + * + * @param UserLucene + */ + void writeUser(UserLucene UserLucene); + + /** + * 更新单个用户索引 + * + * @param id + */ + void updateUser(String id); + + /** + * 删除单个用户索引 + * + * @param id + */ + void deleteUser(String id); + + /** + * 关键词搜索 + * + * @param value + * @return + * @throws Exception + */ + List searchUser(String value); + + /** + * 加载所有用户内容 + * + * @return + */ + List getAllUserLucene(); + + /** + * 加载所有用户内容 + * + * @param ids 用户id(半角逗号分隔) + * @return + */ + List getUsersByIds(String[] ids); +} diff --git a/src/main/java/com/rymcu/forest/lucene/service/impl/UserLuceneServiceImpl.java b/src/main/java/com/rymcu/forest/lucene/service/impl/UserLuceneServiceImpl.java new file mode 100644 index 0000000..cab470f --- /dev/null +++ b/src/main/java/com/rymcu/forest/lucene/service/impl/UserLuceneServiceImpl.java @@ -0,0 +1,196 @@ +package com.rymcu.forest.lucene.service.impl; + +import com.rymcu.forest.dto.UserDTO; +import com.rymcu.forest.lucene.lucene.UserBeanIndex; +import com.rymcu.forest.lucene.lucene.IKAnalyzer; +import com.rymcu.forest.lucene.mapper.UserLuceneMapper; +import com.rymcu.forest.lucene.model.UserLucene; +import com.rymcu.forest.lucene.service.UserLuceneService; +import com.rymcu.forest.lucene.util.UserIndexUtil; +import com.rymcu.forest.lucene.util.SearchUtil; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.Document; +import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.highlight.*; +import org.springframework.stereotype.Service; + +import javax.annotation.Resource; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +/** + * UserServiceImpl + * + * @author suwen + * @date 2021/3/6 10:29 + */ +@Service +public class UserLuceneServiceImpl implements UserLuceneService { + + @Resource private UserLuceneMapper userLuceneMapper; + + /** Lucene索引文件路径 */ + private final String indexPath = "lucene/index"; + + /** + * 将文章的数据解析为一个个关键字词存储到索引文件中 + * + * @param list + */ + @Override + public void writeUser(List list) { + try { + int totalCount = list.size(); + int perThreadCount = 3000; + int threadCount = totalCount / perThreadCount + (totalCount % perThreadCount == 0 ? 0 : 1); + ExecutorService pool = Executors.newFixedThreadPool(threadCount); + CountDownLatch countDownLatch1 = new CountDownLatch(1); + CountDownLatch countDownLatch2 = new CountDownLatch(threadCount); + + for (int i = 0; i < threadCount; i++) { + int start = i * perThreadCount; + int end = Math.min((i + 1) * perThreadCount, totalCount); + List subList = list.subList(start, end); + Runnable runnable = + new UserBeanIndex(indexPath, i, countDownLatch1, countDownLatch2, subList); + // 子线程交给线程池管理 + pool.execute(runnable); + } + countDownLatch1.countDown(); + System.out.println("开始创建索引"); + // 等待所有线程都完成 + countDownLatch2.await(); + // 线程全部完成工作 + System.out.println("所有线程都创建索引完毕"); + // 释放线程池资源 + pool.shutdown(); + } catch (Exception e) { + e.printStackTrace(); + } + } + + @Override + public void writeUser(String id) { + writeUser(userLuceneMapper.getById(id)); + } + + @Override + public void writeUser(UserLucene UserLucene) { + UserIndexUtil.addIndex(UserLucene); + } + + @Override + public void updateUser(String id) { + UserIndexUtil.updateIndex(userLuceneMapper.getById(id)); + } + + @Override + public void deleteUser(String id) { + UserIndexUtil.deleteIndex(id); + } + + @Override + public List searchUser(String value) { + List resList = new ArrayList<>(); + ExecutorService service = Executors.newCachedThreadPool(); + // 定义分词器 + Analyzer analyzer = new IKAnalyzer(); + try { + IndexSearcher searcher = SearchUtil.getIndexSearcherByParentPath(indexPath, service); + String[] fields = {"nickname", "signature"}; + // 构造Query对象 + MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer); + + BufferedReader in = + new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8)); + String line = value != null ? value : in.readLine(); + Query query = parser.parse(line); + // 最终被分词后添加的前缀和后缀处理器,默认是粗体 + SimpleHTMLFormatter htmlFormatter = + new SimpleHTMLFormatter("", ""); + // 高亮搜索的词添加到高亮处理器中 + Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query)); + + // 获取搜索的结果,指定返回document返回的个数 + // TODO 默认搜索结果为显示第一页,1000 条,可以优化 + TopDocs results = SearchUtil.getScoreDocsByPerPage(1, 100, searcher, query); + ScoreDoc[] hits = results.scoreDocs; + + // 遍历,输出 + for (ScoreDoc hit : hits) { + int id = hit.doc; + float score = hit.score; + Document hitDoc = searcher.doc(hit.doc); + Map map = new HashMap<>(); + map.put("id", hitDoc.get("id")); + + // 获取到summary + String name = hitDoc.get("signature"); + // 将查询的词和搜索词匹配,匹配到添加前缀和后缀 + TokenStream tokenStream = + TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "signature", analyzer); + // 传入的第二个参数是查询的值 + TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, name, false, 10); + StringBuilder baikeValue = new StringBuilder(); + for (TextFragment textFragment : frag) { + if ((textFragment != null) && (textFragment.getScore() > 0)) { + // if ((frag[j] != null)) { + // 获取 summary 的值 + baikeValue.append(textFragment.toString()); + } + } + + // 获取到title + String title = hitDoc.get("nickname"); + TokenStream titleTokenStream = + TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "nickname", analyzer); + TextFragment[] titleFrag = + highlighter.getBestTextFragments(titleTokenStream, title, false, 10); + StringBuilder titleValue = new StringBuilder(); + for (int j = 0; j < titleFrag.length; j++) { + if ((frag[j] != null)) { + titleValue.append(titleFrag[j].toString()); + } + } + resList.add( + UserLucene.builder() + .idUser(Integer.valueOf(hitDoc.get("id"))) + .nickname(titleValue.toString()) + .signature(baikeValue.toString()) + .score(String.valueOf(score)) + .build()); + } + } catch (IOException | ParseException | InvalidTokenOffsetsException e) { + System.out.println(e.getMessage()); + e.printStackTrace(); + } finally { + service.shutdownNow(); + } + return resList; + } + + @Override + public List getAllUserLucene() { + return userLuceneMapper.getAllUserLucene(); + } + + @Override + public List getUsersByIds(String[] ids) { + return userLuceneMapper.getUsersByIds(ids); + } +} diff --git a/src/main/java/com/rymcu/forest/lucene/util/UserIndexUtil.java b/src/main/java/com/rymcu/forest/lucene/util/UserIndexUtil.java new file mode 100644 index 0000000..78bd24c --- /dev/null +++ b/src/main/java/com/rymcu/forest/lucene/util/UserIndexUtil.java @@ -0,0 +1,86 @@ +package com.rymcu.forest.lucene.util; + +import cn.hutool.core.io.FileUtil; +import com.rymcu.forest.lucene.model.UserLucene; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; + +import java.io.IOException; +import java.util.Arrays; + +/** + * 用户索引更新工具类 + * + * @author suwen + */ +public class UserIndexUtil { + + /** lucene索引保存目录 */ + private static final String PATH = System.getProperty("user.dir") + "/lucene/index"; + + /** 系统运行时索引保存目录 */ + private static final String INDEX_PATH = + System.getProperty("user.dir") + "/lucene/index/index777"; + + /** 删除所有运行中保存的索引 */ + public static void deleteAllIndex() { + if (FileUtil.exist(INDEX_PATH)) { + FileUtil.del(INDEX_PATH); + } + } + + public static void addIndex(UserLucene t) { + creatIndex(t); + } + + public static void updateIndex(UserLucene t) { + deleteIndex(t.getIdUser().toString()); + creatIndex(t); + } + + /** + * 增加或创建单个索引 + * + * @param t + * @throws Exception + */ + private static synchronized void creatIndex(UserLucene t) { + System.out.println("创建单个索引"); + IndexWriter writer; + try { + writer = IndexUtil.getIndexWriter(INDEX_PATH, false); + Document doc = new Document(); + doc.add(new StringField("id", t.getIdUser() + "", Field.Store.YES)); + doc.add(new TextField("nickname", t.getNickname(), Field.Store.YES)); + doc.add(new TextField("signature", t.getSignature(), Field.Store.YES)); + writer.addDocument(doc); + writer.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + + /** 删除单个索引 */ + public static synchronized void deleteIndex(String id) { + Arrays.stream(FileUtil.ls(PATH)) + .forEach( + each -> { + if (each.isDirectory()) { + IndexWriter writer; + try { + writer = IndexUtil.getIndexWriter(each.getAbsolutePath(), false); + writer.deleteDocuments(new Term("id", id)); + writer.forceMergeDeletes(); // 强制删除 + writer.commit(); + writer.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + }); + } +} diff --git a/src/main/java/mapper/lucene/UserLuceneMapper.xml b/src/main/java/mapper/lucene/UserLuceneMapper.xml new file mode 100644 index 0000000..8f9deeb --- /dev/null +++ b/src/main/java/mapper/lucene/UserLuceneMapper.xml @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + + + + + +