🚧 用户索引

2021-03-06 23:04:49 +08:00 · 2021-03-06 23:04:49 +08:00 · 6ccd66fd2f
commit 6ccd66fd2f
parent d2be9e3afc
11 changed files with 584 additions and 33 deletions
--- a/src/main/java/com/rymcu/forest/lucene/api/LuceneSearchController.java
+++ b/src/main/java/com/rymcu/forest/lucene/api/LuceneSearchController.java
@ -8,7 +8,9 @@ import com.rymcu.forest.dto.ArticleDTO;
 import com.rymcu.forest.lucene.model.ArticleLucene;
 import com.rymcu.forest.lucene.service.LuceneService;
 import com.rymcu.forest.lucene.service.UserDicService;
+import com.rymcu.forest.lucene.service.UserLuceneService;
 import com.rymcu.forest.lucene.util.ArticleIndexUtil;
+import com.rymcu.forest.lucene.util.UserIndexUtil;
 import com.rymcu.forest.util.Utils;
 import org.springframework.web.bind.annotation.*;

@ -31,18 +33,21 @@ import java.util.concurrent.Executors;
 public class LuceneSearchController {

  @Resource private LuceneService luceneService;
+  @Resource private UserLuceneService userLuceneService;
  @Resource private UserDicService dicService;

  @PostConstruct
  public void createIndex() {
    // 删除系统运行时保存的索引，重新创建索引
    ArticleIndexUtil.deleteAllIndex();
+    UserIndexUtil.deleteAllIndex();
    ExecutorService executor = Executors.newSingleThreadExecutor();
    CompletableFuture<String> future =
        CompletableFuture.supplyAsync(
            () -> {
              System.out.println(">>>>>>>>> 开始创建索引 <<<<<<<<<<<");
-              luceneService.writeArticle(luceneService.getAllArticleLucene());
+              //              luceneService.writeArticle(luceneService.getAllArticleLucene());
+              userLuceneService.writeUser(userLuceneService.getAllUserLucene());
              System.out.println(">>>>>>>>> 索引创建完毕 <<<<<<<<<<<");
              System.out.println("加载用户配置的自定义扩展词典到主词库表");
              try {
--- a/src/main/java/com/rymcu/forest/lucene/lucene/ArticleBeanIndex.java
+++ b/src/main/java/com/rymcu/forest/lucene/lucene/ArticleBeanIndex.java
@ -7,10 +7,7 @@ import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.TermQuery;

-import java.io.IOException;
 import java.util.List;
 import java.util.concurrent.CountDownLatch;

@ -22,19 +19,6 @@ import java.util.concurrent.CountDownLatch;
 */
 public class ArticleBeanIndex extends BaseIndex<ArticleLucene> {

-  public ArticleBeanIndex(
-          String parentIndexPath,int subIndex) {
-    super(parentIndexPath, subIndex);
-  }
-
-  public ArticleBeanIndex(
-      IndexWriter writer,
-      CountDownLatch countDownLatch1,
-      CountDownLatch countDownLatch2,
-      List<ArticleLucene> list) {
-    super(writer, countDownLatch1, countDownLatch2, list);
-  }
-
  public ArticleBeanIndex(
      String parentIndexPath,
      int subIndex,
@ -60,14 +44,4 @@ public class ArticleBeanIndex extends BaseIndex<ArticleLucene> {
      writer.updateDocument(new Term("id", t.getIdArticle() + ""), doc);
    }
  }
-
-  public void indexDoc(ArticleLucene t) throws Exception {
-    indexDoc(getWriter(),t);
-  }
-
-  @Override
-  public void deleteDoc( String id) throws IOException {
-    Query query = new TermQuery(new Term("id", id));
-    getWriter().deleteDocuments(query);
-  }
 }
--- a/src/main/java/com/rymcu/forest/lucene/lucene/BaseIndex.java
+++ b/src/main/java/com/rymcu/forest/lucene/lucene/BaseIndex.java
@ -119,12 +119,6 @@ public abstract class BaseIndex<T> implements Runnable {
    }
  }

-  public abstract void deleteDoc(String id) throws IOException;
-
-  public IndexWriter getWriter() {
-    return writer;
-  }
-
  @Override
  public void run() {
    try {
--- a/src/main/java/com/rymcu/forest/lucene/lucene/UserBeanIndex.java
+++ b/src/main/java/com/rymcu/forest/lucene/lucene/UserBeanIndex.java
@ -0,0 +1,73 @@
+package com.rymcu.forest.lucene.lucene;
+
+import com.rymcu.forest.lucene.model.UserLucene;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.CountDownLatch;
+
+/**
+ * Index worker that converts {@link UserLucene} records into Lucene documents.
+ *
+ * <p>Every document stores three fields: {@code id}, {@code nickname} and {@code signature}.
+ *
+ * <p>The convenience members {@code indexDoc(UserLucene)} and {@code deleteDoc(String)} are
+ * intentionally absent: they relied on {@code BaseIndex.getWriter()} and the abstract
+ * {@code BaseIndex.deleteDoc(String)}, neither of which {@code BaseIndex} declares any more.
+ *
+ * @author suwen
+ * @date 2021/2/2 14:10
+ */
+public class UserBeanIndex extends BaseIndex<UserLucene> {
+
+  /**
+   * @param parentIndexPath parent directory of the index
+   * @param subIndex number of the sub-index handled by this worker
+   */
+  public UserBeanIndex(String parentIndexPath, int subIndex) {
+    super(parentIndexPath, subIndex);
+  }
+
+  /**
+   * @param writer index writer to use
+   * @param countDownLatch1 latch handed to {@link BaseIndex}
+   * @param countDownLatch2 latch handed to {@link BaseIndex}
+   * @param list users to index
+   */
+  public UserBeanIndex(
+      IndexWriter writer,
+      CountDownLatch countDownLatch1,
+      CountDownLatch countDownLatch2,
+      List<UserLucene> list) {
+    super(writer, countDownLatch1, countDownLatch2, list);
+  }
+
+  /**
+   * @param parentIndexPath parent directory of the index
+   * @param subIndex number of the sub-index handled by this worker
+   * @param countDownLatch1 latch handed to {@link BaseIndex}
+   * @param countDownLatch2 latch handed to {@link BaseIndex}
+   * @param list users to index
+   */
+  public UserBeanIndex(
+      String parentIndexPath,
+      int subIndex,
+      CountDownLatch countDownLatch1,
+      CountDownLatch countDownLatch2,
+      List<UserLucene> list) {
+    super(parentIndexPath, subIndex, countDownLatch1, countDownLatch2, list);
+  }
+
+  /**
+   * Writes one user into the index.
+   *
+   * <p>When the writer was opened in {@code CREATE} mode the document is simply added; otherwise
+   * any existing document with the same {@code id} term is replaced.
+   *
+   * @param writer index writer receiving the document
+   * @param user user to index
+   * @throws Exception if Lucene fails to write the document
+   */
+  @Override
+  public void indexDoc(IndexWriter writer, UserLucene user) throws Exception {
+    String userId = String.valueOf(user.getIdUser());
+    Document doc = new Document();
+    doc.add(new Field("id", userId, TextField.TYPE_STORED));
+    // Lucene rejects null field values, so a user without nickname/signature is indexed
+    // with an empty string instead of aborting the whole batch.
+    doc.add(new Field("nickname", nullToEmpty(user.getNickname()), TextField.TYPE_STORED));
+    doc.add(new Field("signature", nullToEmpty(user.getSignature()), TextField.TYPE_STORED));
+    if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {
+      writer.addDocument(doc);
+    } else {
+      writer.updateDocument(new Term("id", userId), doc);
+    }
+  }
+
+  /** Returns {@code value}, or the empty string when {@code value} is {@code null}. */
+  private static String nullToEmpty(String value) {
+    return value == null ? "" : value;
+  }
+}
--- a/src/main/java/com/rymcu/forest/lucene/mapper/UserLuceneMapper.java
+++ b/src/main/java/com/rymcu/forest/lucene/mapper/UserLuceneMapper.java
@ -0,0 +1,41 @@
+package com.rymcu.forest.lucene.mapper;
+
+import com.rymcu.forest.dto.UserDTO;
+import com.rymcu.forest.lucene.model.UserLucene;
+import org.apache.ibatis.annotations.Mapper;
+import org.apache.ibatis.annotations.Param;
+
+import java.util.List;
+
+/**
+ * MyBatis mapper that loads user data for the Lucene user index.
+ *
+ * @author suwen
+ * @date 2021/3/6 10:00
+ */
+@Mapper
+public interface UserLuceneMapper {
+
+  /**
+   * Loads every user as an index record.
+   *
+   * @return all users
+   */
+  List<UserLucene> getAllUserLucene();
+
+  /**
+   * Loads user details for the given user ids.
+   *
+   * @param ids user ids, one array element per id
+   * @return the matching users
+   */
+  List<UserDTO> getUsersByIds(@Param("ids") String[] ids);
+
+  /**
+   * Loads the index record of a single user.
+   *
+   * @param id user id
+   * @return the user's index record
+   */
+  UserLucene getById(@Param("id") String id);
+}
--- a/src/main/java/com/rymcu/forest/lucene/model/PortfolioLucene.java
+++ b/src/main/java/com/rymcu/forest/lucene/model/PortfolioLucene.java
@ -0,0 +1,31 @@
+package com.rymcu.forest.lucene.model;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/**
+ * Portfolio record as used by the Lucene search layer.
+ *
+ * @author suwen
+ * @date 2021/3/6 09:57
+ */
+@Data
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+public class PortfolioLucene {
+
+  /** Portfolio id. */
+  private Integer idPortfolio;
+
+  /** Portfolio title. */
+  private String portfolioTitle;
+
+  /** Portfolio description. */
+  private String portfolioDescription;
+
+  /** Relevance score. */
+  private String score;
+}
--- a/src/main/java/com/rymcu/forest/lucene/model/UserLucene.java
+++ b/src/main/java/com/rymcu/forest/lucene/model/UserLucene.java
@ -0,0 +1,35 @@
+package com.rymcu.forest.lucene.model;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+import org.apache.ibatis.type.JdbcType;
+import tk.mybatis.mapper.annotation.ColumnType;
+
+import javax.persistence.Column;
+
+/**
+ * User record as used by the Lucene search layer.
+ *
+ * @author suwen
+ * @date 2021/3/6 09:57
+ */
+@Data
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+public class UserLucene {
+
+  /** User id. */
+  private Integer idUser;
+
+  /** Nickname. */
+  private String nickname;
+
+  /** Signature. */
+  private String signature;
+
+  /** Relevance score. */
+  private String score;
+}
--- a/src/main/java/com/rymcu/forest/lucene/service/UserLuceneService.java
+++ b/src/main/java/com/rymcu/forest/lucene/service/UserLuceneService.java
@ -0,0 +1,74 @@
+package com.rymcu.forest.lucene.service;
+
+import com.rymcu.forest.dto.UserDTO;
+import com.rymcu.forest.lucene.model.UserLucene;
+
+import java.util.List;
+
+/**
+ * Maintains and queries the Lucene index of users.
+ *
+ * @author suwen
+ * @date 2021/3/5 10:10
+ */
+public interface UserLuceneService {
+
+  /**
+   * Writes a batch of users into the index.
+   *
+   * @param list users to index
+   */
+  void writeUser(List<UserLucene> list);
+
+  /**
+   * Writes a single user, looked up by id, into the index.
+   *
+   * @param id user id
+   */
+  void writeUser(String id);
+
+  /**
+   * Writes a single user into the index.
+   *
+   * @param userLucene user to index
+   */
+  void writeUser(UserLucene userLucene);
+
+  /**
+   * Refreshes the index entry of a single user.
+   *
+   * @param id user id
+   */
+  void updateUser(String id);
+
+  /**
+   * Removes a single user from the index.
+   *
+   * @param id user id
+   */
+  void deleteUser(String id);
+
+  /**
+   * Searches users by keyword.
+   *
+   * @param value search keyword
+   * @return the matching users
+   */
+  List<UserLucene> searchUser(String value);
+
+  /**
+   * Loads every user as an index record.
+   *
+   * @return all users
+   */
+  List<UserLucene> getAllUserLucene();
+
+  /**
+   * Loads user details for the given user ids.
+   *
+   * @param ids user ids, one array element per id
+   * @return the matching users
+   */
+  List<UserDTO> getUsersByIds(String[] ids);
+}
--- a/src/main/java/com/rymcu/forest/lucene/service/impl/UserLuceneServiceImpl.java
+++ b/src/main/java/com/rymcu/forest/lucene/service/impl/UserLuceneServiceImpl.java
@ -0,0 +1,196 @@
+package com.rymcu.forest.lucene.service.impl;
+
+import com.rymcu.forest.dto.UserDTO;
+import com.rymcu.forest.lucene.lucene.UserBeanIndex;
+import com.rymcu.forest.lucene.lucene.IKAnalyzer;
+import com.rymcu.forest.lucene.mapper.UserLuceneMapper;
+import com.rymcu.forest.lucene.model.UserLucene;
+import com.rymcu.forest.lucene.service.UserLuceneService;
+import com.rymcu.forest.lucene.util.UserIndexUtil;
+import com.rymcu.forest.lucene.util.SearchUtil;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.highlight.*;
+import org.springframework.stereotype.Service;
+
+import javax.annotation.Resource;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+/**
+ * Lucene-backed implementation of {@link UserLuceneService}.
+ *
+ * <p>NOTE(review): {@code indexPath} is the generic {@code lucene/index} directory. If the
+ * article index lives under the same parent, user and article documents share the {@code id}
+ * term space - confirm whether the user index should get a directory of its own.
+ *
+ * @author suwen
+ * @date 2021/3/6 10:29
+ */
+@Service
+public class UserLuceneServiceImpl implements UserLuceneService {
+
+  /** Maximum number of users handed to one indexing thread. */
+  private static final int USERS_PER_THREAD = 3000;
+
+  /** Maximum number of highlighted fragments requested per field. */
+  private static final int MAX_FRAGMENTS = 10;
+
+  @Resource private UserLuceneMapper userLuceneMapper;
+
+  /** Parent directory of the Lucene index files. */
+  private final String indexPath = "lucene/index";
+
+  /**
+   * Indexes the given users, splitting the list into chunks of {@link #USERS_PER_THREAD} that
+   * are written by one {@link UserBeanIndex} worker each.
+   *
+   * <p>A {@code null} or empty list is a no-op.
+   *
+   * @param list users to index
+   */
+  @Override
+  public void writeUser(List<UserLucene> list) {
+    if (list == null || list.isEmpty()) {
+      // Nothing to do; this also avoids Executors.newFixedThreadPool(0), which throws.
+      return;
+    }
+    int totalCount = list.size();
+    int threadCount = (totalCount + USERS_PER_THREAD - 1) / USERS_PER_THREAD;
+    ExecutorService pool = Executors.newFixedThreadPool(threadCount);
+    try {
+      CountDownLatch countDownLatch1 = new CountDownLatch(1);
+      CountDownLatch countDownLatch2 = new CountDownLatch(threadCount);
+
+      for (int i = 0; i < threadCount; i++) {
+        int start = i * USERS_PER_THREAD;
+        int end = Math.min(start + USERS_PER_THREAD, totalCount);
+        List<UserLucene> subList = list.subList(start, end);
+        pool.execute(new UserBeanIndex(indexPath, i, countDownLatch1, countDownLatch2, subList));
+      }
+      countDownLatch1.countDown();
+      System.out.println("开始创建索引");
+      // Wait until every worker has counted down.
+      countDownLatch2.await();
+      System.out.println("所有线程都创建索引完毕");
+    } catch (InterruptedException e) {
+      // Preserve the interrupt for whoever owns this thread.
+      Thread.currentThread().interrupt();
+      e.printStackTrace();
+    } catch (Exception e) {
+      e.printStackTrace();
+    } finally {
+      // Release the pool on every path, not only after a successful run.
+      pool.shutdown();
+    }
+  }
+
+  /**
+   * Looks the user up by id and adds it to the index; does nothing when no such user exists.
+   *
+   * @param id user id
+   */
+  @Override
+  public void writeUser(String id) {
+    writeUser(userLuceneMapper.getById(id));
+  }
+
+  /**
+   * Adds a single user to the index; {@code null} is ignored.
+   *
+   * @param userLucene user to index
+   */
+  @Override
+  public void writeUser(UserLucene userLucene) {
+    if (userLucene == null) {
+      return;
+    }
+    UserIndexUtil.addIndex(userLucene);
+  }
+
+  /**
+   * Re-indexes the user with the given id; does nothing when no such user exists.
+   *
+   * @param id user id
+   */
+  @Override
+  public void updateUser(String id) {
+    UserLucene user = userLuceneMapper.getById(id);
+    if (user == null) {
+      return;
+    }
+    UserIndexUtil.updateIndex(user);
+  }
+
+  /**
+   * Removes the user with the given id from the index.
+   *
+   * @param id user id
+   */
+  @Override
+  public void deleteUser(String id) {
+    UserIndexUtil.deleteIndex(id);
+  }
+
+  /**
+   * Searches the {@code nickname} and {@code signature} fields for {@code value}.
+   *
+   * <p>Matches are wrapped in {@code <font color="red">...</font>}. Only the first page of at
+   * most 100 hits is returned. A {@code null} or blank keyword yields an empty list, as do
+   * query-syntax and I/O errors (which are printed).
+   *
+   * @param value search keyword
+   * @return the matching users with highlighted nickname/signature and their score
+   */
+  @Override
+  public List<UserLucene> searchUser(String value) {
+    List<UserLucene> resList = new ArrayList<>();
+    if (value == null || value.trim().isEmpty()) {
+      // No keyword: never fall back to reading the process' standard input.
+      return resList;
+    }
+    ExecutorService service = Executors.newCachedThreadPool();
+    Analyzer analyzer = new IKAnalyzer();
+    try {
+      IndexSearcher searcher = SearchUtil.getIndexSearcherByParentPath(indexPath, service);
+      String[] fields = {"nickname", "signature"};
+      MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer);
+      Query query = parser.parse(value);
+      // Prefix/suffix wrapped around every matched term (Lucene's default would be <B></B>).
+      SimpleHTMLFormatter htmlFormatter =
+          new SimpleHTMLFormatter("<font color=" + "\"" + "red" + "\"" + ">", "</font>");
+      Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
+
+      // TODO only the first page (100 hits) is fetched; paging could be exposed to callers.
+      TopDocs results = SearchUtil.getScoreDocsByPerPage(1, 100, searcher, query);
+
+      for (ScoreDoc hit : results.scoreDocs) {
+        Document hitDoc = searcher.doc(hit.doc);
+        // Signature: keep only the fragments that actually matched the query.
+        String signature =
+            highlight(
+                highlighter, searcher, analyzer, hit.doc, "signature", hitDoc.get("signature"),
+                true);
+        // Nickname: keep every fragment so the name is shown even without a match in it.
+        String nickname =
+            highlight(
+                highlighter, searcher, analyzer, hit.doc, "nickname", hitDoc.get("nickname"),
+                false);
+        resList.add(
+            UserLucene.builder()
+                .idUser(Integer.valueOf(hitDoc.get("id")))
+                .nickname(nickname)
+                .signature(signature)
+                .score(String.valueOf(hit.score))
+                .build());
+      }
+    } catch (IOException | ParseException | InvalidTokenOffsetsException e) {
+      System.out.println(e.getMessage());
+      e.printStackTrace();
+    } finally {
+      service.shutdownNow();
+    }
+    return resList;
+  }
+
+  /** Concatenates the highlighted fragments of one stored field of one hit. */
+  private static String highlight(
+      Highlighter highlighter,
+      IndexSearcher searcher,
+      Analyzer analyzer,
+      int docId,
+      String field,
+      String text,
+      boolean matchedOnly)
+      throws IOException, InvalidTokenOffsetsException {
+    if (text == null) {
+      return "";
+    }
+    TokenStream tokenStream =
+        TokenSources.getAnyTokenStream(searcher.getIndexReader(), docId, field, analyzer);
+    TextFragment[] fragments =
+        highlighter.getBestTextFragments(tokenStream, text, false, MAX_FRAGMENTS);
+    StringBuilder highlighted = new StringBuilder();
+    for (TextFragment fragment : fragments) {
+      if (fragment != null && (!matchedOnly || fragment.getScore() > 0)) {
+        highlighted.append(fragment.toString());
+      }
+    }
+    return highlighted.toString();
+  }
+
+  /**
+   * Loads every user as an index record.
+   *
+   * @return all users
+   */
+  @Override
+  public List<UserLucene> getAllUserLucene() {
+    return userLuceneMapper.getAllUserLucene();
+  }
+
+  /**
+   * Loads user details for the given ids; a {@code null} or empty array yields an empty list
+   * without querying the database (the SQL {@code in (...)} clause would otherwise be empty).
+   *
+   * @param ids user ids, one array element per id
+   * @return the matching users
+   */
+  @Override
+  public List<UserDTO> getUsersByIds(String[] ids) {
+    if (ids == null || ids.length == 0) {
+      return new ArrayList<>();
+    }
+    return userLuceneMapper.getUsersByIds(ids);
+  }
+}
--- a/src/main/java/com/rymcu/forest/lucene/util/UserIndexUtil.java
+++ b/src/main/java/com/rymcu/forest/lucene/util/UserIndexUtil.java
@ -0,0 +1,86 @@
+package com.rymcu.forest.lucene.util;
+
+import cn.hutool.core.io.FileUtil;
+import com.rymcu.forest.lucene.model.UserLucene;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * Static helpers that add, replace and delete single users in the Lucene index at runtime.
+ *
+ * <p>NOTE(review): {@code PATH} and the {@code index777} sub-index look like they are shared
+ * with the article index; deleting by the bare {@code id} term would then also hit articles
+ * with the same id - confirm whether users need their own directory.
+ *
+ * @author suwen
+ */
+public class UserIndexUtil {
+
+  /** Parent directory holding all Lucene sub-indexes. */
+  private static final String PATH = System.getProperty("user.dir") + "/lucene/index";
+
+  /** Sub-index that receives documents written while the system is running. */
+  private static final String INDEX_PATH =
+      System.getProperty("user.dir") + "/lucene/index/index777";
+
+  private UserIndexUtil() {
+    // static utility, not meant to be instantiated
+  }
+
+  /** Deletes the runtime sub-index directory, if it exists. */
+  public static void deleteAllIndex() {
+    if (FileUtil.exist(INDEX_PATH)) {
+      FileUtil.del(INDEX_PATH);
+    }
+  }
+
+  /**
+   * Adds one user to the runtime sub-index; {@code null} is ignored.
+   *
+   * @param t user to index
+   */
+  public static void addIndex(UserLucene t) {
+    if (t == null) {
+      return;
+    }
+    creatIndex(t);
+  }
+
+  /**
+   * Replaces the index entry of one user: every existing document with the user's id is
+   * deleted, then the user is written again. A {@code null} user or id is ignored.
+   *
+   * @param t user to re-index
+   */
+  public static void updateIndex(UserLucene t) {
+    if (t == null || t.getIdUser() == null) {
+      return;
+    }
+    deleteIndex(t.getIdUser().toString());
+    creatIndex(t);
+  }
+
+  /**
+   * Writes one user document into the runtime sub-index.
+   *
+   * @param t user to index
+   */
+  private static synchronized void creatIndex(UserLucene t) {
+    System.out.println("创建单个索引");
+    // try-with-resources: the writer (and its index lock) is released even if the add fails.
+    try (IndexWriter writer = IndexUtil.getIndexWriter(INDEX_PATH, false)) {
+      Document doc = new Document();
+      doc.add(new StringField("id", t.getIdUser() + "", Field.Store.YES));
+      // Lucene rejects null field values; index missing text as an empty string.
+      doc.add(new TextField("nickname", nullToEmpty(t.getNickname()), Field.Store.YES));
+      doc.add(new TextField("signature", nullToEmpty(t.getSignature()), Field.Store.YES));
+      writer.addDocument(doc);
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+  }
+
+  /**
+   * Deletes every document whose {@code id} term equals {@code id} from each sub-index
+   * directory under {@code PATH}. Does nothing when that directory does not exist yet.
+   *
+   * @param id user id
+   */
+  public static synchronized void deleteIndex(String id) {
+    if (!FileUtil.isDirectory(PATH)) {
+      return;
+    }
+    Arrays.stream(FileUtil.ls(PATH))
+        .filter(each -> each.isDirectory())
+        .forEach(each -> deleteFromIndex(each.getAbsolutePath(), id));
+  }
+
+  /** Deletes the documents with the given id term from the index stored in {@code indexDir}. */
+  private static void deleteFromIndex(String indexDir, String id) {
+    try (IndexWriter writer = IndexUtil.getIndexWriter(indexDir, false)) {
+      writer.deleteDocuments(new Term("id", id));
+      // Expunge the deleted documents right away instead of waiting for a later merge.
+      writer.forceMergeDeletes();
+      writer.commit();
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+  }
+
+  /** Returns {@code value}, or the empty string when {@code value} is {@code null}. */
+  private static String nullToEmpty(String value) {
+    return value == null ? "" : value;
+  }
+}
--- a/src/main/java/mapper/lucene/UserLuceneMapper.xml
+++ b/src/main/java/mapper/lucene/UserLuceneMapper.xml
@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd" >
+<mapper namespace="com.rymcu.forest.lucene.mapper.UserLuceneMapper">
+    <!-- Index record: id, nickname and signature of a user. -->
+    <resultMap id="BaseResultMap" type="com.rymcu.forest.lucene.model.UserLucene">
+        <id column="id" property="idUser" jdbcType="INTEGER"/>
+        <result column="nickname" property="nickname" jdbcType="VARCHAR"/>
+        <result column="signature" property="signature" jdbcType="VARCHAR"/>
+    </resultMap>
+    <!-- User details returned by getUsersByIds. -->
+    <resultMap id="DTOResultMapper" type="com.rymcu.forest.dto.UserDTO">
+        <result column="id" property="idUser"/>
+        <result column="account" property="account"/>
+        <result column="nickname" property="nickname"/>
+        <result column="avatar_type" property="avatarType"/>
+        <result column="avatar_url" property="avatarUrl"/>
+        <result column="signature" property="signature"/>
+    </resultMap>
+    <!-- All users, as index records. -->
+    <select id="getAllUserLucene" resultMap="BaseResultMap">
+        SELECT id, nickname, signature
+        FROM forest_user
+    </select>
+
+    <!--
+        Users whose id is in `ids`, returned in the order of `ids` (order by field(...)).
+        `ids` is the only bound parameter and must not be empty.
+    -->
+    <select id="getUsersByIds" resultMap="DTOResultMapper">
+        select id, nickname, avatar_type, avatar_url, account, signature
+        from forest_user
+        where id in
+        <foreach collection="ids" item="id" index="index"
+                 open="(" close=")" separator=",">
+            #{id}
+        </foreach>
+        order by
+        field(id
+        <foreach collection="ids" item="id" index="index"
+                 open="," close=")" separator=",">
+            #{id}
+        </foreach>
+    </select>
+
+    <!-- Index record of a single user. -->
+    <select id="getById" resultMap="BaseResultMap">
+        SELECT id, nickname, signature
+        FROM `forest_user`
+        where id = #{id};
+    </select>
+</mapper>