🚧 用户索引

This commit is contained in:
Suwen 2021-03-06 23:04:49 +08:00
parent d2be9e3afc
commit 6ccd66fd2f
11 changed files with 584 additions and 33 deletions

View File

@ -8,7 +8,9 @@ import com.rymcu.forest.dto.ArticleDTO;
import com.rymcu.forest.lucene.model.ArticleLucene;
import com.rymcu.forest.lucene.service.LuceneService;
import com.rymcu.forest.lucene.service.UserDicService;
import com.rymcu.forest.lucene.service.UserLuceneService;
import com.rymcu.forest.lucene.util.ArticleIndexUtil;
import com.rymcu.forest.lucene.util.UserIndexUtil;
import com.rymcu.forest.util.Utils;
import org.springframework.web.bind.annotation.*;
@ -31,18 +33,21 @@ import java.util.concurrent.Executors;
public class LuceneSearchController {
@Resource private LuceneService luceneService;
@Resource private UserLuceneService userLuceneService;
@Resource private UserDicService dicService;
@PostConstruct
public void createIndex() {
// 删除系统运行时保存的索引重新创建索引
ArticleIndexUtil.deleteAllIndex();
UserIndexUtil.deleteAllIndex();
ExecutorService executor = Executors.newSingleThreadExecutor();
CompletableFuture<String> future =
CompletableFuture.supplyAsync(
() -> {
System.out.println(">>>>>>>>> 开始创建索引 <<<<<<<<<<<");
luceneService.writeArticle(luceneService.getAllArticleLucene());
// luceneService.writeArticle(luceneService.getAllArticleLucene());
userLuceneService.writeUser(userLuceneService.getAllUserLucene());
System.out.println(">>>>>>>>> 索引创建完毕 <<<<<<<<<<<");
System.out.println("加载用户配置的自定义扩展词典到主词库表");
try {

View File

@ -7,10 +7,7 @@ import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import java.io.IOException;
import java.util.List;
import java.util.concurrent.CountDownLatch;
@ -22,19 +19,6 @@ import java.util.concurrent.CountDownLatch;
*/
public class ArticleBeanIndex extends BaseIndex<ArticleLucene> {
public ArticleBeanIndex(
String parentIndexPath,int subIndex) {
super(parentIndexPath, subIndex);
}
public ArticleBeanIndex(
IndexWriter writer,
CountDownLatch countDownLatch1,
CountDownLatch countDownLatch2,
List<ArticleLucene> list) {
super(writer, countDownLatch1, countDownLatch2, list);
}
public ArticleBeanIndex(
String parentIndexPath,
int subIndex,
@ -60,14 +44,4 @@ public class ArticleBeanIndex extends BaseIndex<ArticleLucene> {
writer.updateDocument(new Term("id", t.getIdArticle() + ""), doc);
}
}
public void indexDoc(ArticleLucene t) throws Exception {
indexDoc(getWriter(),t);
}
@Override
public void deleteDoc( String id) throws IOException {
Query query = new TermQuery(new Term("id", id));
getWriter().deleteDocuments(query);
}
}

View File

@ -119,12 +119,6 @@ public abstract class BaseIndex<T> implements Runnable {
}
}
public abstract void deleteDoc(String id) throws IOException;
public IndexWriter getWriter() {
return writer;
}
@Override
public void run() {
try {

View File

@ -0,0 +1,73 @@
package com.rymcu.forest.lucene.lucene;
import com.rymcu.forest.lucene.model.UserLucene;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import java.io.IOException;
import java.util.List;
import java.util.concurrent.CountDownLatch;
/**
 * Lucene index task for {@link UserLucene} records.
 *
 * <p>Every user becomes one document with the stored fields {@code id}, {@code nickname} and
 * {@code signature}.
 *
 * @author suwen
 * @date 2021/2/2 14:10
 */
public class UserBeanIndex extends BaseIndex<UserLucene> {

  /** Forwards the index location to the matching {@link BaseIndex} constructor. */
  public UserBeanIndex(String parentIndexPath, int subIndex) {
    super(parentIndexPath, subIndex);
  }

  /** Forwards an existing writer, both latches and the work list to {@link BaseIndex}. */
  public UserBeanIndex(
      IndexWriter writer,
      CountDownLatch countDownLatch1,
      CountDownLatch countDownLatch2,
      List<UserLucene> list) {
    super(writer, countDownLatch1, countDownLatch2, list);
  }

  /** Forwards the index location, both latches and the work list to {@link BaseIndex}. */
  public UserBeanIndex(
      String parentIndexPath,
      int subIndex,
      CountDownLatch countDownLatch1,
      CountDownLatch countDownLatch2,
      List<UserLucene> list) {
    super(parentIndexPath, subIndex, countDownLatch1, countDownLatch2, list);
  }

  /**
   * Adds or replaces the document of one user.
   *
   * <p>A {@code null} nickname or signature is indexed as an empty string, because Lucene's
   * {@link Field} constructor rejects {@code null} values with an {@link
   * IllegalArgumentException}.
   *
   * @param writer writer that receives the document
   * @param user user to index
   * @throws Exception if the writer fails to add or update the document
   */
  @Override
  public void indexDoc(IndexWriter writer, UserLucene user) throws Exception {
    String userId = String.valueOf(user.getIdUser());

    Document doc = new Document();
    doc.add(new Field("id", userId, TextField.TYPE_STORED));
    doc.add(new Field("nickname", nullToEmpty(user.getNickname()), TextField.TYPE_STORED));
    doc.add(new Field("signature", nullToEmpty(user.getSignature()), TextField.TYPE_STORED));

    if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {
      // A freshly created index cannot contain an older version of this user.
      writer.addDocument(doc);
    } else {
      // Replace whatever document is currently stored under the same id.
      writer.updateDocument(new Term("id", userId), doc);
    }
  }

  /**
   * Indexes one user with the writer owned by this task.
   *
   * <p>NOTE(review): relies on {@code getWriter()} being inherited from {@code BaseIndex};
   * confirm it is still declared there.
   *
   * @param t user to index
   * @throws Exception if indexing fails
   */
  public void indexDoc(UserLucene t) throws Exception {
    indexDoc(getWriter(), t);
  }

  /**
   * Deletes every document whose {@code id} term equals the given id.
   *
   * @param id user id to remove
   * @throws IOException if the writer fails
   */
  @Override
  public void deleteDoc(String id) throws IOException {
    Query query = new TermQuery(new Term("id", id));
    getWriter().deleteDocuments(query);
  }

  /** Returns the value itself, or an empty string when it is {@code null}. */
  private static String nullToEmpty(String value) {
    return value == null ? "" : value;
  }
}

View File

@ -0,0 +1,41 @@
package com.rymcu.forest.lucene.mapper;
import com.rymcu.forest.dto.UserDTO;
import com.rymcu.forest.lucene.model.UserLucene;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import java.util.List;
/**
* MyBatis mapper that reads the user data needed by the Lucene user index.
*
* @author suwen
* @date 2021/3/6 10:00
*/
@Mapper
public interface UserLuceneMapper {
/**
* Loads all users as {@link UserLucene} records.
*
* @return all users
*/
List<UserLucene> getAllUserLucene();
/**
* Loads the users with the given ids.
*
* @param ids user ids, passed as an array and bound under the name {@code ids}
* @return the matching users
*/
List<UserDTO> getUsersByIds(@Param("ids") String[] ids);
/**
* Loads a single {@link UserLucene}.
*
* @param id user id
* @return the user with that id (presumably {@code null} when no row matches; verify)
*/
UserLucene getById(@Param("id") String id);
}

View File

@ -0,0 +1,31 @@
package com.rymcu.forest.lucene.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
* Portfolio model used by the Lucene search module.
*
* <p>Accessors, builder and constructors are generated by the Lombok annotations below.
*
* @author suwen
* @date 2021/3/6 09:57
*/
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class PortfolioLucene {
/** Portfolio id. */
private Integer idPortfolio;
/** Portfolio title. */
private String portfolioTitle;
/** Portfolio description. */
private String portfolioDescription;
/** Relevance score. */
private String score;
}

View File

@ -0,0 +1,35 @@
package com.rymcu.forest.lucene.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.apache.ibatis.type.JdbcType;
import tk.mybatis.mapper.annotation.ColumnType;
import javax.persistence.Column;
/**
* User model used by the Lucene search module.
*
* <p>Accessors, builder and constructors are generated by the Lombok annotations below.
*
* @author suwen
* @date 2021/3/6 09:57
*/
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class UserLucene {
/** User id. */
private Integer idUser;
/** Nickname. */
private String nickname;
/** Signature. */
private String signature;
/** Relevance score. */
private String score;
}

View File

@ -0,0 +1,74 @@
package com.rymcu.forest.lucene.service;
import com.rymcu.forest.dto.UserDTO;
import com.rymcu.forest.lucene.model.UserLucene;
import java.util.List;
/**
 * Maintains and queries the Lucene index of users.
 *
 * @author suwen
 * @date 2021/3/5 10:10
 */
public interface UserLuceneService {

  /**
   * Writes a batch of users to the index.
   *
   * @param list users to index
   */
  void writeUser(List<UserLucene> list);

  /**
   * Writes the index entry of a single user, identified by id.
   *
   * @param id user id
   */
  void writeUser(String id);

  /**
   * Writes the index entry of a single user.
   *
   * @param userLucene user to index
   */
  void writeUser(UserLucene userLucene);

  /**
   * Updates the index entry of a single user.
   *
   * @param id user id
   */
  void updateUser(String id);

  /**
   * Deletes the index entry of a single user.
   *
   * @param id user id
   */
  void deleteUser(String id);

  /**
   * Searches users by keyword.
   *
   * @param value keyword to search for
   * @return users matching the keyword
   */
  List<UserLucene> searchUser(String value);

  /**
   * Loads all users in the form that is written to the index.
   *
   * @return all users
   */
  List<UserLucene> getAllUserLucene();

  /**
   * Loads the users with the given ids.
   *
   * @param ids ids of the users to load
   * @return the matching users
   */
  List<UserDTO> getUsersByIds(String[] ids);
}

View File

@ -0,0 +1,196 @@
package com.rymcu.forest.lucene.service.impl;
import com.rymcu.forest.dto.UserDTO;
import com.rymcu.forest.lucene.lucene.UserBeanIndex;
import com.rymcu.forest.lucene.lucene.IKAnalyzer;
import com.rymcu.forest.lucene.mapper.UserLuceneMapper;
import com.rymcu.forest.lucene.model.UserLucene;
import com.rymcu.forest.lucene.service.UserLuceneService;
import com.rymcu.forest.lucene.util.UserIndexUtil;
import com.rymcu.forest.lucene.util.SearchUtil;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.*;
import org.springframework.stereotype.Service;
import javax.annotation.Resource;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
 * Lucene-backed implementation of {@link UserLuceneService}.
 *
 * <p>NOTE(review): {@code indexPath} is the generic {@code lucene/index} root. If other services
 * write their sub-indexes under the same root, documents of different kinds end up in the same
 * directories; confirm this is intended.
 *
 * @author suwen
 * @date 2021/3/6 10:29
 */
@Service
public class UserLuceneServiceImpl implements UserLuceneService {

  /** Number of users handed to a single indexing task. */
  private static final int USERS_PER_THREAD = 3000;

  @Resource private UserLuceneMapper userLuceneMapper;

  /** Root directory of the Lucene index files. */
  private final String indexPath = "lucene/index";

  /**
   * Splits the users into chunks and indexes every chunk with its own {@link UserBeanIndex} task.
   *
   * <p>A {@code null} or empty list is ignored. The method returns once all tasks have counted
   * down the completion latch, or when the waiting thread is interrupted.
   *
   * @param list users to index
   */
  @Override
  public void writeUser(List<UserLucene> list) {
    if (list == null || list.isEmpty()) {
      // Nothing to index; a pool of size 0 would be rejected by Executors.
      return;
    }
    int totalCount = list.size();
    int threadCount = (totalCount + USERS_PER_THREAD - 1) / USERS_PER_THREAD;
    ExecutorService pool = Executors.newFixedThreadPool(threadCount);
    try {
      // Presumably a start gate (latch 1) and a completion latch (latch 2); see BaseIndex#run.
      CountDownLatch countDownLatch1 = new CountDownLatch(1);
      CountDownLatch countDownLatch2 = new CountDownLatch(threadCount);
      for (int i = 0; i < threadCount; i++) {
        int start = i * USERS_PER_THREAD;
        int end = Math.min(start + USERS_PER_THREAD, totalCount);
        List<UserLucene> subList = list.subList(start, end);
        pool.execute(new UserBeanIndex(indexPath, i, countDownLatch1, countDownLatch2, subList));
      }
      countDownLatch1.countDown();
      System.out.println("开始创建索引");
      // Wait until every task has reported completion.
      countDownLatch2.await();
      System.out.println("所有线程都创建索引完毕");
    } catch (InterruptedException e) {
      // Keep the interrupt visible to the caller instead of swallowing it.
      Thread.currentThread().interrupt();
      e.printStackTrace();
    } catch (Exception e) {
      e.printStackTrace();
    } finally {
      // Release the pool even when task creation or waiting failed.
      pool.shutdown();
    }
  }

  /**
   * Loads the user with the given id and writes it to the index; does nothing when the mapper
   * returns no user.
   *
   * @param id user id
   */
  @Override
  public void writeUser(String id) {
    UserLucene user = userLuceneMapper.getById(id);
    if (user == null) {
      return;
    }
    writeUser(user);
  }

  /**
   * Writes a single user to the index.
   *
   * @param userLucene user to index
   */
  @Override
  public void writeUser(UserLucene userLucene) {
    UserIndexUtil.addIndex(userLucene);
  }

  /**
   * Reloads the user with the given id and replaces its index entry; does nothing when the mapper
   * returns no user.
   *
   * @param id user id
   */
  @Override
  public void updateUser(String id) {
    UserLucene user = userLuceneMapper.getById(id);
    if (user == null) {
      return;
    }
    UserIndexUtil.updateIndex(user);
  }

  /**
   * Removes the index entry of the user with the given id.
   *
   * @param id user id
   */
  @Override
  public void deleteUser(String id) {
    UserIndexUtil.deleteIndex(id);
  }

  /**
   * Searches the {@code nickname} and {@code signature} fields for the given keyword.
   *
   * <p>Matches are wrapped in {@code <font color="red">…</font>}. The returned signature contains
   * only fragments that matched; the returned nickname contains all of its fragments. Search
   * failures are printed and produce the results collected so far.
   *
   * @param value keyword; {@code null} or blank yields an empty list
   * @return highlighted users, never {@code null}
   */
  @Override
  public List<UserLucene> searchUser(String value) {
    List<UserLucene> resList = new ArrayList<>();
    if (value == null || value.trim().isEmpty()) {
      // Never fall back to reading the keyword from standard input inside a service.
      return resList;
    }
    ExecutorService service = Executors.newCachedThreadPool();
    Analyzer analyzer = new IKAnalyzer();
    try {
      IndexSearcher searcher = SearchUtil.getIndexSearcherByParentPath(indexPath, service);
      String[] fields = {"nickname", "signature"};
      MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer);
      Query query = parser.parse(value);
      // Highlight with red font tags instead of the formatter's default markup.
      SimpleHTMLFormatter htmlFormatter =
          new SimpleHTMLFormatter("<font color=\"red\">", "</font>");
      Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
      // TODO only one fixed result window is requested (arguments 1 and 100); add real paging.
      TopDocs results = SearchUtil.getScoreDocsByPerPage(1, 100, searcher, query);
      for (ScoreDoc hit : results.scoreDocs) {
        Document hitDoc = searcher.doc(hit.doc);
        String signature =
            highlightField(searcher, analyzer, highlighter, hit.doc, hitDoc, "signature", true);
        String nickname =
            highlightField(searcher, analyzer, highlighter, hit.doc, hitDoc, "nickname", false);
        resList.add(
            UserLucene.builder()
                .idUser(Integer.valueOf(hitDoc.get("id")))
                .nickname(nickname)
                .signature(signature)
                .score(String.valueOf(hit.score))
                .build());
      }
    } catch (IOException | ParseException | InvalidTokenOffsetsException e) {
      System.out.println(e.getMessage());
      e.printStackTrace();
    } finally {
      service.shutdownNow();
      analyzer.close();
    }
    return resList;
  }

  /**
   * Loads all users in the form that is written to the index.
   *
   * @return all users
   */
  @Override
  public List<UserLucene> getAllUserLucene() {
    return userLuceneMapper.getAllUserLucene();
  }

  /**
   * Loads the users with the given ids.
   *
   * @param ids ids of the users to load; {@code null} or empty yields an empty list without
   *     querying, because the mapped SQL needs at least one id
   * @return the matching users
   */
  @Override
  public List<UserDTO> getUsersByIds(String[] ids) {
    if (ids == null || ids.length == 0) {
      return new ArrayList<>();
    }
    return userLuceneMapper.getUsersByIds(ids);
  }

  /**
   * Builds the highlighted text of one stored field of a hit.
   *
   * @param matchedOnly when {@code true}, only fragments with a positive score are kept
   * @return the concatenated fragments, or an empty string when the field is not stored
   */
  private static String highlightField(
      IndexSearcher searcher,
      Analyzer analyzer,
      Highlighter highlighter,
      int docId,
      Document hitDoc,
      String field,
      boolean matchedOnly)
      throws IOException, InvalidTokenOffsetsException {
    String text = hitDoc.get(field);
    if (text == null) {
      return "";
    }
    TokenStream tokenStream =
        TokenSources.getAnyTokenStream(searcher.getIndexReader(), docId, field, analyzer);
    TextFragment[] fragments = highlighter.getBestTextFragments(tokenStream, text, false, 10);
    StringBuilder highlighted = new StringBuilder();
    for (TextFragment fragment : fragments) {
      if (fragment == null) {
        continue;
      }
      if (matchedOnly && !(fragment.getScore() > 0)) {
        continue;
      }
      highlighted.append(fragment.toString());
    }
    return highlighted.toString();
  }
}

View File

@ -0,0 +1,86 @@
package com.rymcu.forest.lucene.util;
import cn.hutool.core.io.FileUtil;
import com.rymcu.forest.lucene.model.UserLucene;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import java.io.IOException;
import java.util.Arrays;
/**
 * Helper for adding, replacing and deleting single user documents in the Lucene index.
 *
 * <p>NOTE(review): deletes run against every sub-directory of the generic {@code lucene/index}
 * root and match on the {@code id} term only. If other document kinds are stored under the same
 * root, a delete for a user id also hits them; confirm.
 *
 * @author suwen
 */
public class UserIndexUtil {

  /** Root directory that holds all Lucene index directories. */
  private static final String PATH = System.getProperty("user.dir") + "/lucene/index";

  /** Directory of the index that is written while the system is running. */
  private static final String INDEX_PATH =
      System.getProperty("user.dir") + "/lucene/index/index777";

  /** Deletes the runtime index directory if it exists. */
  public static void deleteAllIndex() {
    if (FileUtil.exist(INDEX_PATH)) {
      FileUtil.del(INDEX_PATH);
    }
  }

  /**
   * Adds one user to the runtime index.
   *
   * @param t user to index
   */
  public static void addIndex(UserLucene t) {
    creatIndex(t);
  }

  /**
   * Replaces the index entry of one user by deleting it everywhere and adding it again.
   *
   * @param t user to re-index
   */
  public static void updateIndex(UserLucene t) {
    deleteIndex(t.getIdUser().toString());
    creatIndex(t);
  }

  /**
   * Writes one user document to the runtime index.
   *
   * <p>The document is built before the writer is opened and the writer is closed by
   * try-with-resources, so a failure cannot leave the writer open. A {@code null} nickname or
   * signature is stored as an empty string because Lucene fields reject {@code null} values.
   *
   * @param t user to index
   */
  private static synchronized void creatIndex(UserLucene t) {
    System.out.println("创建单个索引");
    Document doc = new Document();
    doc.add(new StringField("id", t.getIdUser() + "", Field.Store.YES));
    doc.add(new TextField("nickname", nullToEmpty(t.getNickname()), Field.Store.YES));
    doc.add(new TextField("signature", nullToEmpty(t.getSignature()), Field.Store.YES));
    try (IndexWriter writer = IndexUtil.getIndexWriter(INDEX_PATH, false)) {
      writer.addDocument(doc);
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

  /**
   * Deletes the documents with the given id from every index directory below the index root.
   *
   * <p>Does nothing when the index root does not exist yet.
   *
   * @param id user id to remove
   */
  public static synchronized void deleteIndex(String id) {
    if (!FileUtil.isDirectory(PATH)) {
      // FileUtil.ls rejects paths that are not directories.
      return;
    }
    Arrays.stream(FileUtil.ls(PATH))
        .filter(each -> each.isDirectory())
        .forEach(each -> deleteFromIndex(each.getAbsolutePath(), id));
  }

  /** Deletes the documents with the given id from the index stored in one directory. */
  private static void deleteFromIndex(String indexDir, String id) {
    try (IndexWriter writer = IndexUtil.getIndexWriter(indexDir, false)) {
      writer.deleteDocuments(new Term("id", id));
      // Merge away the deleted documents instead of only marking them.
      writer.forceMergeDeletes();
      writer.commit();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

  /** Returns the value itself, or an empty string when it is {@code null}. */
  private static String nullToEmpty(String value) {
    return value == null ? "" : value;
  }
}

View File

@ -0,0 +1,42 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd" >
<mapper namespace="com.rymcu.forest.lucene.mapper.UserLuceneMapper">
    <!-- Maps forest_user rows to the model that is written to the search index. -->
    <resultMap id="BaseResultMap" type="com.rymcu.forest.lucene.model.UserLucene">
        <id column="id" property="idUser" jdbcType="INTEGER"/>
        <result column="nickname" property="nickname" jdbcType="VARCHAR"/>
        <result column="signature" property="signature" jdbcType="VARCHAR"/>
    </resultMap>
    <!-- Maps forest_user rows to the user DTO. -->
    <resultMap id="DTOResultMapper" type="com.rymcu.forest.dto.UserDTO">
        <result column="id" property="idUser"/>
        <result column="account" property="account"/>
        <result column="nickname" property="nickname"/>
        <result column="avatar_type" property="avatarType"/>
        <result column="avatar_url" property="avatarUrl"/>
        <result column="signature" property="signature"/>
    </resultMap>
    <!-- Loads every user for a full index build. -->
    <select id="getAllUserLucene" resultMap="BaseResultMap">
        SELECT id, nickname, signature
        FROM forest_user
    </select>
    <!-- Loads the users with the given ids, ordered like the ids array.
         Only the "ids" parameter is bound by the mapper method, so the statement must not
         reference any other parameter. Callers must pass a non-empty array: with an empty one
         the foreach renders nothing and the statement is invalid. -->
    <select id="getUsersByIds" resultMap="DTOResultMapper">
        select id, nickname, avatar_type, avatar_url, account, signature
        from forest_user
        where id in
        <foreach collection="ids" item="id" index="index"
                 open="(" close=")" separator=",">
            #{id}
        </foreach>
        order by
        field(id
        <foreach collection="ids" item="id" index="index"
                 open="," close=")" separator=",">
            #{id}
        </foreach>
    </select>
    <!-- Loads a single user for indexing. -->
    <select id="getById" resultMap="BaseResultMap">
        SELECT id, nickname, signature
        FROM `forest_user`
        where id = #{id}
    </select>
</mapper>