try to: lucene文章搜索
- lucene文章搜索分页跳转 - lucene索引创建
This commit is contained in:
parent
90403f9b26
commit
6e36e9df65
3
.gitignore
vendored
3
.gitignore
vendored
@ -30,3 +30,6 @@ build/
|
|||||||
|
|
||||||
### VS Code ###
|
### VS Code ###
|
||||||
.vscode/
|
.vscode/
|
||||||
|
|
||||||
|
### lucene ###
|
||||||
|
index
|
||||||
|
88
src/main/java/com/rymcu/forest/lucene/api/LuceneSearchController.java
Executable file
88
src/main/java/com/rymcu/forest/lucene/api/LuceneSearchController.java
Executable file
@ -0,0 +1,88 @@
|
|||||||
|
package com.rymcu.forest.lucene.api;
|
||||||
|
|
||||||
|
import com.github.pagehelper.Page;
|
||||||
|
import com.github.pagehelper.PageInfo;
|
||||||
|
import com.rymcu.forest.core.result.GlobalResult;
|
||||||
|
import com.rymcu.forest.core.result.GlobalResultGenerator;
|
||||||
|
import com.rymcu.forest.dto.ArticleDTO;
|
||||||
|
import com.rymcu.forest.lucene.model.ArticleLucene;
|
||||||
|
import com.rymcu.forest.lucene.service.LuceneService;
|
||||||
|
import com.rymcu.forest.util.Utils;
|
||||||
|
import lombok.extern.log4j.Log4j2;
|
||||||
|
import org.springframework.web.bind.annotation.*;
|
||||||
|
|
||||||
|
import javax.annotation.Resource;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* LuceneSearchController
|
||||||
|
*
|
||||||
|
* @author suwen
|
||||||
|
* @date 2021/2/3 10:41
|
||||||
|
*/
|
||||||
|
@Log4j2
|
||||||
|
@RestController
|
||||||
|
@RequestMapping("/api/v1/lucene")
|
||||||
|
public class LuceneSearchController {
|
||||||
|
|
||||||
|
@Resource private LuceneService luceneService;
|
||||||
|
|
||||||
|
@GetMapping("/getArticles")
|
||||||
|
public GlobalResult createIndex() {
|
||||||
|
return GlobalResultGenerator.genSuccessResult(luceneService.getAllArticleLucene());
|
||||||
|
}
|
||||||
|
|
||||||
|
@GetMapping("/getArticlesByIds")
|
||||||
|
public GlobalResult getArticlesByIds() {
|
||||||
|
return GlobalResultGenerator.genSuccessResult(
|
||||||
|
luceneService.getArticlesByIds(new String[] {"1", "2", "3"}));
|
||||||
|
}
|
||||||
|
|
||||||
|
@GetMapping("/createIndex")
|
||||||
|
public GlobalResult createIndex(
|
||||||
|
@RequestParam(required = false, defaultValue = "0") Integer limit,
|
||||||
|
@RequestParam(required = false, defaultValue = "1000") Integer offset) {
|
||||||
|
// 拉取数据
|
||||||
|
luceneService.writeArticle(luceneService.getAllArticleLucene());
|
||||||
|
return GlobalResultGenerator.genSuccessResult("创建索引成功");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 搜索,实现高亮
|
||||||
|
*
|
||||||
|
* @param q
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
@GetMapping("/searchArticle/{q}")
|
||||||
|
public GlobalResult searchArticle(
|
||||||
|
@PathVariable String q,
|
||||||
|
@RequestParam(defaultValue = "1") Integer pageNum,
|
||||||
|
@RequestParam(defaultValue = "10") Integer pageSize) {
|
||||||
|
// 找出相关文章,相关度倒序
|
||||||
|
List<ArticleLucene> resList = luceneService.searchArticle(q);
|
||||||
|
// 分页组装文章详情
|
||||||
|
int total = resList.size();
|
||||||
|
if (total == 0) {
|
||||||
|
return GlobalResultGenerator.genSuccessResult("未找到相关文章");
|
||||||
|
}
|
||||||
|
Page<ArticleDTO> page = new Page<>(pageNum, pageSize);
|
||||||
|
page.setTotal(total);
|
||||||
|
int startIndex = (pageNum - 1) * pageSize;
|
||||||
|
int endIndex = Math.min(startIndex + pageSize, total);
|
||||||
|
// 分割子列表
|
||||||
|
List<ArticleLucene> subList = resList.subList(startIndex, endIndex);
|
||||||
|
String[] ids = subList.stream().map(ArticleLucene::getIdArticle).toArray(String[]::new);
|
||||||
|
List<ArticleDTO> articleDTOList = luceneService.getArticlesByIds(ids);
|
||||||
|
ArticleDTO temp;
|
||||||
|
// 写入文章关键词信息
|
||||||
|
for (int i = 0; i < articleDTOList.size(); i++) {
|
||||||
|
temp = articleDTOList.get(i);
|
||||||
|
temp.setArticleTitle(subList.get(i).getArticleTitle());
|
||||||
|
temp.setArticlePreviewContent(subList.get(i).getArticleContent());
|
||||||
|
articleDTOList.set(i, temp);
|
||||||
|
}
|
||||||
|
page.addAll(articleDTOList);
|
||||||
|
PageInfo<ArticleDTO> pageInfo = new PageInfo<>(page);
|
||||||
|
return GlobalResultGenerator.genSuccessResult(Utils.getArticlesGlobalResult(pageInfo));
|
||||||
|
}
|
||||||
|
}
|
@ -1,72 +0,0 @@
|
|||||||
package com.rymcu.forest.lucene.api;
|
|
||||||
|
|
||||||
import com.rymcu.forest.dto.ArticleDTO;
|
|
||||||
import com.rymcu.forest.dto.ArticleSearchDTO;
|
|
||||||
import com.rymcu.forest.lucene.mapper.BaikeMapper;
|
|
||||||
import com.rymcu.forest.lucene.model.Baike;
|
|
||||||
import com.rymcu.forest.lucene.service.SearchService;
|
|
||||||
import com.rymcu.forest.service.ArticleService;
|
|
||||||
import org.springframework.beans.factory.annotation.Autowired;
|
|
||||||
import org.springframework.web.bind.annotation.GetMapping;
|
|
||||||
import org.springframework.web.bind.annotation.PathVariable;
|
|
||||||
import org.springframework.web.bind.annotation.RequestMapping;
|
|
||||||
import org.springframework.web.bind.annotation.RestController;
|
|
||||||
import org.springframework.web.servlet.ModelAndView;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
@RestController
|
|
||||||
@RequestMapping("/api/v1/lucene")
|
|
||||||
public class SearchController {
|
|
||||||
@Autowired private BaikeMapper baikeMapper;
|
|
||||||
@Autowired private SearchService searchService;
|
|
||||||
@Autowired private ArticleService articleService;
|
|
||||||
|
|
||||||
@GetMapping("/index")
|
|
||||||
public String createIndex(int limit, int offset) {
|
|
||||||
// 拉取数据
|
|
||||||
List<Baike> baikes = baikeMapper.getAllBaike(limit, offset);
|
|
||||||
searchService.write(baikes);
|
|
||||||
return "成功";
|
|
||||||
}
|
|
||||||
|
|
||||||
@GetMapping("/indexArticle")
|
|
||||||
public String createArticleIndex() {
|
|
||||||
// 拉取数据
|
|
||||||
List<ArticleDTO> list = articleService.findArticles(new ArticleSearchDTO());
|
|
||||||
searchService.writeArticle(list);
|
|
||||||
return "成功";
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 搜索,实现高亮
|
|
||||||
*
|
|
||||||
* @param q
|
|
||||||
* @return
|
|
||||||
* @throws Exception
|
|
||||||
*/
|
|
||||||
@GetMapping("/search/{q}")
|
|
||||||
public List<Map<String, String>> getSearchText(@PathVariable String q) throws Exception {
|
|
||||||
return searchService.search(q);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 搜索,实现高亮
|
|
||||||
*
|
|
||||||
* @param q
|
|
||||||
* @return
|
|
||||||
* @throws Exception
|
|
||||||
*/
|
|
||||||
@GetMapping("/searchArticle/{q}")
|
|
||||||
public List<?> searchArticle(@PathVariable String q) throws Exception {
|
|
||||||
|
|
||||||
return searchService.searchArticle(q);
|
|
||||||
}
|
|
||||||
|
|
||||||
@GetMapping(value = "/search")
|
|
||||||
public ModelAndView test(ModelAndView mv) {
|
|
||||||
mv.setViewName("/search");
|
|
||||||
return mv;
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,8 +1,6 @@
|
|||||||
package com.rymcu.forest.lucene.lucene;
|
package com.rymcu.forest.lucene.lucene;
|
||||||
|
|
||||||
|
import com.rymcu.forest.lucene.model.ArticleLucene;
|
||||||
import com.rymcu.forest.dto.ArticleDTO;
|
|
||||||
import com.rymcu.forest.lucene.model.Baike;
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.TextField;
|
import org.apache.lucene.document.TextField;
|
||||||
@ -19,18 +17,27 @@ import java.util.concurrent.CountDownLatch;
|
|||||||
* @author suwen
|
* @author suwen
|
||||||
* @date 2021/2/2 14:10
|
* @date 2021/2/2 14:10
|
||||||
*/
|
*/
|
||||||
public class ArticleBeanIndex extends BaseIndex<ArticleDTO>{
|
public class ArticleBeanIndex extends BaseIndex<ArticleLucene> {
|
||||||
|
|
||||||
public ArticleBeanIndex(IndexWriter writer, CountDownLatch countDownLatch1,
|
public ArticleBeanIndex(
|
||||||
CountDownLatch countDownLatch2, List<ArticleDTO> list) {
|
IndexWriter writer,
|
||||||
|
CountDownLatch countDownLatch1,
|
||||||
|
CountDownLatch countDownLatch2,
|
||||||
|
List<ArticleLucene> list) {
|
||||||
super(writer, countDownLatch1, countDownLatch2, list);
|
super(writer, countDownLatch1, countDownLatch2, list);
|
||||||
}
|
}
|
||||||
public ArticleBeanIndex(String parentIndexPath, int subIndex, CountDownLatch countDownLatch1,
|
|
||||||
CountDownLatch countDownLatch2, List<ArticleDTO> list) {
|
public ArticleBeanIndex(
|
||||||
|
String parentIndexPath,
|
||||||
|
int subIndex,
|
||||||
|
CountDownLatch countDownLatch1,
|
||||||
|
CountDownLatch countDownLatch2,
|
||||||
|
List<ArticleLucene> list) {
|
||||||
super(parentIndexPath, subIndex, countDownLatch1, countDownLatch2, list);
|
super(parentIndexPath, subIndex, countDownLatch1, countDownLatch2, list);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void indexDoc(IndexWriter writer, ArticleDTO t) throws Exception {
|
public void indexDoc(IndexWriter writer, ArticleLucene t) throws Exception {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
Field id = new Field("id", t.getIdArticle() + "", TextField.TYPE_STORED);
|
Field id = new Field("id", t.getIdArticle() + "", TextField.TYPE_STORED);
|
||||||
Field title = new Field("title", t.getArticleTitle(), TextField.TYPE_STORED);
|
Field title = new Field("title", t.getArticleTitle(), TextField.TYPE_STORED);
|
||||||
@ -45,6 +52,4 @@ public class ArticleBeanIndex extends BaseIndex<ArticleDTO>{
|
|||||||
writer.updateDocument(new Term("id", t.getIdArticle() + ""), doc);
|
writer.updateDocument(new Term("id", t.getIdArticle() + ""), doc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
33
src/main/java/com/rymcu/forest/lucene/mapper/ArticleLuceneMapper.java
Executable file
33
src/main/java/com/rymcu/forest/lucene/mapper/ArticleLuceneMapper.java
Executable file
@ -0,0 +1,33 @@
|
|||||||
|
package com.rymcu.forest.lucene.mapper;
|
||||||
|
|
||||||
|
import com.rymcu.forest.dto.ArticleDTO;
|
||||||
|
import com.rymcu.forest.lucene.model.ArticleLucene;
|
||||||
|
import org.apache.ibatis.annotations.Mapper;
|
||||||
|
import org.apache.ibatis.annotations.Param;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ArticleLuceneMapper
|
||||||
|
*
|
||||||
|
* @author suwen
|
||||||
|
* @date 2021/2/3 10:00
|
||||||
|
*/
|
||||||
|
@Mapper
|
||||||
|
public interface ArticleLuceneMapper {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 加载所有文章内容
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
List<ArticleLucene> getAllArticleLucene();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 加载所有文章内容
|
||||||
|
*
|
||||||
|
* @param ids 文章id(半角逗号分隔)
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
List<ArticleDTO> getArticlesByIds(@Param("ids") String[] ids);
|
||||||
|
}
|
@ -0,0 +1,39 @@
|
|||||||
|
package com.rymcu.forest.lucene.model;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ArticleLucene
|
||||||
|
*
|
||||||
|
* @author suwen
|
||||||
|
* @date 2021/2/3 09:57
|
||||||
|
*/
|
||||||
|
@Data
|
||||||
|
@Builder
|
||||||
|
@NoArgsConstructor
|
||||||
|
@AllArgsConstructor
|
||||||
|
public class ArticleLucene {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 文章编号
|
||||||
|
*/
|
||||||
|
private String idArticle;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 文章标题
|
||||||
|
*/
|
||||||
|
private String articleTitle;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 文章内容
|
||||||
|
*/
|
||||||
|
private String articleContent;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 相关度评分
|
||||||
|
*/
|
||||||
|
private String score;
|
||||||
|
}
|
@ -0,0 +1,46 @@
|
|||||||
|
package com.rymcu.forest.lucene.service;
|
||||||
|
|
||||||
|
import com.rymcu.forest.dto.ArticleDTO;
|
||||||
|
import com.rymcu.forest.lucene.model.ArticleLucene;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* LuceneService
|
||||||
|
*
|
||||||
|
* @author suwen
|
||||||
|
* @date 2021/2/3 10:10
|
||||||
|
*/
|
||||||
|
public interface LuceneService {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 将文章的数据解析为一个个关键字词存储到索引文件中
|
||||||
|
*
|
||||||
|
* @param list
|
||||||
|
*/
|
||||||
|
void writeArticle(List<ArticleLucene> list);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 关键词搜索
|
||||||
|
*
|
||||||
|
* @param value
|
||||||
|
* @return
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
List<ArticleLucene> searchArticle(String value);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 加载所有文章内容
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
List<ArticleLucene> getAllArticleLucene();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 加载所有文章内容
|
||||||
|
*
|
||||||
|
* @param ids 文章id(半角逗号分隔)
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
List<ArticleDTO> getArticlesByIds(String[] ids);
|
||||||
|
}
|
@ -1,281 +0,0 @@
|
|||||||
package com.rymcu.forest.lucene.service;
|
|
||||||
|
|
||||||
import com.rymcu.forest.dto.ArticleDTO;
|
|
||||||
import com.rymcu.forest.lucene.lucene.ArticleBeanIndex;
|
|
||||||
import com.rymcu.forest.lucene.lucene.BaiKeBeanIndex;
|
|
||||||
import com.rymcu.forest.lucene.lucene.IKAnalyzer;
|
|
||||||
import com.rymcu.forest.lucene.model.Baike;
|
|
||||||
import com.rymcu.forest.lucene.util.SearchUtil;
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.document.Document;
|
|
||||||
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
|
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
|
||||||
import org.apache.lucene.search.Query;
|
|
||||||
import org.apache.lucene.search.ScoreDoc;
|
|
||||||
import org.apache.lucene.search.TopDocs;
|
|
||||||
import org.apache.lucene.search.highlight.*;
|
|
||||||
import org.springframework.stereotype.Service;
|
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStreamReader;
|
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.concurrent.CountDownLatch;
|
|
||||||
import java.util.concurrent.ExecutorService;
|
|
||||||
import java.util.concurrent.Executors;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* SearchService
|
|
||||||
*
|
|
||||||
* @author suwen
|
|
||||||
* @date 2021/2/2 14:01
|
|
||||||
*/
|
|
||||||
@Service
|
|
||||||
public class SearchService {
|
|
||||||
|
|
||||||
/** Lucene索引文件路径 */
|
|
||||||
private final String indexPath = System.getProperty("user.dir") + "/index";
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 封裝一个方法,用于将数据库中的数据解析为一个个关键字词存储到索引文件中
|
|
||||||
*
|
|
||||||
* @param baikes
|
|
||||||
*/
|
|
||||||
public void write(List<Baike> baikes) {
|
|
||||||
try {
|
|
||||||
int totalCount = baikes.size();
|
|
||||||
int perThreadCount = 3000;
|
|
||||||
int threadCount = totalCount / perThreadCount + (totalCount % perThreadCount == 0 ? 0 : 1);
|
|
||||||
ExecutorService pool = Executors.newFixedThreadPool(threadCount);
|
|
||||||
CountDownLatch countDownLatch1 = new CountDownLatch(1);
|
|
||||||
CountDownLatch countDownLatch2 = new CountDownLatch(threadCount);
|
|
||||||
|
|
||||||
for (int i = 0; i < threadCount; i++) {
|
|
||||||
int start = i * perThreadCount;
|
|
||||||
int end = Math.min((i + 1) * perThreadCount, totalCount);
|
|
||||||
List<Baike> subList = baikes.subList(start, end);
|
|
||||||
Runnable runnable =
|
|
||||||
new BaiKeBeanIndex("index", i, countDownLatch1, countDownLatch2, subList);
|
|
||||||
// 子线程交给线程池管理
|
|
||||||
pool.execute(runnable);
|
|
||||||
}
|
|
||||||
countDownLatch1.countDown();
|
|
||||||
System.out.println("开始创建索引");
|
|
||||||
// 等待所有线程都完成
|
|
||||||
countDownLatch2.await();
|
|
||||||
// 线程全部完成工作
|
|
||||||
System.out.println("所有线程都创建索引完毕");
|
|
||||||
// 释放线程池资源
|
|
||||||
pool.shutdown();
|
|
||||||
} catch (Exception e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 封裝一个方法,用于将数据库中的数据解析为一个个关键字词存储到索引文件中
|
|
||||||
*
|
|
||||||
* @param list
|
|
||||||
*/
|
|
||||||
public void writeArticle(List<ArticleDTO> list) {
|
|
||||||
try {
|
|
||||||
int totalCount = list.size();
|
|
||||||
int perThreadCount = 3000;
|
|
||||||
int threadCount = totalCount / perThreadCount + (totalCount % perThreadCount == 0 ? 0 : 1);
|
|
||||||
ExecutorService pool = Executors.newFixedThreadPool(threadCount);
|
|
||||||
CountDownLatch countDownLatch1 = new CountDownLatch(1);
|
|
||||||
CountDownLatch countDownLatch2 = new CountDownLatch(threadCount);
|
|
||||||
|
|
||||||
for (int i = 0; i < threadCount; i++) {
|
|
||||||
int start = i * perThreadCount;
|
|
||||||
int end = Math.min((i + 1) * perThreadCount, totalCount);
|
|
||||||
List<ArticleDTO> subList = list.subList(start, end);
|
|
||||||
Runnable runnable =
|
|
||||||
new ArticleBeanIndex("articlesIndex", i, countDownLatch1, countDownLatch2, subList);
|
|
||||||
// 子线程交给线程池管理
|
|
||||||
pool.execute(runnable);
|
|
||||||
}
|
|
||||||
countDownLatch1.countDown();
|
|
||||||
System.out.println("开始创建索引");
|
|
||||||
// 等待所有线程都完成
|
|
||||||
countDownLatch2.await();
|
|
||||||
// 线程全部完成工作
|
|
||||||
System.out.println("所有线程都创建索引完毕");
|
|
||||||
// 释放线程池资源
|
|
||||||
pool.shutdown();
|
|
||||||
} catch (Exception e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 搜索
|
|
||||||
*
|
|
||||||
* @param value
|
|
||||||
* @return
|
|
||||||
* @throws Exception
|
|
||||||
*/
|
|
||||||
public List<Map<String, String>> search(String value) throws Exception {
|
|
||||||
List<Map<String, String>> list = new ArrayList<>();
|
|
||||||
ExecutorService service = Executors.newCachedThreadPool();
|
|
||||||
// 定义分词器
|
|
||||||
Analyzer analyzer = new IKAnalyzer();
|
|
||||||
try {
|
|
||||||
IndexSearcher searcher = SearchUtil.getIndexSearcherByParentPath(indexPath, service);
|
|
||||||
String[] fields = {"title", "summary"};
|
|
||||||
// 构造Query对象
|
|
||||||
MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer);
|
|
||||||
|
|
||||||
BufferedReader in =
|
|
||||||
new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
|
|
||||||
String line = value != null ? value : in.readLine();
|
|
||||||
Query query = parser.parse(line);
|
|
||||||
// 最终被分词后添加的前缀和后缀处理器,默认是粗体<B></B>
|
|
||||||
SimpleHTMLFormatter htmlFormatter =
|
|
||||||
new SimpleHTMLFormatter("<font color=" + "\"" + "red" + "\"" + ">", "</font>");
|
|
||||||
// 高亮搜索的词添加到高亮处理器中
|
|
||||||
Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
|
|
||||||
|
|
||||||
// 获取搜索的结果,指定返回document返回的个数
|
|
||||||
// 默认搜索结果为显示第一页,1000 条,可以优化
|
|
||||||
TopDocs results = SearchUtil.getScoreDocsByPerPage(1, 100, searcher, query);
|
|
||||||
ScoreDoc[] hits = results.scoreDocs;
|
|
||||||
|
|
||||||
// 遍历,输出
|
|
||||||
for (ScoreDoc hit : hits) {
|
|
||||||
int id = hit.doc;
|
|
||||||
float score = hit.score;
|
|
||||||
Document hitDoc = searcher.doc(hit.doc);
|
|
||||||
Map<String, String> map = new HashMap<>();
|
|
||||||
map.put("id", hitDoc.get("id"));
|
|
||||||
|
|
||||||
// 获取到summary
|
|
||||||
String name = hitDoc.get("summary");
|
|
||||||
// 将查询的词和搜索词匹配,匹配到添加前缀和后缀
|
|
||||||
TokenStream tokenStream =
|
|
||||||
TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "summary", analyzer);
|
|
||||||
// 传入的第二个参数是查询的值
|
|
||||||
TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, name, false, 10);
|
|
||||||
StringBuilder baikeValue = new StringBuilder();
|
|
||||||
for (TextFragment textFragment : frag) {
|
|
||||||
if ((textFragment != null) && (textFragment.getScore() > 0)) {
|
|
||||||
// if ((frag[j] != null)) {
|
|
||||||
// 获取 summary 的值
|
|
||||||
baikeValue.append(textFragment.toString());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 获取到title
|
|
||||||
String title = hitDoc.get("title");
|
|
||||||
TokenStream titleTokenStream =
|
|
||||||
TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "title", analyzer);
|
|
||||||
TextFragment[] titleFrag =
|
|
||||||
highlighter.getBestTextFragments(titleTokenStream, title, false, 10);
|
|
||||||
StringBuilder titleValue = new StringBuilder();
|
|
||||||
for (int j = 0; j < titleFrag.length; j++) {
|
|
||||||
if ((frag[j] != null)) {
|
|
||||||
titleValue.append(titleFrag[j].toString());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
map.put("title", titleValue.toString());
|
|
||||||
map.put("summary", baikeValue.toString());
|
|
||||||
map.put("score", String.valueOf(score));
|
|
||||||
list.add(map);
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
} finally {
|
|
||||||
service.shutdownNow();
|
|
||||||
}
|
|
||||||
return list;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 搜索
|
|
||||||
*
|
|
||||||
* @param value
|
|
||||||
* @return
|
|
||||||
* @throws Exception
|
|
||||||
*/
|
|
||||||
public List<Map<String, String>> searchArticle(String value) throws Exception {
|
|
||||||
List<Map<String, String>> list = new ArrayList<>();
|
|
||||||
ExecutorService service = Executors.newCachedThreadPool();
|
|
||||||
// 定义分词器
|
|
||||||
Analyzer analyzer = new IKAnalyzer();
|
|
||||||
try {
|
|
||||||
IndexSearcher searcher =
|
|
||||||
SearchUtil.getIndexSearcherByParentPath(
|
|
||||||
System.getProperty("user.dir") + "/articlesIndex", service);
|
|
||||||
String[] fields = {"title", "summary"};
|
|
||||||
// 构造Query对象
|
|
||||||
MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer);
|
|
||||||
|
|
||||||
BufferedReader in =
|
|
||||||
new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
|
|
||||||
String line = value != null ? value : in.readLine();
|
|
||||||
Query query = parser.parse(line);
|
|
||||||
// 最终被分词后添加的前缀和后缀处理器,默认是粗体<B></B>
|
|
||||||
SimpleHTMLFormatter htmlFormatter =
|
|
||||||
new SimpleHTMLFormatter("<font color=" + "\"" + "red" + "\"" + ">", "</font>");
|
|
||||||
// 高亮搜索的词添加到高亮处理器中
|
|
||||||
Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
|
|
||||||
|
|
||||||
// 获取搜索的结果,指定返回document返回的个数
|
|
||||||
// 默认搜索结果为显示第一页,1000 条,可以优化
|
|
||||||
TopDocs results = SearchUtil.getScoreDocsByPerPage(1, 100, searcher, query);
|
|
||||||
ScoreDoc[] hits = results.scoreDocs;
|
|
||||||
|
|
||||||
// 遍历,输出
|
|
||||||
for (ScoreDoc hit : hits) {
|
|
||||||
int id = hit.doc;
|
|
||||||
float score = hit.score;
|
|
||||||
Document hitDoc = searcher.doc(hit.doc);
|
|
||||||
Map<String, String> map = new HashMap<>();
|
|
||||||
map.put("id", hitDoc.get("id"));
|
|
||||||
|
|
||||||
// 获取到summary
|
|
||||||
String name = hitDoc.get("summary");
|
|
||||||
// 将查询的词和搜索词匹配,匹配到添加前缀和后缀
|
|
||||||
TokenStream tokenStream =
|
|
||||||
TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "summary", analyzer);
|
|
||||||
// 传入的第二个参数是查询的值
|
|
||||||
TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, name, false, 10);
|
|
||||||
StringBuilder baikeValue = new StringBuilder();
|
|
||||||
for (TextFragment textFragment : frag) {
|
|
||||||
if ((textFragment != null) && (textFragment.getScore() > 0)) {
|
|
||||||
// if ((frag[j] != null)) {
|
|
||||||
// 获取 summary 的值
|
|
||||||
baikeValue.append(textFragment.toString());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 获取到title
|
|
||||||
String title = hitDoc.get("title");
|
|
||||||
TokenStream titleTokenStream =
|
|
||||||
TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "title", analyzer);
|
|
||||||
TextFragment[] titleFrag =
|
|
||||||
highlighter.getBestTextFragments(titleTokenStream, title, false, 10);
|
|
||||||
StringBuilder titleValue = new StringBuilder();
|
|
||||||
for (int j = 0; j < titleFrag.length; j++) {
|
|
||||||
if ((frag[j] != null)) {
|
|
||||||
titleValue.append(titleFrag[j].toString());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
map.put("title", titleValue.toString());
|
|
||||||
map.put("summary", baikeValue.toString());
|
|
||||||
map.put("score", String.valueOf(score));
|
|
||||||
list.add(map);
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
} finally {
|
|
||||||
service.shutdownNow();
|
|
||||||
}
|
|
||||||
return list;
|
|
||||||
}
|
|
||||||
}
|
|
@ -0,0 +1,182 @@
|
|||||||
|
package com.rymcu.forest.lucene.service.impl;
|
||||||
|
|
||||||
|
import com.rymcu.forest.dto.ArticleDTO;
|
||||||
|
import com.rymcu.forest.lucene.lucene.ArticleBeanIndex;
|
||||||
|
import com.rymcu.forest.lucene.lucene.IKAnalyzer;
|
||||||
|
import com.rymcu.forest.lucene.mapper.ArticleLuceneMapper;
|
||||||
|
import com.rymcu.forest.lucene.model.ArticleLucene;
|
||||||
|
import com.rymcu.forest.lucene.service.LuceneService;
|
||||||
|
import com.rymcu.forest.lucene.util.SearchUtil;
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
|
||||||
|
import org.apache.lucene.queryparser.classic.ParseException;
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.ScoreDoc;
|
||||||
|
import org.apache.lucene.search.TopDocs;
|
||||||
|
import org.apache.lucene.search.highlight.*;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import javax.annotation.Resource;
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.CountDownLatch;
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.Executors;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* LuceneServiceImpl
|
||||||
|
*
|
||||||
|
* @author suwen
|
||||||
|
* @date 2021/2/3 10:29
|
||||||
|
*/
|
||||||
|
@Service
|
||||||
|
public class LuceneServiceImpl implements LuceneService {
|
||||||
|
|
||||||
|
@Resource private ArticleLuceneMapper luceneMapper;
|
||||||
|
|
||||||
|
/** Lucene索引文件路径 */
|
||||||
|
private final String indexPath = System.getProperty("user.dir") + "/index";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 将文章的数据解析为一个个关键字词存储到索引文件中
|
||||||
|
*
|
||||||
|
* @param list
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void writeArticle(List<ArticleLucene> list) {
|
||||||
|
try {
|
||||||
|
int totalCount = list.size();
|
||||||
|
int perThreadCount = 3000;
|
||||||
|
int threadCount = totalCount / perThreadCount + (totalCount % perThreadCount == 0 ? 0 : 1);
|
||||||
|
ExecutorService pool = Executors.newFixedThreadPool(threadCount);
|
||||||
|
CountDownLatch countDownLatch1 = new CountDownLatch(1);
|
||||||
|
CountDownLatch countDownLatch2 = new CountDownLatch(threadCount);
|
||||||
|
|
||||||
|
for (int i = 0; i < threadCount; i++) {
|
||||||
|
int start = i * perThreadCount;
|
||||||
|
int end = Math.min((i + 1) * perThreadCount, totalCount);
|
||||||
|
List<ArticleLucene> subList = list.subList(start, end);
|
||||||
|
Runnable runnable =
|
||||||
|
new ArticleBeanIndex("index", i, countDownLatch1, countDownLatch2, subList);
|
||||||
|
// 子线程交给线程池管理
|
||||||
|
pool.execute(runnable);
|
||||||
|
}
|
||||||
|
countDownLatch1.countDown();
|
||||||
|
System.out.println("开始创建索引");
|
||||||
|
// 等待所有线程都完成
|
||||||
|
countDownLatch2.await();
|
||||||
|
// 线程全部完成工作
|
||||||
|
System.out.println("所有线程都创建索引完毕");
|
||||||
|
// 释放线程池资源
|
||||||
|
pool.shutdown();
|
||||||
|
} catch (Exception e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 关键词搜索
|
||||||
|
*
|
||||||
|
* @param value
|
||||||
|
* @return
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public List<ArticleLucene> searchArticle(String value) {
|
||||||
|
List<ArticleLucene> resList = new ArrayList<>();
|
||||||
|
ExecutorService service = Executors.newCachedThreadPool();
|
||||||
|
// 定义分词器
|
||||||
|
Analyzer analyzer = new IKAnalyzer();
|
||||||
|
try {
|
||||||
|
IndexSearcher searcher = SearchUtil.getIndexSearcherByParentPath(indexPath, service);
|
||||||
|
String[] fields = {"title", "summary"};
|
||||||
|
// 构造Query对象
|
||||||
|
MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer);
|
||||||
|
|
||||||
|
BufferedReader in =
|
||||||
|
new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
|
||||||
|
String line = value != null ? value : in.readLine();
|
||||||
|
Query query = parser.parse(line);
|
||||||
|
// 最终被分词后添加的前缀和后缀处理器,默认是粗体<B></B>
|
||||||
|
SimpleHTMLFormatter htmlFormatter =
|
||||||
|
new SimpleHTMLFormatter("<font color=" + "\"" + "red" + "\"" + ">", "</font>");
|
||||||
|
// 高亮搜索的词添加到高亮处理器中
|
||||||
|
Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
|
||||||
|
|
||||||
|
// 获取搜索的结果,指定返回document返回的个数
|
||||||
|
// 默认搜索结果为显示第一页,1000 条,可以优化
|
||||||
|
TopDocs results = SearchUtil.getScoreDocsByPerPage(1, 100, searcher, query);
|
||||||
|
ScoreDoc[] hits = results.scoreDocs;
|
||||||
|
|
||||||
|
// 遍历,输出
|
||||||
|
for (ScoreDoc hit : hits) {
|
||||||
|
int id = hit.doc;
|
||||||
|
float score = hit.score;
|
||||||
|
Document hitDoc = searcher.doc(hit.doc);
|
||||||
|
Map<String, String> map = new HashMap<>();
|
||||||
|
map.put("id", hitDoc.get("id"));
|
||||||
|
|
||||||
|
// 获取到summary
|
||||||
|
String name = hitDoc.get("summary");
|
||||||
|
// 将查询的词和搜索词匹配,匹配到添加前缀和后缀
|
||||||
|
TokenStream tokenStream =
|
||||||
|
TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "summary", analyzer);
|
||||||
|
// 传入的第二个参数是查询的值
|
||||||
|
TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, name, false, 10);
|
||||||
|
StringBuilder baikeValue = new StringBuilder();
|
||||||
|
for (TextFragment textFragment : frag) {
|
||||||
|
if ((textFragment != null) && (textFragment.getScore() > 0)) {
|
||||||
|
// if ((frag[j] != null)) {
|
||||||
|
// 获取 summary 的值
|
||||||
|
baikeValue.append(textFragment.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 获取到title
|
||||||
|
String title = hitDoc.get("title");
|
||||||
|
TokenStream titleTokenStream =
|
||||||
|
TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "title", analyzer);
|
||||||
|
TextFragment[] titleFrag =
|
||||||
|
highlighter.getBestTextFragments(titleTokenStream, title, false, 10);
|
||||||
|
StringBuilder titleValue = new StringBuilder();
|
||||||
|
for (int j = 0; j < titleFrag.length; j++) {
|
||||||
|
if ((frag[j] != null)) {
|
||||||
|
titleValue.append(titleFrag[j].toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
resList.add(
|
||||||
|
ArticleLucene.builder()
|
||||||
|
.idArticle(hitDoc.get("id"))
|
||||||
|
.articleTitle(titleValue.toString())
|
||||||
|
.articleContent(baikeValue.toString())
|
||||||
|
.score(String.valueOf(score))
|
||||||
|
.build());
|
||||||
|
}
|
||||||
|
} catch (IOException | ParseException | InvalidTokenOffsetsException e) {
|
||||||
|
System.out.println(e.getMessage());
|
||||||
|
e.printStackTrace();
|
||||||
|
} finally {
|
||||||
|
service.shutdownNow();
|
||||||
|
}
|
||||||
|
return resList;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<ArticleLucene> getAllArticleLucene() {
|
||||||
|
return luceneMapper.getAllArticleLucene();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<ArticleDTO> getArticlesByIds(String[] ids) {
|
||||||
|
return luceneMapper.getArticlesByIds(ids);
|
||||||
|
}
|
||||||
|
}
|
@ -1,21 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd" >
<!-- Mapper for the lucene_baike sample table (id/title plus a LONGVARCHAR summary).
     NOTE(review): this file is removed in this commit in favour of ArticleLuceneMapper. -->
<mapper namespace="com.rymcu.forest.lucene.mapper.BaikeMapper" >
  <!-- Base projection: primary key and title only. -->
  <resultMap id="BaseResultMap" type="com.rymcu.forest.lucene.model.Baike" >
    <id column="id" property="id" jdbcType="INTEGER" />
    <result column="title" property="title" jdbcType="VARCHAR" />
  </resultMap>
  <!-- Extends the base map with the large summary column. -->
  <resultMap id="ResultMapWithBLOBs" type="com.rymcu.forest.lucene.model.Baike" extends="BaseResultMap" >
    <result column="summary" property="summary" jdbcType="LONGVARCHAR" />
  </resultMap>

  <sql id="Base_Column_List" >
    id, title
  </sql>
  <sql id="Blob_Column_List" >
    summary
  </sql>
  <!-- Paged scan over the whole table; caller supplies limit/offset. -->
  <select id="getAllBaike" resultType="com.rymcu.forest.lucene.model.Baike">
    select * from lucene_baike limit #{limit} offset #{offset};
  </select>
</mapper>
|
|
64
src/main/java/mapper/lucene/ArticleLuceneMapper.xml
Executable file
64
src/main/java/mapper/lucene/ArticleLuceneMapper.xml
Executable file
@ -0,0 +1,64 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd" >
<!-- Mapper backing the Lucene article index: bulk export for index building and
     id-based lookup for rendering search results. -->
<mapper namespace="com.rymcu.forest.lucene.mapper.ArticleLuceneMapper">
    <!-- Minimal projection: article id and title. -->
    <resultMap id="BaseResultMap" type="com.rymcu.forest.lucene.model.ArticleLucene">
        <id column="id" property="idArticle" jdbcType="INTEGER"/>
        <result column="article_title" property="articleTitle" jdbcType="VARCHAR"/>
    </resultMap>
    <!-- Adds the LONGVARCHAR article body on top of the base projection. -->
    <resultMap id="ResultMapWithBLOBs" type="com.rymcu.forest.lucene.model.ArticleLucene" extends="BaseResultMap">
        <result column="article_content" property="articleContent" jdbcType="LONGVARCHAR"/>
    </resultMap>
    <!-- Full article DTO (joined with the author's nickname/avatar) returned by the search API. -->
    <resultMap id="DTOResultMap" type="com.rymcu.forest.dto.ArticleDTO">
        <result column="id" property="idArticle"/>
        <result column="article_title" property="articleTitle"/>
        <result column="article_thumbnail_url" property="articleThumbnailUrl"/>
        <result column="article_author_id" property="articleAuthorId"/>
        <result column="nickname" property="articleAuthorName"/>
        <result column="avatar_url" property="articleAuthorAvatarUrl"/>
        <result column="article_type" property="articleType"/>
        <result column="article_tags" property="articleTags"/>
        <result column="article_view_count" property="articleViewCount"/>
        <result column="article_preview_content" property="articlePreviewContent"/>
        <result column="article_content" property="articleContent"/>
        <result column="comment_count" property="articleCommentCount"/>
        <result column="time_ago" property="timeAgo"/>
        <result column="article_permalink" property="articlePermalink"/>
        <result column="article_link" property="articleLink"/>
        <result column="article_status" property="articleStatus"/>
        <result column="updated_time" property="updatedTime"/>
        <result column="sort_no" property="sortNo"/>
        <result column="article_perfect" property="articlePerfect"/>
        <result column="article_thumbs_up_count" property="articleThumbsUpCount"/>
        <result column="article_sponsor_count" property="articleSponsorCount"/>
    </resultMap>
    <sql id="Base_Column_List">
        id, article_title
    </sql>
    <sql id="Blob_Column_List">
        article_content
    </sql>
    <!-- Export every article (id, title, body) as the Lucene indexing corpus.
         Left join keeps articles even when no content row exists (article_content is then null).
         No trailing semicolon: plugins such as PageHelper wrap/append to mapped SQL,
         and an embedded ';' breaks the rewritten statement. -->
    <select id="getAllArticleLucene" resultMap="ResultMapWithBLOBs">
        select art.id, art.article_title, content.article_content
        from forest_article art
        left join forest_article_content content on art.id = content.id_article
    </select>

    <!-- Fetch published articles (article_status = 0) by id, keeping the relevance
         order of the ids array via MySQL FIELD(). Callers must pass a non-empty
         ids array; an empty one would render invalid "in ()" SQL. -->
    <select id="getArticlesByIds" resultMap="DTOResultMap">
        select art.*, su.nickname, su.avatar_url
        from forest_article art
        join forest_user su on art.article_author_id = su.id
        where article_status = 0
        and art.id in
        <foreach collection="ids" item="id" index="index"
                 open="(" close=")" separator=",">
            #{id}
        </foreach>
        order by
        field(art.id
        <foreach collection="ids" item="id" index="index"
                 open="," close=")" separator=",">
            #{id}
        </foreach>
    </select>
</mapper>
|
Loading…
Reference in New Issue
Block a user