🎨 Replace the deprecated Lucene method TokenSources.getAnyTokenStream with TokenSources.getTokenStream

ronger 2022-06-08 10:30:38 +08:00
parent 1bf1041bb7
commit e650e39d10
3 changed files with 153 additions and 157 deletions
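All three services apply the same change: the deprecated TokenSources.getAnyTokenStream(IndexReader, docId, field, Analyzer) call is replaced by TokenSources.getTokenStream(field, Fields, text, Analyzer, maxStartOffset), which takes the hit's term vectors plus the stored field text, with -1 meaning "no start-offset limit". A minimal sketch of the new call in isolation (the HighlightSketch class and highlightField helper are illustrative, not part of this commit):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.highlight.TokenSources;

public class HighlightSketch {
    // Highlights one stored field of one hit with the non-deprecated overload,
    // mirroring the calls introduced by this commit.
    static String highlightField(IndexSearcher searcher, Query query, int docId,
                                 String field, String fieldText, Analyzer analyzer) throws Exception {
        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>");
        Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
        // Before: TokenSources.getAnyTokenStream(searcher.getIndexReader(), docId, field, analyzer);
        // After: pass the hit's term vectors and the stored text; -1 disables the start-offset limit.
        TokenStream tokenStream = TokenSources.getTokenStream(
                field, searcher.getIndexReader().getTermVectors(docId), fieldText, analyzer, -1);
        TextFragment[] fragments = highlighter.getBestTextFragments(tokenStream, fieldText, false, 10);
        StringBuilder sb = new StringBuilder();
        for (TextFragment fragment : fragments) {
            if (fragment != null && fragment.getScore() > 0) {
                sb.append(fragment.toString());
            }
        }
        return sb.toString();
    }
}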


@@ -156,12 +156,11 @@ public class LuceneServiceImpl implements LuceneService {
float score = hit.score;
Document hitDoc = searcher.doc(hit.doc);
// Get the summary
String name = hitDoc.get("summary");
String summary = hitDoc.get("summary");
// Add the prefix and suffix to the terms that match the query
TokenStream tokenStream =
TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "summary", analyzer);
TokenStream tokenStream = TokenSources.getTokenStream("summary", searcher.getIndexReader().getTermVectors(id), summary, analyzer, -1);
// The second argument passed in is the queried value
TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, name, false, 10);
TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, summary, false, 10);
StringBuilder baikeValue = new StringBuilder();
for (TextFragment textFragment : frag) {
if ((textFragment != null) && (textFragment.getScore() > 0)) {
@@ -173,8 +172,7 @@ public class LuceneServiceImpl implements LuceneService {
// Get the title
String title = hitDoc.get("title");
TokenStream titleTokenStream =
TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "title", analyzer);
TokenStream titleTokenStream = TokenSources.getTokenStream("title", searcher.getIndexReader().getTermVectors(id), title, analyzer, -1);
TextFragment[] titleFrag =
highlighter.getBestTextFragments(titleTokenStream, title, false, 10);
StringBuilder titleValue = new StringBuilder();


@@ -41,150 +41,149 @@ import java.util.concurrent.Executors;
@Service
public class PortfolioLuceneServiceImpl implements PortfolioLuceneService {
@Resource private PortfolioLuceneMapper portfolioLuceneMapper;
@Resource
private PortfolioLuceneMapper portfolioLuceneMapper;
/**
* Parses the article data into individual keyword terms and stores them in the index file
*
* @param list
*/
@Override
public void writePortfolio(List<PortfolioLucene> list) {
try {
int totalCount = list.size();
int perThreadCount = 3000;
int threadCount = totalCount / perThreadCount + (totalCount % perThreadCount == 0 ? 0 : 1);
ExecutorService pool = Executors.newFixedThreadPool(threadCount);
CountDownLatch countDownLatch1 = new CountDownLatch(1);
CountDownLatch countDownLatch2 = new CountDownLatch(threadCount);
for (int i = 0; i < threadCount; i++) {
int start = i * perThreadCount;
int end = Math.min((i + 1) * perThreadCount, totalCount);
List<PortfolioLucene> subList = list.subList(start, end);
Runnable runnable =
new PortfolioBeanIndex(LucenePath.PORTFOLIO_PATH, i, countDownLatch1, countDownLatch2, subList);
// Hand the worker thread over to the thread pool
pool.execute(runnable);
}
countDownLatch1.countDown();
System.out.println("开始创建索引");
// Wait for all threads to finish
countDownLatch2.await();
// All threads have finished their work
System.out.println("所有线程都创建索引完毕");
// Release the thread pool resources
pool.shutdown();
} catch (Exception e) {
e.printStackTrace();
}
}
@Override
public void writePortfolio(String id) {
writePortfolio(portfolioLuceneMapper.getById(id));
}
@Override
public void writePortfolio(PortfolioLucene portfolioLucene) {
PortfolioIndexUtil.addIndex(portfolioLucene);
}
@Override
public void updatePortfolio(String id) {
PortfolioIndexUtil.updateIndex(portfolioLuceneMapper.getById(id));
}
@Override
public void deletePortfolio(String id) {
PortfolioIndexUtil.deleteIndex(id);
}
@Override
public List<PortfolioLucene> getAllPortfolioLucene() {
return portfolioLuceneMapper.getAllPortfolioLucene();
}
@Override
public List<PortfolioDTO> getPortfoliosByIds(String[] ids) {
return portfolioLuceneMapper.getPortfoliosByIds(ids);
}
@Override
public List<PortfolioLucene> searchPortfolio(String value) {
List<PortfolioLucene> resList = new ArrayList<>();
ExecutorService service = Executors.newCachedThreadPool();
// Define the analyzer
Analyzer analyzer = new IKAnalyzer();
try {
IndexSearcher searcher = SearchUtil.getIndexSearcherByParentPath(LucenePath.PORTFOLIO_PATH, service);
String[] fields = {"title", "summary"};
// Build the Query object
MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer);
BufferedReader in =
new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
String line = value != null ? value : in.readLine();
Query query = parser.parse(line);
// Formatter that adds the prefix and suffix around tokenized matches; the default is bold <B></B>
SimpleHTMLFormatter htmlFormatter =
new SimpleHTMLFormatter("<font color=" + "\"" + "red" + "\"" + ">", "</font>");
// Add the search terms to be highlighted to the highlighter
Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
// Get the search results and specify how many documents to return
// TODO by default the search result is the first page of 1000; can be optimized
TopDocs results = SearchUtil.getScoreDocsByPerPage(1, 100, searcher, query);
ScoreDoc[] hits = results.scoreDocs;
// Iterate over the hits
for (ScoreDoc hit : hits) {
int id = hit.doc;
float score = hit.score;
Document hitDoc = searcher.doc(hit.doc);
// Get the summary
String summary = hitDoc.get("summary");
// Add the prefix and suffix to the terms that match the query
TokenStream tokenStream =
TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "summary", analyzer);
TokenStream tokenStream = TokenSources.getTokenStream("summary", searcher.getIndexReader().getTermVectors(id), summary, analyzer, -1);
// The second argument passed in is the queried value
TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, summary, false, 10);
StringBuilder sb = new StringBuilder();
for (TextFragment textFragment : frag) {
if ((textFragment != null) && (textFragment.getScore() > 0)) {
// if ((frag[j] != null)) {
// Get the summary value
sb.append(textFragment.toString());
}
}
// Get the title
String title = hitDoc.get("title");
TokenStream titleTokenStream =
TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "title", analyzer);
TokenStream titleTokenStream = TokenSources.getTokenStream("title", searcher.getIndexReader().getTermVectors(id), title, analyzer, -1);
TextFragment[] titleFrag =
highlighter.getBestTextFragments(titleTokenStream, title, false, 10);
StringBuilder titleValue = new StringBuilder();
for (int j = 0; j < titleFrag.length; j++) {
if ((frag[j] != null)) {
titleValue.append(titleFrag[j].toString());
}
}
resList.add(
PortfolioLucene.builder()
.idPortfolio(hitDoc.get("id"))
.portfolioTitle(titleValue.toString())
.portfolioDescription(sb.toString())
.score(String.valueOf(score))
.build());
}
} catch (IOException | ParseException | InvalidTokenOffsetsException e) {
System.out.println(e.getMessage());
e.printStackTrace();
} finally {
service.shutdownNow();
}
return resList;
}
}


@@ -133,13 +133,13 @@ public class UserLuceneServiceImpl implements UserLuceneService {
int id = hit.doc;
float score = hit.score;
Document hitDoc = searcher.doc(hit.doc);
// Get the summary
String name = hitDoc.get("signature");
// Get the signature
String signature = hitDoc.get("signature");
// Add the prefix and suffix to the terms that match the query
TokenStream tokenStream =
TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "signature", analyzer);
TokenStream tokenStream = TokenSources.getTokenStream("signature", searcher.getIndexReader().getTermVectors(id), signature, analyzer, -1);
// The second argument passed in is the queried value
TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, name, false, 10);
TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, signature, false, 10);
StringBuilder baikeValue = new StringBuilder();
for (TextFragment textFragment : frag) {
if ((textFragment != null) && (textFragment.getScore() > 0)) {
@@ -148,12 +148,11 @@ public class UserLuceneServiceImpl implements UserLuceneService {
baikeValue.append(textFragment.toString());
}
}
// Get the title
String title = hitDoc.get("nickname");
TokenStream titleTokenStream =
TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "nickname", analyzer);
// Get the nickname
String nickname = hitDoc.get("nickname");
TokenStream titleTokenStream = TokenSources.getTokenStream("nickname", searcher.getIndexReader().getTermVectors(id), nickname, analyzer, -1);
TextFragment[] titleFrag =
highlighter.getBestTextFragments(titleTokenStream, title, false, 10);
highlighter.getBestTextFragments(titleTokenStream, nickname, false, 10);
StringBuilder titleValue = new StringBuilder();
for (int j = 0; j < titleFrag.length; j++) {
if ((frag[j] != null)) {