🎨 Improve how search results are displayed

ronger 2021-06-02 21:47:13 +08:00
parent 60d3380758
commit ce1767ac3f
2 changed files with 154 additions and 148 deletions
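In short: getAllArticleLucene() now selects the rendered HTML of each article (article_content_html, aliased to article_content) and strips the markup with Html2TextUtil before the text is indexed and summarized, so search result snippets display as clean plain text.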

LuceneServiceImpl.java

@@ -9,6 +9,7 @@ import com.rymcu.forest.lucene.service.LuceneService;
 import com.rymcu.forest.lucene.util.ArticleIndexUtil;
 import com.rymcu.forest.lucene.util.LucenePath;
 import com.rymcu.forest.lucene.util.SearchUtil;
+import com.rymcu.forest.util.Html2TextUtil;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.Document;
@@ -41,157 +42,162 @@ import java.util.concurrent.Executors;
 @Service
 public class LuceneServiceImpl implements LuceneService {

-  @Resource private ArticleLuceneMapper luceneMapper;
+  @Resource
+  private ArticleLuceneMapper luceneMapper;

   /**
    * Parse each article into individual keyword terms and store them in the index files
    *
    * @param list
    */
   @Override
   public void writeArticle(List<ArticleLucene> list) {
     try {
       int totalCount = list.size();
       int perThreadCount = 3000;
       int threadCount = totalCount / perThreadCount + (totalCount % perThreadCount == 0 ? 0 : 1);
       ExecutorService pool = Executors.newFixedThreadPool(threadCount);
       CountDownLatch countDownLatch1 = new CountDownLatch(1);
       CountDownLatch countDownLatch2 = new CountDownLatch(threadCount);
       for (int i = 0; i < threadCount; i++) {
         int start = i * perThreadCount;
         int end = Math.min((i + 1) * perThreadCount, totalCount);
         List<ArticleLucene> subList = list.subList(start, end);
         Runnable runnable =
             new ArticleBeanIndex(
                 LucenePath.ARTICLE_INDEX_PATH, i, countDownLatch1, countDownLatch2, subList);
         // hand the worker off to the thread pool
         pool.execute(runnable);
       }
       countDownLatch1.countDown();
       System.out.println("开始创建索引");
       // wait until every worker has finished
       countDownLatch2.await();
       // all workers are done
       System.out.println("所有线程都创建索引完毕");
       // release the thread pool
       pool.shutdown();
     } catch (Exception e) {
       e.printStackTrace();
     }
   }

   @Override
   public void writeArticle(String id) {
     writeArticle(luceneMapper.getById(id));
   }

   @Override
   public void writeArticle(ArticleLucene articleLucene) {
     ArticleIndexUtil.addIndex(articleLucene);
   }

   @Override
   public void updateArticle(String id) {
     ArticleIndexUtil.updateIndex(luceneMapper.getById(id));
   }

   @Override
   public void deleteArticle(String id) {
     ArticleIndexUtil.deleteIndex(id);
   }

   /**
    * Keyword search
    *
    * @param value
    * @return
    * @throws Exception
    */
   @Override
   public List<ArticleLucene> searchArticle(String value) {
     List<ArticleLucene> resList = new ArrayList<>();
     ExecutorService service = Executors.newCachedThreadPool();
     // the tokenizer
     Analyzer analyzer = new IKAnalyzer();
     try {
       IndexSearcher searcher =
           SearchUtil.getIndexSearcherByParentPath(LucenePath.ARTICLE_INDEX_PATH, service);
       String[] fields = {"title", "summary"};
       // build the Query object
       MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer);
       BufferedReader in =
           new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
       String line = value != null ? value : in.readLine();
       Query query = parser.parse(line);
       // formatter that wraps each matched term; the default would be bold <B></B>
       SimpleHTMLFormatter htmlFormatter =
           new SimpleHTMLFormatter("<font color=\"red\">", "</font>");
       // attach the formatter to the highlighter
       Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
       // fetch the search results, capping how many documents are returned
       // TODO only the first page (100 hits) is returned by default; pagination could be improved
       TopDocs results = SearchUtil.getScoreDocsByPerPage(1, 100, searcher, query);
       ScoreDoc[] hits = results.scoreDocs;
       // iterate over the hits
       for (ScoreDoc hit : hits) {
         int id = hit.doc;
         float score = hit.score;
         Document hitDoc = searcher.doc(hit.doc);
         // read the summary field
         String name = hitDoc.get("summary");
         // wrap every query-term match in the text with the prefix and suffix tags
         TokenStream tokenStream =
             TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "summary", analyzer);
         // the second argument is the text being highlighted
         TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, name, false, 10);
         StringBuilder baikeValue = new StringBuilder();
         for (TextFragment textFragment : frag) {
           if ((textFragment != null) && (textFragment.getScore() > 0)) {
             // collect the highlighted summary fragments
             baikeValue.append(textFragment);
           }
         }
         // read the title field
         String title = hitDoc.get("title");
         TokenStream titleTokenStream =
             TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "title", analyzer);
         TextFragment[] titleFrag =
             highlighter.getBestTextFragments(titleTokenStream, title, false, 10);
         StringBuilder titleValue = new StringBuilder();
         for (int j = 0; j < titleFrag.length; j++) {
           if (titleFrag[j] != null) {
             titleValue.append(titleFrag[j].toString());
           }
         }
         resList.add(
             ArticleLucene.builder()
                 .idArticle(hitDoc.get("id"))
                 .articleTitle(titleValue.toString())
                 .articleContent(baikeValue.toString())
                 .score(String.valueOf(score))
                 .build());
       }
     } catch (IOException | ParseException | InvalidTokenOffsetsException e) {
       e.printStackTrace();
     } finally {
       service.shutdownNow();
     }
     return resList;
   }

   @Override
   public List<ArticleLucene> getAllArticleLucene() {
-    return luceneMapper.getAllArticleLucene();
+    List<ArticleLucene> list = luceneMapper.getAllArticleLucene();
+    for (ArticleLucene articleLucene : list) {
+      articleLucene.setArticleContent(Html2TextUtil.getContent(articleLucene.getArticleContent()));
+    }
+    return list;
   }

   @Override
   public List<ArticleDTO> getArticlesByIds(String[] ids) {
     return luceneMapper.getArticlesByIds(ids);
   }
 }
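The Html2TextUtil.getContent helper that the new getAllArticleLucene() calls is not part of this diff. A minimal sketch of what such a helper could look like, assuming a hand-rolled tag stripper (a real implementation might instead delegate to an HTML parser such as jsoup via Jsoup.parse(html).text()):

// Assumption: a self-contained stand-in for Html2TextUtil.getContent,
// not the project's actual utility.
public final class Html2TextSketch {

  /** Strips tags, decodes a few common entities, and collapses whitespace. */
  public static String getContent(String html) {
    if (html == null || html.isEmpty()) {
      return "";
    }
    String text = html
        .replaceAll("(?is)<(script|style)[^>]*>.*?</\\1>", " ") // drop script/style blocks entirely
        .replaceAll("(?s)<[^>]+>", " ");                        // drop all remaining tags
    return text
        .replace("&nbsp;", " ")
        .replace("&amp;", "&")
        .replace("&lt;", "<")
        .replace("&gt;", ">")
        .replaceAll("\\s+", " ")
        .trim();
  }

  public static void main(String[] args) {
    // "<p>Hello <b>Lucene</b> &amp; IK</p>" -> "Hello Lucene & IK"
    System.out.println(getContent("<p>Hello <b>Lucene</b> &amp; IK</p>"));
  }
}

Stripping before indexing means the IK tokenizer never sees tag names or attributes as terms, and the only markup left in a snippet is what the highlighter itself adds.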

ArticleLuceneMapper.xml

@@ -32,7 +32,7 @@
         <result column="article_sponsor_count" property="articleSponsorCount"></result>
     </resultMap>
     <select id="getAllArticleLucene" resultMap="ResultMapWithBLOBs">
-        select art.id, art.article_title, content.article_content
+        select art.id, art.article_title, content.article_content_html as article_content
        from forest_article art
        join forest_article_content content on art.id = content.id_article
        where article_status = 0;
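With article_content_html aliased to article_content, the service-layer stripping above leaves plain text for indexing, so the only markup in a rendered result is the <font color="red"> wrapper the highlighter adds. A minimal, self-contained sketch of that highlighting path, with Lucene's StandardAnalyzer standing in for the project's IKAnalyzer (an assumption made so the example runs without the IK dependency; the highlighter calls are the same lucene-highlighter APIs used in the diff):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

public class HighlightSketch {
  public static void main(String[] args) throws Exception {
    Analyzer analyzer = new StandardAnalyzer();
    // Same formatter as LuceneServiceImpl: wrap matches in a red <font> tag.
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>");
    Query query = new QueryParser("summary", analyzer).parse("lucene");
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
    String summary = "Apache Lucene is a full-text search library.";
    TokenStream ts = analyzer.tokenStream("summary", summary);
    // Prints: Apache <font color="red">Lucene</font> is a full-text search library.
    System.out.println(highlighter.getBestFragment(ts, summary));
  }
}

Feeding the highlighter plain text is the design point of this commit: if raw markup reached it, tag fragments could end up inside or around the <font> wrapper and break the snippet rendering.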