Merge branch 'wx-dev' of https://github.com/rymcu/forest into wx-dev
This commit is contained in:
commit
ca05c30630
@ -9,6 +9,7 @@ import com.rymcu.forest.lucene.service.LuceneService;
|
|||||||
import com.rymcu.forest.lucene.util.ArticleIndexUtil;
|
import com.rymcu.forest.lucene.util.ArticleIndexUtil;
|
||||||
import com.rymcu.forest.lucene.util.LucenePath;
|
import com.rymcu.forest.lucene.util.LucenePath;
|
||||||
import com.rymcu.forest.lucene.util.SearchUtil;
|
import com.rymcu.forest.lucene.util.SearchUtil;
|
||||||
|
import com.rymcu.forest.util.Html2TextUtil;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
@ -41,157 +42,162 @@ import java.util.concurrent.Executors;
|
|||||||
@Service
|
@Service
|
||||||
public class LuceneServiceImpl implements LuceneService {
|
public class LuceneServiceImpl implements LuceneService {
|
||||||
|
|
||||||
@Resource private ArticleLuceneMapper luceneMapper;
|
@Resource
|
||||||
|
private ArticleLuceneMapper luceneMapper;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 将文章的数据解析为一个个关键字词存储到索引文件中
|
* 将文章的数据解析为一个个关键字词存储到索引文件中
|
||||||
*
|
*
|
||||||
* @param list
|
* @param list
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void writeArticle(List<ArticleLucene> list) {
|
public void writeArticle(List<ArticleLucene> list) {
|
||||||
try {
|
try {
|
||||||
int totalCount = list.size();
|
int totalCount = list.size();
|
||||||
int perThreadCount = 3000;
|
int perThreadCount = 3000;
|
||||||
int threadCount = totalCount / perThreadCount + (totalCount % perThreadCount == 0 ? 0 : 1);
|
int threadCount = totalCount / perThreadCount + (totalCount % perThreadCount == 0 ? 0 : 1);
|
||||||
ExecutorService pool = Executors.newFixedThreadPool(threadCount);
|
ExecutorService pool = Executors.newFixedThreadPool(threadCount);
|
||||||
CountDownLatch countDownLatch1 = new CountDownLatch(1);
|
CountDownLatch countDownLatch1 = new CountDownLatch(1);
|
||||||
CountDownLatch countDownLatch2 = new CountDownLatch(threadCount);
|
CountDownLatch countDownLatch2 = new CountDownLatch(threadCount);
|
||||||
|
|
||||||
for (int i = 0; i < threadCount; i++) {
|
for (int i = 0; i < threadCount; i++) {
|
||||||
int start = i * perThreadCount;
|
int start = i * perThreadCount;
|
||||||
int end = Math.min((i + 1) * perThreadCount, totalCount);
|
int end = Math.min((i + 1) * perThreadCount, totalCount);
|
||||||
List<ArticleLucene> subList = list.subList(start, end);
|
List<ArticleLucene> subList = list.subList(start, end);
|
||||||
Runnable runnable =
|
Runnable runnable =
|
||||||
new ArticleBeanIndex(
|
new ArticleBeanIndex(
|
||||||
LucenePath.ARTICLE_INDEX_PATH, i, countDownLatch1, countDownLatch2, subList);
|
LucenePath.ARTICLE_INDEX_PATH, i, countDownLatch1, countDownLatch2, subList);
|
||||||
// 子线程交给线程池管理
|
// 子线程交给线程池管理
|
||||||
pool.execute(runnable);
|
pool.execute(runnable);
|
||||||
}
|
}
|
||||||
countDownLatch1.countDown();
|
countDownLatch1.countDown();
|
||||||
System.out.println("开始创建索引");
|
System.out.println("开始创建索引");
|
||||||
// 等待所有线程都完成
|
// 等待所有线程都完成
|
||||||
countDownLatch2.await();
|
countDownLatch2.await();
|
||||||
// 线程全部完成工作
|
// 线程全部完成工作
|
||||||
System.out.println("所有线程都创建索引完毕");
|
System.out.println("所有线程都创建索引完毕");
|
||||||
// 释放线程池资源
|
// 释放线程池资源
|
||||||
pool.shutdown();
|
pool.shutdown();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void writeArticle(String id) {
|
|
||||||
writeArticle(luceneMapper.getById(id));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void writeArticle(ArticleLucene articleLucene) {
|
|
||||||
ArticleIndexUtil.addIndex(articleLucene);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void updateArticle(String id) {
|
|
||||||
ArticleIndexUtil.updateIndex(luceneMapper.getById(id));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void deleteArticle(String id) {
|
|
||||||
ArticleIndexUtil.deleteIndex(id);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 关键词搜索
|
|
||||||
*
|
|
||||||
* @param value
|
|
||||||
* @return
|
|
||||||
* @throws Exception
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public List<ArticleLucene> searchArticle(String value) {
|
|
||||||
List<ArticleLucene> resList = new ArrayList<>();
|
|
||||||
ExecutorService service = Executors.newCachedThreadPool();
|
|
||||||
// 定义分词器
|
|
||||||
Analyzer analyzer = new IKAnalyzer();
|
|
||||||
try {
|
|
||||||
IndexSearcher searcher =
|
|
||||||
SearchUtil.getIndexSearcherByParentPath(LucenePath.ARTICLE_INDEX_PATH, service);
|
|
||||||
String[] fields = {"title", "summary"};
|
|
||||||
// 构造Query对象
|
|
||||||
MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer);
|
|
||||||
|
|
||||||
BufferedReader in =
|
|
||||||
new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
|
|
||||||
String line = value != null ? value : in.readLine();
|
|
||||||
Query query = parser.parse(line);
|
|
||||||
// 最终被分词后添加的前缀和后缀处理器,默认是粗体<B></B>
|
|
||||||
SimpleHTMLFormatter htmlFormatter =
|
|
||||||
new SimpleHTMLFormatter("<font color=" + "\"" + "red" + "\"" + ">", "</font>");
|
|
||||||
// 高亮搜索的词添加到高亮处理器中
|
|
||||||
Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
|
|
||||||
|
|
||||||
// 获取搜索的结果,指定返回document返回的个数
|
|
||||||
// TODO 默认搜索结果为显示第一页,1000 条,可以优化
|
|
||||||
TopDocs results = SearchUtil.getScoreDocsByPerPage(1, 100, searcher, query);
|
|
||||||
ScoreDoc[] hits = results.scoreDocs;
|
|
||||||
|
|
||||||
// 遍历,输出
|
|
||||||
for (ScoreDoc hit : hits) {
|
|
||||||
int id = hit.doc;
|
|
||||||
float score = hit.score;
|
|
||||||
Document hitDoc = searcher.doc(hit.doc);
|
|
||||||
// 获取到summary
|
|
||||||
String name = hitDoc.get("summary");
|
|
||||||
// 将查询的词和搜索词匹配,匹配到添加前缀和后缀
|
|
||||||
TokenStream tokenStream =
|
|
||||||
TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "summary", analyzer);
|
|
||||||
// 传入的第二个参数是查询的值
|
|
||||||
TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, name, false, 10);
|
|
||||||
StringBuilder baikeValue = new StringBuilder();
|
|
||||||
for (TextFragment textFragment : frag) {
|
|
||||||
if ((textFragment != null) && (textFragment.getScore() > 0)) {
|
|
||||||
// if ((frag[j] != null)) {
|
|
||||||
// 获取 summary 的值
|
|
||||||
baikeValue.append(textFragment);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// 获取到title
|
|
||||||
String title = hitDoc.get("title");
|
|
||||||
TokenStream titleTokenStream =
|
|
||||||
TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "title", analyzer);
|
|
||||||
TextFragment[] titleFrag =
|
|
||||||
highlighter.getBestTextFragments(titleTokenStream, title, false, 10);
|
|
||||||
StringBuilder titleValue = new StringBuilder();
|
|
||||||
for (int j = 0; j < titleFrag.length; j++) {
|
|
||||||
if ((frag[j] != null)) {
|
|
||||||
titleValue.append(titleFrag[j].toString());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
resList.add(
|
|
||||||
ArticleLucene.builder()
|
|
||||||
.idArticle(hitDoc.get("id"))
|
|
||||||
.articleTitle(titleValue.toString())
|
|
||||||
.articleContent(baikeValue.toString())
|
|
||||||
.score(String.valueOf(score))
|
|
||||||
.build());
|
|
||||||
}
|
|
||||||
} catch (IOException | ParseException | InvalidTokenOffsetsException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
} finally {
|
|
||||||
service.shutdownNow();
|
|
||||||
}
|
}
|
||||||
return resList;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<ArticleLucene> getAllArticleLucene() {
|
public void writeArticle(String id) {
|
||||||
return luceneMapper.getAllArticleLucene();
|
writeArticle(luceneMapper.getById(id));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<ArticleDTO> getArticlesByIds(String[] ids) {
|
public void writeArticle(ArticleLucene articleLucene) {
|
||||||
return luceneMapper.getArticlesByIds(ids);
|
ArticleIndexUtil.addIndex(articleLucene);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void updateArticle(String id) {
|
||||||
|
ArticleIndexUtil.updateIndex(luceneMapper.getById(id));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void deleteArticle(String id) {
|
||||||
|
ArticleIndexUtil.deleteIndex(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 关键词搜索
|
||||||
|
*
|
||||||
|
* @param value
|
||||||
|
* @return
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public List<ArticleLucene> searchArticle(String value) {
|
||||||
|
List<ArticleLucene> resList = new ArrayList<>();
|
||||||
|
ExecutorService service = Executors.newCachedThreadPool();
|
||||||
|
// 定义分词器
|
||||||
|
Analyzer analyzer = new IKAnalyzer();
|
||||||
|
try {
|
||||||
|
IndexSearcher searcher =
|
||||||
|
SearchUtil.getIndexSearcherByParentPath(LucenePath.ARTICLE_INDEX_PATH, service);
|
||||||
|
String[] fields = {"title", "summary"};
|
||||||
|
// 构造Query对象
|
||||||
|
MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer);
|
||||||
|
|
||||||
|
BufferedReader in =
|
||||||
|
new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
|
||||||
|
String line = value != null ? value : in.readLine();
|
||||||
|
Query query = parser.parse(line);
|
||||||
|
// 最终被分词后添加的前缀和后缀处理器,默认是粗体<B></B>
|
||||||
|
SimpleHTMLFormatter htmlFormatter =
|
||||||
|
new SimpleHTMLFormatter("<font color=" + "\"" + "red" + "\"" + ">", "</font>");
|
||||||
|
// 高亮搜索的词添加到高亮处理器中
|
||||||
|
Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
|
||||||
|
|
||||||
|
// 获取搜索的结果,指定返回document返回的个数
|
||||||
|
// TODO 默认搜索结果为显示第一页,1000 条,可以优化
|
||||||
|
TopDocs results = SearchUtil.getScoreDocsByPerPage(1, 100, searcher, query);
|
||||||
|
ScoreDoc[] hits = results.scoreDocs;
|
||||||
|
|
||||||
|
// 遍历,输出
|
||||||
|
for (ScoreDoc hit : hits) {
|
||||||
|
int id = hit.doc;
|
||||||
|
float score = hit.score;
|
||||||
|
Document hitDoc = searcher.doc(hit.doc);
|
||||||
|
// 获取到summary
|
||||||
|
String name = hitDoc.get("summary");
|
||||||
|
// 将查询的词和搜索词匹配,匹配到添加前缀和后缀
|
||||||
|
TokenStream tokenStream =
|
||||||
|
TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "summary", analyzer);
|
||||||
|
// 传入的第二个参数是查询的值
|
||||||
|
TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, name, false, 10);
|
||||||
|
StringBuilder baikeValue = new StringBuilder();
|
||||||
|
for (TextFragment textFragment : frag) {
|
||||||
|
if ((textFragment != null) && (textFragment.getScore() > 0)) {
|
||||||
|
// if ((frag[j] != null)) {
|
||||||
|
// 获取 summary 的值
|
||||||
|
baikeValue.append(textFragment);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 获取到title
|
||||||
|
String title = hitDoc.get("title");
|
||||||
|
TokenStream titleTokenStream =
|
||||||
|
TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "title", analyzer);
|
||||||
|
TextFragment[] titleFrag =
|
||||||
|
highlighter.getBestTextFragments(titleTokenStream, title, false, 10);
|
||||||
|
StringBuilder titleValue = new StringBuilder();
|
||||||
|
for (int j = 0; j < titleFrag.length; j++) {
|
||||||
|
if ((frag[j] != null)) {
|
||||||
|
titleValue.append(titleFrag[j].toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
resList.add(
|
||||||
|
ArticleLucene.builder()
|
||||||
|
.idArticle(hitDoc.get("id"))
|
||||||
|
.articleTitle(titleValue.toString())
|
||||||
|
.articleContent(baikeValue.toString())
|
||||||
|
.score(String.valueOf(score))
|
||||||
|
.build());
|
||||||
|
}
|
||||||
|
} catch (IOException | ParseException | InvalidTokenOffsetsException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
} finally {
|
||||||
|
service.shutdownNow();
|
||||||
|
}
|
||||||
|
return resList;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<ArticleLucene> getAllArticleLucene() {
|
||||||
|
List<ArticleLucene> list = luceneMapper.getAllArticleLucene();
|
||||||
|
for (ArticleLucene articleLucene : list) {
|
||||||
|
articleLucene.setArticleContent(Html2TextUtil.getContent(articleLucene.getArticleContent()));
|
||||||
|
}
|
||||||
|
return list;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<ArticleDTO> getArticlesByIds(String[] ids) {
|
||||||
|
return luceneMapper.getArticlesByIds(ids);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -32,7 +32,7 @@
|
|||||||
<result column="article_sponsor_count" property="articleSponsorCount"></result>
|
<result column="article_sponsor_count" property="articleSponsorCount"></result>
|
||||||
</resultMap>
|
</resultMap>
|
||||||
<select id="getAllArticleLucene" resultMap="ResultMapWithBLOBs">
|
<select id="getAllArticleLucene" resultMap="ResultMapWithBLOBs">
|
||||||
select art.id, art.article_title, content.article_content
|
select art.id, art.article_title, content.article_content_html as article_content
|
||||||
from forest_article art
|
from forest_article art
|
||||||
join forest_article_content content on art.id = content.id_article
|
join forest_article_content content on art.id = content.id_article
|
||||||
where article_status = 0;
|
where article_status = 0;
|
||||||
|
Loading…
Reference in New Issue
Block a user