feat: lucene字典配置

- lucene字典配置服务
- 字典更改后自动加载
- 删除无用文件
- 删除无用依赖
This commit is contained in:
suwen 2021-02-04 16:35:48 +08:00
parent b9bf3b43f5
commit 40a6e06cb4
9 changed files with 101 additions and 41 deletions

Binary file not shown.

View File

@ -226,19 +226,12 @@
<version>${lucene.version}</version> <version>${lucene.version}</version>
</dependency> </dependency>
<!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-suggest -->
<dependency> <dependency>
<groupId>org.apache.lucene</groupId> <groupId>org.apache.lucene</groupId>
<artifactId>lucene-suggest</artifactId> <artifactId>lucene-suggest</artifactId>
<version>${lucene.version}</version> <version>${lucene.version}</version>
</dependency> </dependency>
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<version>5.5.8</version>
</dependency>
</dependencies> </dependencies>
<build> <build>

View File

@ -8,6 +8,7 @@ import lombok.extern.log4j.Log4j2;
import org.springframework.web.bind.annotation.*; import org.springframework.web.bind.annotation.*;
import javax.annotation.Resource; import javax.annotation.Resource;
import java.io.FileNotFoundException;
/** /**
* UserDicController * UserDicController
@ -34,8 +35,14 @@ public class UserDicController {
return GlobalResultGenerator.genSuccessResult(dicService.getAllDic()); return GlobalResultGenerator.genSuccessResult(dicService.getAllDic());
} }
@PostMapping("/addDic") @GetMapping("/loadUserDic")
public GlobalResult addDic(@RequestBody String dic) { public GlobalResult loadUserDic() throws FileNotFoundException {
dicService.writeUserDic();
return GlobalResultGenerator.genSuccessResult("加载用户自定义字典成功");
}
@PostMapping("/addDic/{dic}")
public GlobalResult addDic(@PathVariable String dic) {
dicService.addDic(dic); dicService.addDic(dic);
return GlobalResultGenerator.genSuccessResult("新增字典成功"); return GlobalResultGenerator.genSuccessResult("新增字典成功");
} }

View File

@ -28,23 +28,21 @@ import java.util.List;
@Component @Component
public class DefaultConfig implements Configuration { public class DefaultConfig implements Configuration {
/** /** 分词器默认字典路径 */
* 分词器默认字典路径
*/
private static final String PATH_DIC_MAIN = "lucene/main2012.dic"; private static final String PATH_DIC_MAIN = "lucene/main2012.dic";
private static final String PATH_DIC_QUANTIFIER = "lucene/quantifier.dic"; private static final String PATH_DIC_QUANTIFIER = "lucene/quantifier.dic";
private static final String PATH_USER_DIC =
System.getProperty("user.dir") + "/lucene/userDic/userDic.dic";
/** 分词器配置文件路径 */
/**
* 分词器配置文件路径
*/
private static final String FILE_NAME = "IKAnalyzer.cfg.xml"; private static final String FILE_NAME = "IKAnalyzer.cfg.xml";
// 配置属性扩展字典 // 配置属性扩展字典
private static final String EXT_DICT = "ext_dic"; private static final String EXT_DICT = "ext_dic";
// 配置属性扩展停止词典 // 配置属性扩展停止词典
private static final String EXT_STOP = "ext_stopword"; private static final String EXT_STOP = "ext_stopword";
private String extDic = "lucene/ext.dic"; private String extDic = "lucene/ext.dic;" + PATH_USER_DIC;
private String extStopword = "lucene/stopword.dic"; private String extStopword = "lucene/stopword.dic";
/* /*
@ -141,19 +139,4 @@ public class DefaultConfig implements Configuration {
return extStopWordDictFiles; return extStopWordDictFiles;
} }
public String getExtDic() {
return extDic;
}
public void setExtDic(String extDic) {
this.extDic = extDic;
}
public String getExtStopword() {
return extStopword;
}
public void setExtStopword(String extStopword) {
this.extStopword = extStopword;
}
} }

View File

@ -23,10 +23,7 @@ import com.rymcu.forest.lucene.cfg.Configuration;
import org.springframework.core.io.ClassPathResource; import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource; import org.springframework.core.io.Resource;
import java.io.BufferedReader; import java.io.*;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;
@ -53,6 +50,9 @@ public class Dictionary {
*/ */
private DictSegment _QuantifierDict; private DictSegment _QuantifierDict;
private static final String PATH_USER_DIC =
System.getProperty("user.dir") + "/lucene/userDic/userDic.dic";
/** 配置对象 */ /** 配置对象 */
private Configuration cfg; private Configuration cfg;
@ -217,7 +217,11 @@ public class Dictionary {
is = this.getClass().getClassLoader().getResourceAsStream(extDictName); is = this.getClass().getClassLoader().getResourceAsStream(extDictName);
// 如果找不到扩展的字典则忽略 // 如果找不到扩展的字典则忽略
if (is == null) { if (is == null) {
continue; try {
is = new FileInputStream(extDictName);
} catch (FileNotFoundException e) {
continue;
}
} }
try { try {
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
@ -226,7 +230,7 @@ public class Dictionary {
theWord = br.readLine(); theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) { if (theWord != null && !"".equals(theWord.trim())) {
// 加载扩展词典数据到主内存词典中 // 加载扩展词典数据到主内存词典中
// System.out.println(theWord); System.out.println(theWord);
_MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray()); _MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
} }
} while (theWord != null); } while (theWord != null);
@ -330,4 +334,40 @@ public class Dictionary {
} }
} }
} }
/**
 * Reloads the user-defined extension dictionary from {@code PATH_USER_DIC} into the main
 * dictionary.
 *
 * <p>Every non-blank line of the file is trimmed, lower-cased and passed to
 * {@code _MainDict.fillSegment}. If the file cannot be opened, the method reports that on stderr
 * and returns without loading anything. If reading fails part-way, the error is reported on
 * stderr; lines processed before the failure have already been handed to {@code _MainDict}.
 */
public void updateUserDict() {
  System.out.println("更新加载扩展词典:" + PATH_USER_DIC);
  // try-with-resources closes the reader (and the underlying file stream) on every exit path.
  try (BufferedReader br =
      new BufferedReader(
          new InputStreamReader(new FileInputStream(PATH_USER_DIC), StandardCharsets.UTF_8),
          512)) {
    String line;
    while ((line = br.readLine()) != null) {
      String word = line.trim();
      if (!word.isEmpty()) {
        // NOTE(review): echoing every word to stdout looks like debugging output; kept to
        // preserve the existing console behaviour.
        System.out.println(line);
        _MainDict.fillSegment(word.toLowerCase().toCharArray());
      }
    }
  } catch (FileNotFoundException e) {
    // A missing user dictionary is not fatal, but say so instead of returning silently.
    System.err.println("User dictionary not found, skipping reload: " + PATH_USER_DIC);
  } catch (IOException ioe) {
    System.err.println("Extension Dictionary loading exception.");
    ioe.printStackTrace();
  }
}
} }

View File

@ -2,6 +2,7 @@ package com.rymcu.forest.lucene.service;
import com.rymcu.forest.lucene.model.UserDic; import com.rymcu.forest.lucene.model.UserDic;
import java.io.FileNotFoundException;
import java.util.List; import java.util.List;
/** /**
@ -39,10 +40,17 @@ public interface UserDicService {
* @param id * @param id
*/ */
void deleteDic(String id); void deleteDic(String id);
/** /**
* 更新字典 * 更新字典
* *
* @param userDic * @param userDic
*/ */
void updateDic(UserDic userDic); void updateDic(UserDic userDic);
/**
 * Writes the user dictionary so that it is loaded into the in-memory dictionary.
 *
 * @throws FileNotFoundException declared for implementations that cannot open the dictionary
 *     file. NOTE(review): the implementation visible in this change catches this exception
 *     itself — confirm whether the clause is still needed.
 */
void writeUserDic() throws FileNotFoundException;
} }

View File

@ -44,7 +44,7 @@ public class LuceneServiceImpl implements LuceneService {
@Resource private ArticleLuceneMapper luceneMapper; @Resource private ArticleLuceneMapper luceneMapper;
/** Lucene索引文件路径 */ /** Lucene索引文件路径 */
private final String indexPath = System.getProperty("user.dir") + "/index"; private final String indexPath = System.getProperty("user.dir") + "/lucene/index";
/** /**
* 将文章的数据解析为一个个关键字词存储到索引文件中 * 将文章的数据解析为一个个关键字词存储到索引文件中
@ -66,7 +66,7 @@ public class LuceneServiceImpl implements LuceneService {
int end = Math.min((i + 1) * perThreadCount, totalCount); int end = Math.min((i + 1) * perThreadCount, totalCount);
List<ArticleLucene> subList = list.subList(start, end); List<ArticleLucene> subList = list.subList(start, end);
Runnable runnable = Runnable runnable =
new ArticleBeanIndex("index", i, countDownLatch1, countDownLatch2, subList); new ArticleBeanIndex("lucene/index", i, countDownLatch1, countDownLatch2, subList);
// 子线程交给线程池管理 // 子线程交给线程池管理
pool.execute(runnable); pool.execute(runnable);
} }

View File

@ -7,6 +7,8 @@ import com.rymcu.forest.lucene.service.UserDicService;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import javax.annotation.Resource; import javax.annotation.Resource;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.List; import java.util.List;
/** /**
@ -19,7 +21,9 @@ import java.util.List;
public class UserDicServiceImpl implements UserDicService { public class UserDicServiceImpl implements UserDicService {
@Resource private UserDicMapper userDicMapper; @Resource private UserDicMapper userDicMapper;
@Resource private Dictionary dictionary;
/** Path of the user-defined dictionary file. */
private final String dicPath = System.getProperty("user.dir") + "/lucene/userDic/userDic.dic";
@Override @Override
public List<String> getAllDic() { public List<String> getAllDic() {
@ -35,15 +39,40 @@ public class UserDicServiceImpl implements UserDicService {
@Override @Override
public void addDic(String dic) { public void addDic(String dic) {
userDicMapper.addDic(dic); userDicMapper.addDic(dic);
writeUserDic();
} }
@Override @Override
public void deleteDic(String id) { public void deleteDic(String id) {
userDicMapper.deleteDic(id); userDicMapper.deleteDic(id);
writeUserDic();
} }
@Override @Override
public void updateDic(UserDic userDic) { public void updateDic(UserDic userDic) {
userDicMapper.updateDic(userDic.getId(), userDic.getDic()); userDicMapper.updateDic(userDic.getId(), userDic.getDic());
writeUserDic();
}
@Override
public void writeUserDic() {
try {
File file = new File(dicPath);
FileOutputStream stream = new FileOutputStream(file, false);
OutputStreamWriter outfw = new OutputStreamWriter(stream, StandardCharsets.UTF_8);
PrintWriter fw = new PrintWriter(new BufferedWriter(outfw));
userDicMapper
.getAllDic()
.forEach(
each -> {
fw.write(each);
fw.write("\r\n");
});
fw.flush();
fw.close();
Dictionary.getSingleton().updateUserDict();
} catch (FileNotFoundException e) {
e.printStackTrace();
}
} }
} }