commit
9874c0c1fd
2
.gitignore
vendored
2
.gitignore
vendored
@ -33,4 +33,4 @@ build/
|
||||
|
||||
### lucene ###
|
||||
index
|
||||
userDic
|
||||
userDic
|
12
README.md
12
README.md
@ -1,8 +1,10 @@
|
||||
# forest
|
||||
 [](https://app.fossa.com/projects/git%2Bgithub.com%2Frymcu%2Fforest?ref=badge_shield)
|
||||
|
||||

|
||||
|
||||
下一代的知识社区系统,为未来而建
|
||||
|
||||
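[![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2Frymcu%2Fforest.svg?type=shield)](https://app.fossa.com/projects/git%2Bgithub.com%2Frymcu%2Fforest?ref=badge_shield)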
|
||||
|
||||
## 💡 简介
|
||||
|
||||
forest([ˈfôrəst],n.森林)是一款现代化的知识社区项目,使用 SpringBoot + Shiro + MyBatis + JWT + Redis 实现。
|
||||
@ -96,6 +98,12 @@ forest([ˈfôrəst],n.森林)是一款现代化的知识社区项目,使
|
||||
在提功能建议前可以先看一下 [计划表](https://rymcu.com/article/29) ,避免重复提议
|
||||
|
||||
## 鸣谢
|
||||
- 感谢以下开发者对 Forest 作出的贡献:
|
||||
|
||||
<a href="https://github.com/rymcu/forest/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?repo=rymcu/forest&max=1000" />
|
||||
</a>
|
||||
|
||||
- 感谢 `JetBrains` 对本项目的帮助,为作者提供了开源许可版 `JetBrains` 全家桶
|
||||
|
||||

|
||||
|
@ -25,7 +25,7 @@ public class AnswerController {
|
||||
@GetMapping("/today")
|
||||
public GlobalResult today() {
|
||||
User user = UserUtils.getCurrentUserByToken();
|
||||
String result = HttpUtils.sendGet(ANSWER_API_URL + "/record/" + user.getIdUser() );
|
||||
String result = HttpUtils.sendGet(ANSWER_API_URL + "/record/" + user.getIdUser());
|
||||
return JSONObject.parseObject(result, GlobalResult.class);
|
||||
}
|
||||
|
||||
@ -43,7 +43,7 @@ public class AnswerController {
|
||||
|
||||
@GetMapping("/get-answer")
|
||||
public GlobalResult getAnswer(Integer idSubjectQuestion) {
|
||||
String result = HttpUtils.sendGet(ANSWER_API_URL + "/show-answer/" + idSubjectQuestion );
|
||||
String result = HttpUtils.sendGet(ANSWER_API_URL + "/show-answer/" + idSubjectQuestion);
|
||||
return JSONObject.parseObject(result, GlobalResult.class);
|
||||
}
|
||||
}
|
||||
|
@ -6,21 +6,20 @@ package com.rymcu.forest.auth;
|
||||
public class JwtConstants {
|
||||
|
||||
|
||||
/**
|
||||
* 上线需要变更
|
||||
*/
|
||||
public static final String JWT_SECRET = "JYJ5Qv2WF4lA6jPl5GKuAG";
|
||||
|
||||
/**
|
||||
* 上线需要变更
|
||||
*/
|
||||
public static final String JWT_SECRET = "JYJ5Qv2WF4lA6jPl5GKuAG";
|
||||
public static final String AUTHORIZATION = "Authorization";
|
||||
public static final String UPLOAD_TOKEN = "X-Upload-Token";
|
||||
public static final String CURRENT_USER_NAME = "CURRENT_TOKEN_USER_NAME";
|
||||
public static final String CURRENT_TOKEN_CLAIMS = "CURRENT_TOKEN_CLAIMS";
|
||||
public static final String LAST_ONLINE = "last_online:";
|
||||
|
||||
public static final String AUTHORIZATION = "Authorization";
|
||||
public static final String UPLOAD_TOKEN = "X-Upload-Token";
|
||||
public static final String CURRENT_USER_NAME = "CURRENT_TOKEN_USER_NAME";
|
||||
public static final String CURRENT_TOKEN_CLAIMS = "CURRENT_TOKEN_CLAIMS";
|
||||
public static final String LAST_ONLINE = "last_online:";
|
||||
|
||||
public static final long TOKEN_EXPIRES_HOUR = 2;
|
||||
public static final long LAST_ONLINE_EXPIRES_MINUTE = 10;
|
||||
public static final long TOKEN_EXPIRES_MINUTE = 15;
|
||||
public static final long REFRESH_TOKEN_EXPIRES_HOUR = 2;
|
||||
public static final long TOKEN_EXPIRES_HOUR = 2;
|
||||
public static final long LAST_ONLINE_EXPIRES_MINUTE = 10;
|
||||
public static final long TOKEN_EXPIRES_MINUTE = 15;
|
||||
public static final long REFRESH_TOKEN_EXPIRES_HOUR = 2;
|
||||
|
||||
}
|
||||
|
@ -17,6 +17,7 @@ import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* 通过Redis存储和验证token的实现类
|
||||
*
|
||||
* @author ScienJus
|
||||
* @date 2015/7/31.
|
||||
*/
|
||||
@ -56,7 +57,7 @@ public class RedisTokenManager implements TokenManager {
|
||||
}
|
||||
StringBuilder key = new StringBuilder();
|
||||
key.append(JwtConstants.LAST_ONLINE).append(model.getUsername());
|
||||
String result = redisTemplate.boundValueOps(key.toString()).get();
|
||||
String result = redisTemplate.boundValueOps(key.toString()).get();
|
||||
if (StringUtils.isBlank(result)) {
|
||||
// 更新最后在线时间
|
||||
applicationEventPublisher.publishEvent(new AccountEvent(model.getUsername()));
|
||||
|
@ -2,6 +2,7 @@ package com.rymcu.forest.auth;
|
||||
|
||||
/**
|
||||
* 对token进行操作的接口
|
||||
*
|
||||
* @author ScienJus
|
||||
* @date 2015/7/31.
|
||||
*/
|
||||
@ -9,6 +10,7 @@ public interface TokenManager {
|
||||
|
||||
/**
|
||||
* 创建一个token关联上指定用户
|
||||
*
|
||||
* @param id
|
||||
* @return 生成的token
|
||||
*/
|
||||
@ -16,6 +18,7 @@ public interface TokenManager {
|
||||
|
||||
/**
|
||||
* 检查token是否有效
|
||||
*
|
||||
* @param model token
|
||||
* @return 是否有效
|
||||
*/
|
||||
@ -23,6 +26,7 @@ public interface TokenManager {
|
||||
|
||||
/**
|
||||
* 从字符串中解析token
|
||||
*
|
||||
* @param token
|
||||
* @param account
|
||||
* @return
|
||||
@ -31,6 +35,7 @@ public interface TokenManager {
|
||||
|
||||
/**
|
||||
* 清除token
|
||||
*
|
||||
* @param account 登录用户账号
|
||||
*/
|
||||
public void deleteToken(String account);
|
||||
|
@ -4,6 +4,7 @@ import org.apache.shiro.authc.AuthenticationToken;
|
||||
|
||||
/**
|
||||
* Token的Model类,可以增加字段提高安全性,例如时间戳、url签名
|
||||
*
|
||||
* @author ScienJus
|
||||
* @date 2015/7/31.
|
||||
*/
|
||||
@ -23,14 +24,14 @@ public class TokenModel implements AuthenticationToken {
|
||||
}
|
||||
|
||||
public String getUsername() {
|
||||
return username;
|
||||
}
|
||||
return username;
|
||||
}
|
||||
|
||||
public void setUsername(String username) {
|
||||
this.username = username;
|
||||
}
|
||||
public void setUsername(String username) {
|
||||
this.username = username;
|
||||
}
|
||||
|
||||
public String getToken() {
|
||||
public String getToken() {
|
||||
return token;
|
||||
}
|
||||
|
||||
|
@ -6,10 +6,8 @@ import com.rymcu.forest.core.exception.ServiceException;
|
||||
import com.rymcu.forest.core.exception.TransactionException;
|
||||
import com.rymcu.forest.core.result.GlobalResult;
|
||||
import com.rymcu.forest.core.result.ResultCode;
|
||||
import com.rymcu.forest.enumerate.TransactionCode;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.shiro.authc.AccountException;
|
||||
import org.apache.shiro.authc.AuthenticationException;
|
||||
import org.apache.shiro.authc.UnknownAccountException;
|
||||
import org.apache.shiro.authz.UnauthenticatedException;
|
||||
import org.apache.shiro.authz.UnauthorizedException;
|
||||
|
@ -17,6 +17,7 @@ import static com.rymcu.forest.core.constant.ProjectConstant.*;
|
||||
|
||||
/**
|
||||
* Mybatis & Mapper & PageHelper 配置
|
||||
*
|
||||
* @author ronger
|
||||
*/
|
||||
@Configuration
|
||||
|
@ -82,7 +82,7 @@ public class ShiroConfig {
|
||||
* Shiro生命周期处理器
|
||||
*/
|
||||
@Bean
|
||||
public LifecycleBeanPostProcessor lifecycleBeanPostProcessor(){
|
||||
public LifecycleBeanPostProcessor lifecycleBeanPostProcessor() {
|
||||
return new LifecycleBeanPostProcessor();
|
||||
}
|
||||
|
||||
|
@ -1,7 +1,9 @@
|
||||
package com.rymcu.forest.config;
|
||||
|
||||
|
||||
import com.alibaba.fastjson.serializer.SerializeConfig;
|
||||
import com.alibaba.fastjson.serializer.SerializerFeature;
|
||||
import com.alibaba.fastjson.serializer.ToStringSerializer;
|
||||
import com.alibaba.fastjson.support.config.FastJsonConfig;
|
||||
import com.alibaba.fastjson.support.spring.FastJsonHttpMessageConverter;
|
||||
import org.slf4j.Logger;
|
||||
@ -40,6 +42,11 @@ public class WebMvcConfigurer extends WebMvcConfigurationSupport {
|
||||
// SerializerFeature.WriteNullNumberAsZero);//Number null -> 0
|
||||
//关闭循环引用
|
||||
config.setSerializerFeatures(SerializerFeature.DisableCircularReferenceDetect);
|
||||
// 设置 Long 类型转为 String
|
||||
SerializeConfig serializeConfig = new SerializeConfig();
|
||||
serializeConfig.put(Long.class, ToStringSerializer.instance);
|
||||
serializeConfig.put(Long.TYPE, ToStringSerializer.instance);
|
||||
config.setSerializeConfig(serializeConfig);
|
||||
converter.setFastJsonConfig(config);
|
||||
converter.setSupportedMediaTypes(Arrays.asList(MediaType.APPLICATION_JSON));
|
||||
converter.setDefaultCharset(Charset.forName("UTF-8"));
|
||||
|
@ -14,6 +14,7 @@ import org.springframework.web.socket.config.annotation.WebSocketMessageBrokerCo
|
||||
public class WebSocketStompConfig implements WebSocketMessageBrokerConfigurer {
|
||||
/**
|
||||
* 注册stomp端点
|
||||
*
|
||||
* @param registry
|
||||
*/
|
||||
@Override
|
||||
@ -25,6 +26,7 @@ public class WebSocketStompConfig implements WebSocketMessageBrokerConfigurer {
|
||||
|
||||
/**
|
||||
* 配置信息代理
|
||||
*
|
||||
* @param registry
|
||||
*/
|
||||
@Override
|
||||
|
@ -2,6 +2,7 @@ package com.rymcu.forest.core.constant;
|
||||
|
||||
/**
|
||||
* 消息通知类型
|
||||
*
|
||||
* @author ronger
|
||||
*/
|
||||
public class NotificationConstant {
|
||||
|
@ -2,25 +2,44 @@ package com.rymcu.forest.core.constant;
|
||||
|
||||
/**
|
||||
* 项目常量
|
||||
*
|
||||
* @author ronger
|
||||
*/
|
||||
public final class ProjectConstant {
|
||||
/**当前环境*/
|
||||
/**
|
||||
* 当前环境
|
||||
*/
|
||||
public static final String ENV = "dev";
|
||||
/**项目基础包名称,根据自己公司的项目修改*/
|
||||
/**
|
||||
* 项目基础包名称,根据自己公司的项目修改
|
||||
*/
|
||||
public static final String BASE_PACKAGE = "com.rymcu.forest";
|
||||
/**DTO所在包*/
|
||||
/**
|
||||
* DTO所在包
|
||||
*/
|
||||
public static final String DTO_PACKAGE = BASE_PACKAGE + ".dto";
|
||||
/**Model所在包*/
|
||||
/**
|
||||
* Model所在包
|
||||
*/
|
||||
public static final String MODEL_PACKAGE = BASE_PACKAGE + ".entity";
|
||||
/**Mapper所在包*/
|
||||
/**
|
||||
* Mapper所在包
|
||||
*/
|
||||
public static final String MAPPER_PACKAGE = BASE_PACKAGE + ".mapper";
|
||||
/**Service所在包*/
|
||||
/**
|
||||
* Service所在包
|
||||
*/
|
||||
public static final String SERVICE_PACKAGE = BASE_PACKAGE + ".service";
|
||||
/**ServiceImpl所在包*/
|
||||
/**
|
||||
* ServiceImpl所在包
|
||||
*/
|
||||
public static final String SERVICE_IMPL_PACKAGE = SERVICE_PACKAGE + ".impl";
|
||||
/**Controller所在包*/
|
||||
/**
|
||||
* Controller所在包
|
||||
*/
|
||||
public static final String CONTROLLER_PACKAGE = BASE_PACKAGE + ".web";
|
||||
/**Mapper插件基础接口的完全限定名*/
|
||||
/**
|
||||
* Mapper插件基础接口的完全限定名
|
||||
*/
|
||||
public static final String MAPPER_INTERFACE_REFERENCE = BASE_PACKAGE + ".core.mapper.Mapper";
|
||||
}
|
||||
|
@ -7,12 +7,10 @@ import org.apache.shiro.authc.AuthenticationException;
|
||||
*
|
||||
* @author ronger
|
||||
*/
|
||||
public class CaptchaException extends AuthenticationException
|
||||
{
|
||||
public class CaptchaException extends AuthenticationException {
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
public CaptchaException()
|
||||
{
|
||||
public CaptchaException() {
|
||||
super("验证码不正确");
|
||||
}
|
||||
}
|
||||
|
@ -5,6 +5,7 @@ import com.rymcu.forest.core.result.ResultCode;
|
||||
|
||||
/**
|
||||
* 服务(业务)异常,如“账号或密码错误”;该异常只做 INFO 级别的日志记录 @see WebMvcConfigurer
|
||||
*
|
||||
* @author ronger
|
||||
*/
|
||||
public class ServiceException extends RuntimeException {
|
||||
@ -13,27 +14,27 @@ public class ServiceException extends RuntimeException {
|
||||
|
||||
public ServiceException(ResultCode resultCode) {
|
||||
super(resultCode.getMessage());
|
||||
this.code=resultCode.getCode();
|
||||
this.code = resultCode.getCode();
|
||||
}
|
||||
|
||||
|
||||
public ServiceException(String message, Throwable cause) {
|
||||
|
||||
}
|
||||
public ServiceException(int code, String message, String extraMessage, Throwable cause){
|
||||
super(message,cause);
|
||||
this.code=code;
|
||||
this.extraMessage=extraMessage;
|
||||
|
||||
public ServiceException(int code, String message, String extraMessage, Throwable cause) {
|
||||
super(message, cause);
|
||||
this.code = code;
|
||||
this.extraMessage = extraMessage;
|
||||
}
|
||||
|
||||
|
||||
|
||||
public ServiceException(ResultCode resultCode, String extraMessage){
|
||||
this(resultCode.getCode(),resultCode.getMessage(),extraMessage,null);
|
||||
public ServiceException(ResultCode resultCode, String extraMessage) {
|
||||
this(resultCode.getCode(), resultCode.getMessage(), extraMessage, null);
|
||||
}
|
||||
|
||||
public ServiceException(String extraMessage){
|
||||
this(ResultCode.INVALID_PARAM,extraMessage);
|
||||
public ServiceException(String extraMessage) {
|
||||
this(ResultCode.INVALID_PARAM, extraMessage);
|
||||
}
|
||||
|
||||
|
||||
|
@ -6,6 +6,7 @@ public interface TreeMapper<T> extends Mapper<T> {
|
||||
|
||||
/**
|
||||
* 找到所有子节点
|
||||
*
|
||||
* @param entity
|
||||
* @return
|
||||
*/
|
||||
@ -13,6 +14,7 @@ public interface TreeMapper<T> extends Mapper<T> {
|
||||
|
||||
/**
|
||||
* 更新所有父节点字段
|
||||
*
|
||||
* @param entity
|
||||
* @return
|
||||
*/
|
||||
|
@ -20,7 +20,7 @@ public class GlobalResult<T> {
|
||||
this.message = resultCode.getMessage();
|
||||
}
|
||||
|
||||
public static <T> GlobalResult<T> newInstance() {
|
||||
public static <T> GlobalResult<T> newInstance() {
|
||||
return new GlobalResult();
|
||||
}
|
||||
|
||||
|
@ -9,6 +9,7 @@ public class GlobalResultGenerator {
|
||||
|
||||
/**
|
||||
* normal
|
||||
*
|
||||
* @param success
|
||||
* @param data
|
||||
* @param message
|
||||
@ -28,6 +29,7 @@ public class GlobalResultGenerator {
|
||||
|
||||
/**
|
||||
* success
|
||||
*
|
||||
* @param data
|
||||
* @param <T>
|
||||
* @return
|
||||
@ -39,6 +41,7 @@ public class GlobalResultGenerator {
|
||||
|
||||
/**
|
||||
* error message
|
||||
*
|
||||
* @param message error message
|
||||
* @param <T>
|
||||
* @return
|
||||
@ -50,6 +53,7 @@ public class GlobalResultGenerator {
|
||||
|
||||
/**
|
||||
* error
|
||||
*
|
||||
* @param error error enum
|
||||
* @param <T>
|
||||
* @return
|
||||
@ -61,6 +65,7 @@ public class GlobalResultGenerator {
|
||||
|
||||
/**
|
||||
* success no message
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public static GlobalResult genSuccessResult() {
|
||||
@ -69,6 +74,7 @@ public class GlobalResultGenerator {
|
||||
|
||||
/**
|
||||
* success
|
||||
*
|
||||
* @param <T>
|
||||
* @return
|
||||
*/
|
||||
|
@ -12,7 +12,7 @@ public enum GlobalResultMessage {
|
||||
|
||||
private String message;
|
||||
|
||||
GlobalResultMessage(String message){
|
||||
GlobalResultMessage(String message) {
|
||||
this.message = message;
|
||||
}
|
||||
}
|
||||
|
@ -20,7 +20,6 @@ import java.util.Objects;
|
||||
|
||||
/**
|
||||
* @author ronger
|
||||
*
|
||||
*/
|
||||
@Aspect
|
||||
@Component
|
||||
@ -32,7 +31,9 @@ public class TransactionAspect {
|
||||
private TransactionRecordService transactionRecordService;
|
||||
|
||||
@Pointcut("@annotation(com.rymcu.forest.core.service.log.annotation.TransactionLogger)")
|
||||
public void pointCut() {}
|
||||
public void pointCut() {
|
||||
}
|
||||
|
||||
/**
|
||||
* 保存交易操作日志
|
||||
*
|
||||
@ -40,7 +41,7 @@ public class TransactionAspect {
|
||||
* @return 方法执行结果
|
||||
* @throws Exception 调用出错
|
||||
*/
|
||||
@AfterReturning(value = "pointCut()", returning="obj")
|
||||
@AfterReturning(value = "pointCut()", returning = "obj")
|
||||
public void save(JoinPoint joinPoint, Object obj) throws Exception {
|
||||
logger.info("保存交易记录 start ...");
|
||||
/**
|
||||
|
@ -5,6 +5,7 @@ import java.lang.annotation.RetentionPolicy;
|
||||
|
||||
/**
|
||||
* 浏览记录器
|
||||
*
|
||||
* @author ronger
|
||||
*/
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
|
@ -61,6 +61,7 @@ public class RedisResult<T> extends BaseDO {
|
||||
public void setKeyExists(boolean keyExists) {
|
||||
this.keyExists = keyExists;
|
||||
}
|
||||
|
||||
public boolean isKeyExists() {
|
||||
return keyExists;
|
||||
}
|
||||
|
@ -7,6 +7,7 @@ import java.util.Set;
|
||||
|
||||
/**
|
||||
* Redis 服务接口
|
||||
*
|
||||
* @author Jimersy Lee
|
||||
* 2017-09-18 14:58:21
|
||||
*/
|
||||
|
@ -25,7 +25,7 @@ import java.util.*;
|
||||
* Redis 服务接口实现类
|
||||
*
|
||||
* @author liwei
|
||||
* 16/10/30 下午5:28
|
||||
* 16/10/30 下午5:28
|
||||
*/
|
||||
@Component("redisService")
|
||||
@EnableConfigurationProperties({RedisProperties.class})
|
||||
@ -547,29 +547,32 @@ public class RedisServiceImpl implements RedisService {
|
||||
public String put(String cacheName, String key, Object value) {
|
||||
String result = get(cacheName);
|
||||
Map map = new HashMap();
|
||||
if (StringUtils.isNotBlank(result)){
|
||||
map = JSON.parseObject(result, new TypeReference<Map>() {});
|
||||
if (StringUtils.isNotBlank(result)) {
|
||||
map = JSON.parseObject(result, new TypeReference<Map>() {
|
||||
});
|
||||
}
|
||||
map.put(key,value);
|
||||
return set(cacheName,map);
|
||||
map.put(key, value);
|
||||
return set(cacheName, map);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String put(String cacheName, String key, Object value, int expireTime) {
|
||||
String result = get(cacheName);
|
||||
Map map = new HashMap();
|
||||
if (StringUtils.isNotBlank(result)){
|
||||
map = JSON.parseObject(result, new TypeReference<Map>() {});
|
||||
if (StringUtils.isNotBlank(result)) {
|
||||
map = JSON.parseObject(result, new TypeReference<Map>() {
|
||||
});
|
||||
}
|
||||
map.put(key,value);
|
||||
return set(cacheName,map,expireTime);
|
||||
map.put(key, value);
|
||||
return set(cacheName, map, expireTime);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object get(String cacheName, String key){
|
||||
public Object get(String cacheName, String key) {
|
||||
String result = get(cacheName);
|
||||
if (StringUtils.isNotBlank(result)){
|
||||
Map map = JSON.parseObject(result, new TypeReference<Map>() {});
|
||||
if (StringUtils.isNotBlank(result)) {
|
||||
Map map = JSON.parseObject(result, new TypeReference<Map>() {
|
||||
});
|
||||
return map.get(key);
|
||||
}
|
||||
return null;
|
||||
|
@ -5,6 +5,7 @@ import java.lang.annotation.RetentionPolicy;
|
||||
|
||||
/**
|
||||
* 安全拦截器
|
||||
*
|
||||
* @author ronger
|
||||
*/
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
|
@ -1,7 +1,6 @@
|
||||
package com.rymcu.forest.dto;
|
||||
|
||||
import com.alibaba.fastjson.annotation.JSONField;
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
@ -18,42 +17,74 @@ import java.util.List;
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class ArticleDTO {
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idArticle;
|
||||
/** 文章标题 */
|
||||
/**
|
||||
* 文章标题
|
||||
*/
|
||||
private String articleTitle;
|
||||
/** 文章缩略图 */
|
||||
/**
|
||||
* 文章缩略图
|
||||
*/
|
||||
private String articleThumbnailUrl;
|
||||
/** 文章作者id */
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
/**
|
||||
* 文章作者id
|
||||
*/
|
||||
private Long articleAuthorId;
|
||||
/** 文章作者 */
|
||||
/**
|
||||
* 文章作者
|
||||
*/
|
||||
private String articleAuthorName;
|
||||
/** 文章作者头像 */
|
||||
/**
|
||||
* 文章作者头像
|
||||
*/
|
||||
private String articleAuthorAvatarUrl;
|
||||
/** 文章类型 */
|
||||
/**
|
||||
* 文章类型
|
||||
*/
|
||||
private String articleType;
|
||||
/** 文章标签 */
|
||||
/**
|
||||
* 文章标签
|
||||
*/
|
||||
private String articleTags;
|
||||
/** 浏览总数 */
|
||||
/**
|
||||
* 浏览总数
|
||||
*/
|
||||
private Integer articleViewCount;
|
||||
/** 预览内容 */
|
||||
/**
|
||||
* 预览内容
|
||||
*/
|
||||
private String articlePreviewContent;
|
||||
/** 文章内容 */
|
||||
/**
|
||||
* 文章内容
|
||||
*/
|
||||
private String articleContent;
|
||||
/** 文章内容html */
|
||||
/**
|
||||
* 文章内容html
|
||||
*/
|
||||
private String articleContentHtml;
|
||||
/** 评论总数 */
|
||||
/**
|
||||
* 评论总数
|
||||
*/
|
||||
private Integer articleCommentCount;
|
||||
/** 过去时长 */
|
||||
/**
|
||||
* 过去时长
|
||||
*/
|
||||
private String timeAgo;
|
||||
/** 文章永久链接 */
|
||||
/**
|
||||
* 文章永久链接
|
||||
*/
|
||||
private String articlePermalink;
|
||||
/** 站内链接 */
|
||||
/**
|
||||
* 站内链接
|
||||
*/
|
||||
private String articleLink;
|
||||
/** 文章状态 */
|
||||
/**
|
||||
* 文章状态
|
||||
*/
|
||||
private String articleStatus;
|
||||
/** 更新时间 */
|
||||
/**
|
||||
* 更新时间
|
||||
*/
|
||||
@JSONField(format = "yyyy-MM-dd HH:mm:ss")
|
||||
private Date updatedTime;
|
||||
|
||||
@ -64,11 +95,17 @@ public class ArticleDTO {
|
||||
private List<PortfolioArticleDTO> portfolios;
|
||||
|
||||
private Integer sortNo;
|
||||
/** 0:非优选1:优选;0 */
|
||||
/**
|
||||
* 0:非优选1:优选;0
|
||||
*/
|
||||
private String articlePerfect;
|
||||
/** 点赞总数 */
|
||||
/**
|
||||
* 点赞总数
|
||||
*/
|
||||
private Integer articleThumbsUpCount;
|
||||
/** 赞赏总数 */
|
||||
/**
|
||||
* 赞赏总数
|
||||
*/
|
||||
private Integer articleSponsorCount;
|
||||
|
||||
private Boolean canSponsor;
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.dto;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
@ -14,12 +14,10 @@ import lombok.NoArgsConstructor;
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class ArticleTagDTO {
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idArticleTag;
|
||||
|
||||
private Integer idTag;
|
||||
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idArticle;
|
||||
|
||||
private String tagTitle;
|
||||
@ -30,6 +28,5 @@ public class ArticleTagDTO {
|
||||
|
||||
private String tagIconPath;
|
||||
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long tagAuthorId;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.dto;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
@ -15,7 +15,6 @@ import lombok.NoArgsConstructor;
|
||||
@AllArgsConstructor
|
||||
public class Author {
|
||||
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idUser;
|
||||
|
||||
private String userNickname;
|
||||
|
@ -14,19 +14,33 @@ import java.util.List;
|
||||
public class BankAccountDTO {
|
||||
|
||||
private Integer idBankAccount;
|
||||
/** 所属银行 */
|
||||
/**
|
||||
* 所属银行
|
||||
*/
|
||||
private Integer idBank;
|
||||
/** 所属银行名称 */
|
||||
/**
|
||||
* 所属银行名称
|
||||
*/
|
||||
private String bankName;
|
||||
/** 银行账户 */
|
||||
/**
|
||||
* 银行账户
|
||||
*/
|
||||
private String bankAccount;
|
||||
/** 账户余额 */
|
||||
/**
|
||||
* 账户余额
|
||||
*/
|
||||
private BigDecimal accountBalance;
|
||||
/** 账户所有者 */
|
||||
/**
|
||||
* 账户所有者
|
||||
*/
|
||||
private Integer accountOwner;
|
||||
/** 账户所有者姓名 */
|
||||
/**
|
||||
* 账户所有者姓名
|
||||
*/
|
||||
private String accountOwnerName;
|
||||
/** 创建时间 */
|
||||
/**
|
||||
* 创建时间
|
||||
*/
|
||||
@JSONField(format = "yyyy-MM-dd HH:mm:ss")
|
||||
private Date createdTime;
|
||||
|
||||
|
@ -7,11 +7,17 @@ import lombok.Data;
|
||||
*/
|
||||
@Data
|
||||
public class BankAccountSearchDTO {
|
||||
/** 所属银行名称 */
|
||||
/**
|
||||
* 所属银行名称
|
||||
*/
|
||||
private String bankName;
|
||||
/** 银行账户 */
|
||||
/**
|
||||
* 银行账户
|
||||
*/
|
||||
private String bankAccount;
|
||||
/** 账户所有者姓名 */
|
||||
/**
|
||||
* 账户所有者姓名
|
||||
*/
|
||||
private String accountOwnerName;
|
||||
|
||||
}
|
||||
|
@ -13,21 +13,37 @@ import java.util.Date;
|
||||
public class BankDTO {
|
||||
|
||||
private Integer idBank;
|
||||
/** 银行名称 */
|
||||
/**
|
||||
* 银行名称
|
||||
*/
|
||||
private String bankName;
|
||||
/** 银行负责人 */
|
||||
/**
|
||||
* 银行负责人
|
||||
*/
|
||||
private Integer bankOwner;
|
||||
/** 银行负责人 */
|
||||
/**
|
||||
* 银行负责人姓名
|
||||
*/
|
||||
private String bankOwnerName;
|
||||
/** 银行账户 */
|
||||
/**
|
||||
* 银行账户
|
||||
*/
|
||||
private String bankAccount;
|
||||
/** 账户余额 */
|
||||
/**
|
||||
* 账户余额
|
||||
*/
|
||||
private BigDecimal accountBalance;
|
||||
/** 银行描述 */
|
||||
/**
|
||||
* 银行描述
|
||||
*/
|
||||
private String bankDescription;
|
||||
/** 创建人 */
|
||||
/**
|
||||
* 创建人
|
||||
*/
|
||||
private Integer createdBy;
|
||||
/** 创建时间 */
|
||||
/**
|
||||
* 创建时间
|
||||
*/
|
||||
@JSONField(format = "yyyy-MM-dd HH:mm:ss")
|
||||
private Date createdTime;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.dto;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
@ -9,7 +9,6 @@ import lombok.Data;
|
||||
@Data
|
||||
public class ChangeEmailDTO {
|
||||
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idUser;
|
||||
|
||||
private String email;
|
||||
|
@ -11,31 +11,57 @@ import java.util.Date;
|
||||
@Data
|
||||
public class CommentDTO {
|
||||
private Integer idComment;
|
||||
/** 评论内容 */
|
||||
/**
|
||||
* 评论内容
|
||||
*/
|
||||
private String commentContent;
|
||||
/** 作者 id */
|
||||
/**
|
||||
* 作者 id
|
||||
*/
|
||||
private Integer commentAuthorId;
|
||||
/** 文章 id */
|
||||
/**
|
||||
* 文章 id
|
||||
*/
|
||||
private Integer commentArticleId;
|
||||
/** 锚点 url */
|
||||
/**
|
||||
* 锚点 url
|
||||
*/
|
||||
private String commentSharpUrl;
|
||||
/** 父评论 id */
|
||||
/**
|
||||
* 父评论 id
|
||||
*/
|
||||
private Integer commentOriginalCommentId;
|
||||
/** 父评论作者头像 */
|
||||
/**
|
||||
* 父评论作者头像
|
||||
*/
|
||||
private String commentOriginalAuthorThumbnailURL;
|
||||
/** 父评论作者昵称 */
|
||||
/**
|
||||
* 父评论作者昵称
|
||||
*/
|
||||
private String commentOriginalAuthorNickname;
|
||||
/** 父评论作者昵称 */
|
||||
/**
|
||||
* 父评论内容
|
||||
*/
|
||||
private String commentOriginalContent;
|
||||
/** 状态 */
|
||||
/**
|
||||
* 状态
|
||||
*/
|
||||
private String commentStatus;
|
||||
/** 0:公开回帖,1:匿名回帖 */
|
||||
/**
|
||||
* 0:公开回帖,1:匿名回帖
|
||||
*/
|
||||
private String commentAnonymous;
|
||||
/** 回帖计数 */
|
||||
/**
|
||||
* 回帖计数
|
||||
*/
|
||||
private Integer commentReplyCount;
|
||||
/** 0:所有人可见,1:仅楼主和自己可见 */
|
||||
/**
|
||||
* 0:所有人可见,1:仅楼主和自己可见
|
||||
*/
|
||||
private String commentVisible;
|
||||
/** 创建时间 */
|
||||
/**
|
||||
* 创建时间
|
||||
*/
|
||||
@JSONField(format = "yyyy-MM-dd HH:mm:ss")
|
||||
private Date createdTime;
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.dto;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import com.rymcu.forest.entity.Notification;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
@ -12,7 +12,6 @@ import lombok.EqualsAndHashCode;
|
||||
@EqualsAndHashCode(callSuper = false)
|
||||
public class NotificationDTO extends Notification {
|
||||
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idNotification;
|
||||
|
||||
private String dataTitle;
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.dto;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.List;
|
||||
@ -11,13 +11,10 @@ import java.util.List;
|
||||
@Data
|
||||
public class PortfolioArticleDTO {
|
||||
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long id;
|
||||
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idPortfolio;
|
||||
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idArticle;
|
||||
|
||||
private String headImgUrl;
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.dto;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.Date;
|
||||
@ -11,24 +11,38 @@ import java.util.Date;
|
||||
@Data
|
||||
public class PortfolioDTO {
|
||||
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idPortfolio;
|
||||
/** 作品集头像 */
|
||||
/**
|
||||
* 作品集头像
|
||||
*/
|
||||
private String headImgUrl;
|
||||
/** 作品集作者 */
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
/**
|
||||
* 作品集作者id
|
||||
*/
|
||||
private Long portfolioAuthorId;
|
||||
/** 作品集作者 */
|
||||
/**
|
||||
* 作品集作者
|
||||
*/
|
||||
private String portfolioAuthorName;
|
||||
/** 作品集作者头像 */
|
||||
/**
|
||||
* 作品集作者头像
|
||||
*/
|
||||
private String portfolioAuthorAvatarUrl;
|
||||
/** 作品集名称 */
|
||||
/**
|
||||
* 作品集名称
|
||||
*/
|
||||
private String portfolioTitle;
|
||||
/** 作品集介绍 */
|
||||
/**
|
||||
* 作品集介绍
|
||||
*/
|
||||
private String portfolioDescription;
|
||||
/** 更新时间 */
|
||||
/**
|
||||
* 更新时间
|
||||
*/
|
||||
private Date updatedTime;
|
||||
/** 过去时长 */
|
||||
/**
|
||||
* 过去时长
|
||||
*/
|
||||
private String timeAgo;
|
||||
|
||||
private Author portfolioAuthor;
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.dto;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.Set;
|
||||
@ -11,7 +11,6 @@ import java.util.Set;
|
||||
@Data
|
||||
public class TokenUser {
|
||||
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idUser;
|
||||
|
||||
private String account;
|
||||
|
@ -13,23 +13,41 @@ import java.util.Date;
|
||||
public class TransactionRecordDTO {
|
||||
|
||||
private Integer idTransactionRecord;
|
||||
/** 交易流水号 */
|
||||
/**
|
||||
* 交易流水号
|
||||
*/
|
||||
private String transactionNo;
|
||||
/** 款项 */
|
||||
/**
|
||||
* 款项
|
||||
*/
|
||||
private String funds;
|
||||
/** 交易发起方 */
|
||||
/**
|
||||
* 交易发起方
|
||||
*/
|
||||
private String formBankAccount;
|
||||
/** 交易发起方 */
|
||||
/**
|
||||
* 交易发起方账户信息
|
||||
*/
|
||||
private BankAccountDTO formBankAccountInfo;
|
||||
/** 交易收款方 */
|
||||
/**
|
||||
* 交易收款方
|
||||
*/
|
||||
private String toBankAccount;
|
||||
/** 交易收款方 */
|
||||
/**
|
||||
* 交易收款方账户信息
|
||||
*/
|
||||
private BankAccountDTO toBankAccountInfo;
|
||||
/** 交易金额 */
|
||||
/**
|
||||
* 交易金额
|
||||
*/
|
||||
private BigDecimal money;
|
||||
/** 交易类型 */
|
||||
/**
|
||||
* 交易类型
|
||||
*/
|
||||
private String transactionType;
|
||||
/** 交易时间 */
|
||||
/**
|
||||
* 交易时间
|
||||
*/
|
||||
@JSONField(format = "yyyy-MM-dd HH:mm:ss")
|
||||
private Date transactionTime;
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.dto;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
@ -9,7 +9,6 @@ import lombok.Data;
|
||||
@Data
|
||||
public class UpdatePasswordDTO {
|
||||
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idUser;
|
||||
|
||||
private String password;
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.dto;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
@ -9,7 +9,6 @@ import lombok.Data;
|
||||
@Data
|
||||
public class UserDTO {
|
||||
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idUser;
|
||||
|
||||
private String account;
|
||||
|
@ -1,7 +1,6 @@
|
||||
package com.rymcu.forest.dto;
|
||||
|
||||
import com.alibaba.fastjson.annotation.JSONField;
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
import lombok.Data;
|
||||
|
||||
import java.io.Serializable;
|
||||
@ -13,7 +12,6 @@ import java.util.Date;
|
||||
@Data
|
||||
public class UserInfoDTO implements Serializable {
|
||||
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idUser;
|
||||
|
||||
private String account;
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.dto.admin;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
@ -9,9 +9,7 @@ import lombok.Data;
|
||||
@Data
|
||||
public class TopicTagDTO {
|
||||
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idTopic;
|
||||
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idTag;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.dto.admin;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
@ -9,9 +9,7 @@ import lombok.Data;
|
||||
@Data
|
||||
public class UserRoleDTO {
|
||||
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idUser;
|
||||
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idRole;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.entity;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import javax.persistence.Column;
|
||||
@ -15,44 +15,76 @@ import java.util.Date;
|
||||
*/
|
||||
@Data
|
||||
@Table(name = "forest_article")
|
||||
public class Article implements Serializable,Cloneable {
|
||||
/** 主键 */
|
||||
public class Article implements Serializable, Cloneable {
|
||||
/**
|
||||
* 主键
|
||||
*/
|
||||
@Id
|
||||
@GeneratedValue(generator = "JDBC")
|
||||
@Column(name = "id")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idArticle;
|
||||
/** 文章标题 */
|
||||
/**
|
||||
* 文章标题
|
||||
*/
|
||||
private String articleTitle;
|
||||
/** 文章缩略图 */
|
||||
/**
|
||||
* 文章缩略图
|
||||
*/
|
||||
private String articleThumbnailUrl;
|
||||
/** 文章作者id */
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
/**
|
||||
* 文章作者id
|
||||
*/
|
||||
private Long articleAuthorId;
|
||||
/** 文章类型 */
|
||||
/**
|
||||
* 文章类型
|
||||
*/
|
||||
private String articleType;
|
||||
/** 文章标签 */
|
||||
/**
|
||||
* 文章标签
|
||||
*/
|
||||
private String articleTags;
|
||||
/** 浏览总数 */
|
||||
/**
|
||||
* 浏览总数
|
||||
*/
|
||||
private Integer articleViewCount;
|
||||
/** 预览内容 */
|
||||
/**
|
||||
* 预览内容
|
||||
*/
|
||||
private String articlePreviewContent;
|
||||
/** 评论总数 */
|
||||
/**
|
||||
* 评论总数
|
||||
*/
|
||||
private Integer articleCommentCount;
|
||||
/** 0:非优选1:优选; */
|
||||
/**
|
||||
* 0:非优选1:优选;
|
||||
*/
|
||||
private String articlePerfect;
|
||||
/** 文章永久链接 */
|
||||
/**
|
||||
* 文章永久链接
|
||||
*/
|
||||
private String articlePermalink;
|
||||
/** 站内链接 */
|
||||
/**
|
||||
* 站内链接
|
||||
*/
|
||||
private String articleLink;
|
||||
/** 创建时间 */
|
||||
/**
|
||||
* 创建时间
|
||||
*/
|
||||
private Date createdTime;
|
||||
/** 更新时间 */
|
||||
/**
|
||||
* 更新时间
|
||||
*/
|
||||
private Date updatedTime;
|
||||
/** 文章状态 */
|
||||
/**
|
||||
* 文章状态
|
||||
*/
|
||||
private String articleStatus;
|
||||
/** 点赞总数 */
|
||||
/**
|
||||
* 点赞总数
|
||||
*/
|
||||
private Integer articleThumbsUpCount;
|
||||
/** 赞赏总数 */
|
||||
/**
|
||||
* 赞赏总数
|
||||
*/
|
||||
private Integer articleSponsorCount;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.entity;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import javax.persistence.Column;
|
||||
@ -19,7 +19,6 @@ public class ArticleContent {
|
||||
@Id
|
||||
@Column(name = "id")
|
||||
@GeneratedValue(generator = "JDBC")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idArticle;
|
||||
|
||||
private String articleContent;
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.entity;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import javax.persistence.Column;
|
||||
@ -14,7 +14,7 @@ import java.util.Date;
|
||||
* @author ronger
|
||||
*/
|
||||
@Data
|
||||
@Table(name="forest_article_thumbs_up")
|
||||
@Table(name = "forest_article_thumbs_up")
|
||||
public class ArticleThumbsUp implements Serializable, Cloneable {
|
||||
/**
|
||||
* 主键
|
||||
@ -22,17 +22,14 @@ public class ArticleThumbsUp implements Serializable, Cloneable {
|
||||
@Id
|
||||
@Column(name = "id")
|
||||
@GeneratedValue(generator = "JDBC")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idArticleThumbsUp;
|
||||
/**
|
||||
* 文章表主键
|
||||
*/
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idArticle;
|
||||
/**
|
||||
* 用户表主键
|
||||
*/
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idUser;
|
||||
/**
|
||||
* 点赞时间
|
||||
|
@ -1,7 +1,6 @@
|
||||
package com.rymcu.forest.entity;
|
||||
|
||||
import com.alibaba.fastjson.annotation.JSONField;
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
import lombok.Data;
|
||||
|
||||
import javax.persistence.Column;
|
||||
@ -12,28 +11,39 @@ import java.util.Date;
|
||||
|
||||
/**
|
||||
* 银行
|
||||
*
|
||||
* @author ronger
|
||||
*/
|
||||
@Table(name = "forest_bank")
|
||||
@Data
|
||||
public class Bank {
|
||||
|
||||
/** 主键 */
|
||||
/**
|
||||
* 主键
|
||||
*/
|
||||
@Id
|
||||
@Column(name = "id")
|
||||
@GeneratedValue(generator = "JDBC")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idBank;
|
||||
/** 银行名称 */
|
||||
/**
|
||||
* 银行名称
|
||||
*/
|
||||
private String bankName;
|
||||
/** 银行负责人 */
|
||||
/**
|
||||
* 银行负责人
|
||||
*/
|
||||
private Long bankOwner;
|
||||
/** 银行描述 */
|
||||
/**
|
||||
* 银行描述
|
||||
*/
|
||||
private String bankDescription;
|
||||
/** 创建人 */
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
/**
|
||||
* 创建人
|
||||
*/
|
||||
private Long createdBy;
|
||||
/** 创建时间 */
|
||||
/**
|
||||
* 创建时间
|
||||
*/
|
||||
@JSONField(format = "yyyy-MM-dd HH:mm:ss")
|
||||
private Date createdTime;
|
||||
|
||||
|
@ -1,7 +1,6 @@
|
||||
package com.rymcu.forest.entity;
|
||||
|
||||
import com.alibaba.fastjson.annotation.JSONField;
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
import lombok.Data;
|
||||
|
||||
import javax.persistence.Column;
|
||||
@ -13,30 +12,42 @@ import java.util.Date;
|
||||
|
||||
/**
|
||||
* 银行账户
|
||||
*
|
||||
* @author ronger
|
||||
*/
|
||||
@Table(name = "forest_bank_account")
|
||||
@Data
|
||||
public class BankAccount {
|
||||
/** 主键 */
|
||||
/**
|
||||
* 主键
|
||||
*/
|
||||
@Id
|
||||
@Column(name = "id")
|
||||
@GeneratedValue(generator = "JDBC")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idBankAccount;
|
||||
/** 所属银行 */
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
/**
|
||||
* 所属银行
|
||||
*/
|
||||
private Long idBank;
|
||||
/** 银行账户 */
|
||||
/**
|
||||
* 银行账户
|
||||
*/
|
||||
private String bankAccount;
|
||||
/** 账户余额 */
|
||||
/**
|
||||
* 账户余额
|
||||
*/
|
||||
private BigDecimal accountBalance;
|
||||
/** 账户所有者 */
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
/**
|
||||
* 账户所有者
|
||||
*/
|
||||
private Long accountOwner;
|
||||
/** 创建时间 */
|
||||
/**
|
||||
* 创建时间
|
||||
*/
|
||||
@JSONField(format = "yyyy-MM-dd HH:mm:ss")
|
||||
private Date createdTime;
|
||||
/** 账户类型 */
|
||||
/**
|
||||
* 账户类型
|
||||
*/
|
||||
private String accountType;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.entity;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import javax.persistence.Column;
|
||||
@ -14,51 +14,73 @@ import java.util.Date;
|
||||
* @author ronger
|
||||
*/
|
||||
@Data
|
||||
@Table(name="forest_comment")
|
||||
public class Comment implements Serializable,Cloneable {
|
||||
/** 主键 */
|
||||
@Table(name = "forest_comment")
|
||||
public class Comment implements Serializable, Cloneable {
|
||||
/**
|
||||
* 主键
|
||||
*/
|
||||
@Id
|
||||
@GeneratedValue(generator = "JDBC")
|
||||
@Column(name = "id")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idComment;
|
||||
/** 评论内容 */
|
||||
/**
|
||||
* 评论内容
|
||||
*/
|
||||
@Column(name = "comment_content")
|
||||
private String commentContent;
|
||||
/** 作者 id */
|
||||
/**
|
||||
* 作者 id
|
||||
*/
|
||||
@Column(name = "comment_author_id")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long commentAuthorId;
|
||||
/** 文章 id */
|
||||
/**
|
||||
* 文章 id
|
||||
*/
|
||||
@Column(name = "comment_article_id")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long commentArticleId;
|
||||
/** 锚点 url */
|
||||
/**
|
||||
* 锚点 url
|
||||
*/
|
||||
@Column(name = "comment_sharp_url")
|
||||
private String commentSharpUrl;
|
||||
/** 父评论 id */
|
||||
/**
|
||||
* 父评论 id
|
||||
*/
|
||||
@Column(name = "comment_original_comment_id")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long commentOriginalCommentId;
|
||||
/** 状态 */
|
||||
/**
|
||||
* 状态
|
||||
*/
|
||||
@Column(name = "comment_status")
|
||||
private String commentStatus;
|
||||
/** 评论 IP */
|
||||
/**
|
||||
* 评论 IP
|
||||
*/
|
||||
@Column(name = "comment_ip")
|
||||
private String commentIP;
|
||||
/** User-Agent */
|
||||
/**
|
||||
* User-Agent
|
||||
*/
|
||||
@Column(name = "comment_ua")
|
||||
private String commentUA;
|
||||
/** 0:公开回帖,1:匿名回帖 */
|
||||
/**
|
||||
* 0:公开回帖,1:匿名回帖
|
||||
*/
|
||||
@Column(name = "comment_anonymous")
|
||||
private String commentAnonymous;
|
||||
/** 回帖计数 */
|
||||
/**
|
||||
* 回帖计数
|
||||
*/
|
||||
@Column(name = "comment_reply_count")
|
||||
private Integer commentReplyCount;
|
||||
/** 0:所有人可见,1:仅楼主和自己可见 */
|
||||
/**
|
||||
* 0:所有人可见,1:仅楼主和自己可见
|
||||
*/
|
||||
@Column(name = "comment_visible")
|
||||
private String commentVisible;
|
||||
/** 创建时间 */
|
||||
/**
|
||||
* 创建时间
|
||||
*/
|
||||
@Column(name = "created_time")
|
||||
private Date createdTime;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.entity;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import javax.persistence.GeneratedValue;
|
||||
@ -11,22 +11,29 @@ import java.util.Date;
|
||||
|
||||
/**
|
||||
* 货币发行记录
|
||||
*
|
||||
* @author ronger
|
||||
*/
|
||||
@Table(name = "forest_currency_issue")
|
||||
@Data
|
||||
public class CurrencyIssue {
|
||||
/** 主键 */
|
||||
/**
|
||||
* 主键
|
||||
*/
|
||||
@Id
|
||||
@GeneratedValue(generator = "JDBC")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long id;
|
||||
/** 发行数额 */
|
||||
/**
|
||||
* 发行数额
|
||||
*/
|
||||
private BigDecimal issueValue;
|
||||
/** 发行人 */
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
/**
|
||||
* 发行人
|
||||
*/
|
||||
private Long createdBy;
|
||||
/** 发行时间 */
|
||||
/**
|
||||
* 发行时间
|
||||
*/
|
||||
private Date createdTime;
|
||||
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.entity;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import javax.persistence.Column;
|
||||
@ -13,7 +13,7 @@ import java.math.BigDecimal;
|
||||
/**
|
||||
* @author ronger
|
||||
*/
|
||||
@Table(name="forest_currency_rule")
|
||||
@Table(name = "forest_currency_rule")
|
||||
@Data
|
||||
public class CurrencyRule implements Serializable, Cloneable {
|
||||
/**
|
||||
@ -22,7 +22,6 @@ public class CurrencyRule implements Serializable, Cloneable {
|
||||
@Id
|
||||
@GeneratedValue(generator = "JDBC")
|
||||
@Column(name = "id")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idCurrencyRule;
|
||||
/**
|
||||
* 规则名称
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.entity;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import javax.persistence.Column;
|
||||
@ -13,23 +13,28 @@ import java.io.Serializable;
|
||||
* @author ronger
|
||||
*/
|
||||
@Data
|
||||
@Table(name="forest_follow")
|
||||
public class Follow implements Serializable,Cloneable {
|
||||
/** 主键 */
|
||||
@Table(name = "forest_follow")
|
||||
public class Follow implements Serializable, Cloneable {
|
||||
/**
|
||||
* 主键
|
||||
*/
|
||||
@Id
|
||||
@GeneratedValue(generator = "JDBC")
|
||||
@Column(name = "id")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idFollow;
|
||||
/** 关注者 id */
|
||||
/**
|
||||
* 关注者 id
|
||||
*/
|
||||
@Column(name = "follower_id")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long followerId;
|
||||
/** 关注数据 id */
|
||||
/**
|
||||
* 关注数据 id
|
||||
*/
|
||||
@Column(name = "following_id")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long followingId;
|
||||
/** 0:用户,1:标签,2:帖子收藏,3:帖子关注 */
|
||||
/**
|
||||
* 0:用户,1:标签,2:帖子收藏,3:帖子关注
|
||||
*/
|
||||
@Column(name = "following_type")
|
||||
private String followingType;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.entity;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import javax.persistence.Column;
|
||||
@ -22,14 +22,12 @@ public class ForestFile {
|
||||
@Id
|
||||
@GeneratedValue(generator = "JDBC")
|
||||
@Column(name = "id")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long id;
|
||||
|
||||
/**
|
||||
* 文件大小
|
||||
*/
|
||||
@Column(name = "file_size")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long fileSize;
|
||||
|
||||
/**
|
||||
@ -71,7 +69,6 @@ public class ForestFile {
|
||||
* 创建人
|
||||
*/
|
||||
@Column(name = "created_by")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long createdBy;
|
||||
|
||||
|
||||
|
@ -1,7 +1,6 @@
|
||||
package com.rymcu.forest.entity;
|
||||
|
||||
import com.alibaba.fastjson.annotation.JSONField;
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
import lombok.Data;
|
||||
|
||||
import javax.persistence.Column;
|
||||
@ -13,41 +12,58 @@ import java.util.Date;
|
||||
|
||||
/**
|
||||
* 登录记录表
|
||||
*
|
||||
* @author ronger
|
||||
*/
|
||||
@Data
|
||||
@Table(name="forest_login_record")
|
||||
public class LoginRecord implements Serializable,Cloneable {
|
||||
@Table(name = "forest_login_record")
|
||||
public class LoginRecord implements Serializable, Cloneable {
|
||||
|
||||
/** 主键 */
|
||||
/**
|
||||
* 主键
|
||||
*/
|
||||
@Id
|
||||
@GeneratedValue(generator = "JDBC")
|
||||
@Column(name = "id")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long id;
|
||||
/** IP */
|
||||
/**
|
||||
* IP
|
||||
*/
|
||||
@Column(name = "login_ip")
|
||||
private String loginIp;
|
||||
/** User-Agent */
|
||||
/**
|
||||
* User-Agent
|
||||
*/
|
||||
@Column(name = "login_ua")
|
||||
private String loginUa;
|
||||
/** 城市 */
|
||||
/**
|
||||
* 城市
|
||||
*/
|
||||
@Column(name = "login_city")
|
||||
private String loginCity;
|
||||
/** 设备唯一标识 */
|
||||
/**
|
||||
* 设备唯一标识
|
||||
*/
|
||||
@Column(name = "login_device_id")
|
||||
private String loginDeviceId;
|
||||
/** 设备操作系统 */
|
||||
/**
|
||||
* 设备操作系统
|
||||
*/
|
||||
@Column(name = "login_os")
|
||||
private String loginOS;
|
||||
/** 设备浏览器 */
|
||||
/**
|
||||
* 设备浏览器
|
||||
*/
|
||||
@Column(name = "login_browser")
|
||||
private String loginBrowser;
|
||||
/** 用户 id */
|
||||
/**
|
||||
* 用户 id
|
||||
*/
|
||||
@Column(name = "id_user")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idUser;
|
||||
/** 创建时间 */
|
||||
/**
|
||||
* 创建时间
|
||||
*/
|
||||
@Column(name = "created_time")
|
||||
@JSONField(format = "yyyy-MM-dd HH:mm:ss")
|
||||
private Date createdTime;
|
||||
|
@ -1,7 +1,6 @@
|
||||
package com.rymcu.forest.entity;
|
||||
|
||||
import com.alibaba.fastjson.annotation.JSONField;
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
import lombok.Data;
|
||||
|
||||
import javax.persistence.Column;
|
||||
@ -16,21 +15,19 @@ import java.util.Date;
|
||||
* @author ronger
|
||||
*/
|
||||
@Data
|
||||
@Table(name="forest_notification")
|
||||
public class Notification implements Serializable,Cloneable {
|
||||
@Table(name = "forest_notification")
|
||||
public class Notification implements Serializable, Cloneable {
|
||||
/**
|
||||
* 主键
|
||||
*/
|
||||
@Id
|
||||
@GeneratedValue(generator = "JDBC")
|
||||
@Column(name = "id")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idNotification;
|
||||
/**
|
||||
* 用户id
|
||||
*/
|
||||
@Column(name = "id_user")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idUser;
|
||||
/**
|
||||
* 数据类型
|
||||
@ -41,7 +38,6 @@ public class Notification implements Serializable,Cloneable {
|
||||
* 数据id
|
||||
*/
|
||||
@Column(name = "data_id")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long dataId;
|
||||
/**
|
||||
* 数据摘要
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.entity;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.Data;
|
||||
import tk.mybatis.mapper.annotation.ColumnType;
|
||||
|
||||
@ -15,17 +15,16 @@ import java.io.Serializable;
|
||||
*/
|
||||
@Data
|
||||
@Table(name = "forest_permission")
|
||||
public class Permission implements Serializable,Cloneable {
|
||||
public class Permission implements Serializable, Cloneable {
|
||||
|
||||
@Id
|
||||
@Column(name = "id")
|
||||
@GeneratedValue(generator = "JDBC")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idPermission;
|
||||
|
||||
/**
|
||||
* 权限标识
|
||||
* */
|
||||
*/
|
||||
@ColumnType(column = "permission_category")
|
||||
private String permissionCategory;
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.entity;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import javax.persistence.*;
|
||||
@ -12,27 +12,41 @@ import java.util.Date;
|
||||
@Data
|
||||
@Table(name = "forest_portfolio")
|
||||
public class Portfolio {
|
||||
/** 主键 */
|
||||
/**
|
||||
* 主键
|
||||
*/
|
||||
@Id
|
||||
@GeneratedValue(generator = "JDBC")
|
||||
@Column(name = "id")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idPortfolio;
|
||||
/** 作品集头像 */
|
||||
/**
|
||||
* 作品集头像
|
||||
*/
|
||||
@Column(name = "portfolio_head_img_url")
|
||||
private String headImgUrl;
|
||||
/** 作品集名称 */
|
||||
/**
|
||||
* 作品集名称
|
||||
*/
|
||||
private String portfolioTitle;
|
||||
/** 作品集作者 */
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
/**
|
||||
* 作品集作者
|
||||
*/
|
||||
private Long portfolioAuthorId;
|
||||
/** 作品集介绍 */
|
||||
/**
|
||||
* 作品集介绍
|
||||
*/
|
||||
private String portfolioDescription;
|
||||
/** 作品集介绍 Html */
|
||||
/**
|
||||
* 作品集介绍 Html
|
||||
*/
|
||||
private String portfolioDescriptionHtml;
|
||||
/** 创建时间 */
|
||||
/**
|
||||
* 创建时间
|
||||
*/
|
||||
private Date createdTime;
|
||||
/** 更新时间 */
|
||||
/**
|
||||
* 更新时间
|
||||
*/
|
||||
private Date updatedTime;
|
||||
@Transient
|
||||
private String headImgType;
|
||||
|
@ -1,7 +1,6 @@
|
||||
package com.rymcu.forest.entity;
|
||||
|
||||
import com.alibaba.fastjson.annotation.JSONField;
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
import lombok.Data;
|
||||
|
||||
import javax.persistence.Column;
|
||||
@ -27,7 +26,6 @@ public class Product implements Serializable, Cloneable {
|
||||
@Id
|
||||
@GeneratedValue(generator = "JDBC")
|
||||
@Column(name = "id")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idProduct;
|
||||
/**
|
||||
* 产品名
|
||||
|
@ -1,7 +1,6 @@
|
||||
package com.rymcu.forest.entity;
|
||||
|
||||
import com.alibaba.fastjson.annotation.JSONField;
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
import lombok.Data;
|
||||
|
||||
import javax.persistence.Table;
|
||||
@ -21,7 +20,6 @@ public class ProductContent implements Serializable, Cloneable {
|
||||
/**
|
||||
* 产品表主键
|
||||
*/
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idProduct;
|
||||
/**
|
||||
* 产品详情原文
|
||||
|
@ -1,7 +1,6 @@
|
||||
package com.rymcu.forest.entity;
|
||||
|
||||
import com.alibaba.fastjson.annotation.JSONField;
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
import lombok.Data;
|
||||
|
||||
import javax.persistence.Column;
|
||||
@ -16,47 +15,46 @@ import java.util.Date;
|
||||
*/
|
||||
@Data
|
||||
@Table(name = "forest_role")
|
||||
public class Role implements Serializable,Cloneable {
|
||||
public class Role implements Serializable, Cloneable {
|
||||
@Id
|
||||
@Column(name = "id")
|
||||
@GeneratedValue(generator = "JDBC")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idRole;
|
||||
|
||||
/**
|
||||
* 角色名称
|
||||
* */
|
||||
*/
|
||||
@Column(name = "name")
|
||||
private String name;
|
||||
|
||||
/**
|
||||
* 拼音码
|
||||
* */
|
||||
*/
|
||||
@Column(name = "input_code")
|
||||
private String inputCode;
|
||||
|
||||
/**
|
||||
* 权重
|
||||
* */
|
||||
*/
|
||||
@Column(name = "weights")
|
||||
private Integer weights;
|
||||
|
||||
/**
|
||||
* 状态
|
||||
* */
|
||||
*/
|
||||
@Column(name = "status")
|
||||
private String status;
|
||||
|
||||
/**
|
||||
* 创建时间
|
||||
* */
|
||||
*/
|
||||
@Column(name = "created_time")
|
||||
@JSONField(format = "yyyy-MM-dd HH:mm:ss")
|
||||
private Date createdTime;
|
||||
|
||||
/**
|
||||
* 更新时间
|
||||
* */
|
||||
*/
|
||||
@Column(name = "updated_time")
|
||||
@JSONField(format = "yyyy-MM-dd HH:mm:ss")
|
||||
private Date updatedTime;
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.entity;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import javax.persistence.Column;
|
||||
@ -14,33 +14,54 @@ import java.util.Date;
|
||||
* @author ronger
|
||||
*/
|
||||
@Data
|
||||
@Table(name="forest_special_day")
|
||||
public class SpecialDay implements Serializable,Cloneable{
|
||||
/** */
|
||||
@Table(name = "forest_special_day")
|
||||
public class SpecialDay implements Serializable, Cloneable {
|
||||
/**
|
||||
*
|
||||
*/
|
||||
@Id
|
||||
@Column(name = "id")
|
||||
@GeneratedValue(generator = "JDBC")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idSpecialDay;
|
||||
/** 名称 */
|
||||
/**
|
||||
* 名称
|
||||
*/
|
||||
private String specialDayName;
|
||||
/** 权重/优先级,小数优秀 */
|
||||
/**
|
||||
* 权重/优先级,小数优秀
|
||||
*/
|
||||
private Integer weights;
|
||||
/** 开始时间 */
|
||||
/**
|
||||
* 开始时间
|
||||
*/
|
||||
private Date startTime;
|
||||
/** 过期时间 */
|
||||
/**
|
||||
* 过期时间
|
||||
*/
|
||||
private Date expirationTime;
|
||||
/** 是否重复 */
|
||||
/**
|
||||
* 是否重复
|
||||
*/
|
||||
private Integer repeat;
|
||||
/** 重复周期 */
|
||||
/**
|
||||
* 重复周期
|
||||
*/
|
||||
private Integer repeatCycle;
|
||||
/** 0:天1:周2:月3:年 */
|
||||
/**
|
||||
* 0:天1:周2:月3:年
|
||||
*/
|
||||
private Integer repeatCycleUnit;
|
||||
/** 创建时间 */
|
||||
/**
|
||||
* 创建时间
|
||||
*/
|
||||
private Date createdTime;
|
||||
/** 图片路径 */
|
||||
/**
|
||||
* 图片路径
|
||||
*/
|
||||
private String imgUrl;
|
||||
/** 执行全局样式 */
|
||||
/**
|
||||
* 执行全局样式
|
||||
*/
|
||||
private String cssStyle;
|
||||
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.entity;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import javax.persistence.GeneratedValue;
|
||||
@ -14,14 +14,13 @@ import java.util.Date;
|
||||
* @author ronger
|
||||
*/
|
||||
@Data
|
||||
@Table(name="forest_sponsor")
|
||||
@Table(name = "forest_sponsor")
|
||||
public class Sponsor implements Serializable, Cloneable {
|
||||
/**
|
||||
* 主键
|
||||
*/
|
||||
@Id
|
||||
@GeneratedValue(generator = "JDBC")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long id;
|
||||
/**
|
||||
* 数据类型
|
||||
@ -30,12 +29,10 @@ public class Sponsor implements Serializable, Cloneable {
|
||||
/**
|
||||
* 数据主键
|
||||
*/
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long dataId;
|
||||
/**
|
||||
* 赞赏人
|
||||
*/
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long sponsor;
|
||||
/**
|
||||
* 赞赏日期
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.entity;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import javax.persistence.Column;
|
||||
@ -15,37 +15,64 @@ import java.util.Date;
|
||||
*/
|
||||
@Data
|
||||
@Table(name = "forest_tag")
|
||||
public class Tag implements Serializable,Cloneable {
|
||||
/** 主键 */
|
||||
public class Tag implements Serializable, Cloneable {
|
||||
/**
|
||||
* 主键
|
||||
*/
|
||||
@Id
|
||||
@Column(name = "id")
|
||||
@GeneratedValue(generator = "JDBC")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idTag;
|
||||
/** 标签名 */
|
||||
/**
|
||||
* 标签名
|
||||
*/
|
||||
private String tagTitle;
|
||||
/** 标签图标 */
|
||||
/**
|
||||
* 标签图标
|
||||
*/
|
||||
private String tagIconPath;
|
||||
/** 标签uri */
|
||||
/**
|
||||
* 标签uri
|
||||
*/
|
||||
private String tagUri;
|
||||
/** 描述 */
|
||||
/**
|
||||
* 描述
|
||||
*/
|
||||
private String tagDescription;
|
||||
/** 浏览量 */
|
||||
/**
|
||||
* 浏览量
|
||||
*/
|
||||
private Integer tagViewCount;
|
||||
/** 关联文章总数 */
|
||||
/**
|
||||
* 关联文章总数
|
||||
*/
|
||||
private Integer tagArticleCount;
|
||||
/** 标签广告 */
|
||||
/**
|
||||
* 标签广告
|
||||
*/
|
||||
private String tagAd;
|
||||
/** 是否显示全站侧边栏广告 */
|
||||
/**
|
||||
* 是否显示全站侧边栏广告
|
||||
*/
|
||||
private String tagShowSideAd;
|
||||
/** 标签状态 */
|
||||
/**
|
||||
* 标签状态
|
||||
*/
|
||||
private String tagStatus;
|
||||
/** 创建时间 */
|
||||
/**
|
||||
* 创建时间
|
||||
*/
|
||||
private Date createdTime;
|
||||
/** 更新时间 */
|
||||
/**
|
||||
* 更新时间
|
||||
*/
|
||||
private Date updatedTime;
|
||||
/** 保留标签 */
|
||||
/**
|
||||
* 保留标签
|
||||
*/
|
||||
private String tagReservation;
|
||||
/** 描述 */
|
||||
/**
|
||||
* 描述
|
||||
*/
|
||||
private String tagDescriptionHtml;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.entity;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import javax.persistence.Column;
|
||||
@ -16,35 +16,60 @@ import java.util.Date;
|
||||
@Table(name = "forest_topic")
|
||||
public class Topic {
|
||||
|
||||
/** 主键 */
|
||||
/**
|
||||
* 主键
|
||||
*/
|
||||
@Id
|
||||
@Column(name = "id")
|
||||
@GeneratedValue(generator = "JDBC")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idTopic;
|
||||
/** 专题标题 */
|
||||
/**
|
||||
* 专题标题
|
||||
*/
|
||||
private String topicTitle;
|
||||
/** 专题路径 */
|
||||
/**
|
||||
* 专题路径
|
||||
*/
|
||||
private String topicUri;
|
||||
/** 专题描述 */
|
||||
/**
|
||||
* 专题描述
|
||||
*/
|
||||
private String topicDescription;
|
||||
/** 专题类型 */
|
||||
/**
|
||||
* 专题类型
|
||||
*/
|
||||
private String topicType;
|
||||
/** 专题序号;10 */
|
||||
/**
|
||||
* 专题序号;10
|
||||
*/
|
||||
private Integer topicSort;
|
||||
/** 专题图片路径 */
|
||||
/**
|
||||
* 专题图片路径
|
||||
*/
|
||||
private String topicIconPath;
|
||||
/** 0:作为导航1:不作为导航;0 */
|
||||
/**
|
||||
* 0:作为导航1:不作为导航;0
|
||||
*/
|
||||
private String topicNva;
|
||||
/** 专题下标签总数;0 */
|
||||
/**
|
||||
* 专题下标签总数;0
|
||||
*/
|
||||
private Integer topicTagCount;
|
||||
/** 0:正常1:禁用;0 */
|
||||
/**
|
||||
* 0:正常1:禁用;0
|
||||
*/
|
||||
private String topicStatus;
|
||||
/** 创建时间 */
|
||||
/**
|
||||
* 创建时间
|
||||
*/
|
||||
private Date createdTime;
|
||||
/** 更新时间 */
|
||||
/**
|
||||
* 更新时间
|
||||
*/
|
||||
private Date updatedTime;
|
||||
/** 专题描述 Html */
|
||||
/**
|
||||
* 专题描述 Html
|
||||
*/
|
||||
private String topicDescriptionHtml;
|
||||
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.entity;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import javax.persistence.Column;
|
||||
@ -12,29 +12,45 @@ import java.util.Date;
|
||||
|
||||
/**
|
||||
* 交易记录
|
||||
*
|
||||
* @author ronger
|
||||
*/
|
||||
@Table(name = "forest_transaction_record")
|
||||
@Data
|
||||
public class TransactionRecord {
|
||||
/** 主键 */
|
||||
/**
|
||||
* 主键
|
||||
*/
|
||||
@Id
|
||||
@Column(name = "id")
|
||||
@GeneratedValue(generator = "JDBC")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idTransactionRecord;
|
||||
/** 交易流水号 */
|
||||
/**
|
||||
* 交易流水号
|
||||
*/
|
||||
private String transactionNo;
|
||||
/** 款项 */
|
||||
/**
|
||||
* 款项
|
||||
*/
|
||||
private String funds;
|
||||
/** 交易发起方 */
|
||||
/**
|
||||
* 交易发起方
|
||||
*/
|
||||
private String formBankAccount;
|
||||
/** 交易收款方 */
|
||||
/**
|
||||
* 交易收款方
|
||||
*/
|
||||
private String toBankAccount;
|
||||
/** 交易金额 */
|
||||
/**
|
||||
* 交易金额
|
||||
*/
|
||||
private BigDecimal money;
|
||||
/** 交易类型 */
|
||||
/**
|
||||
* 交易类型
|
||||
*/
|
||||
private String transactionType;
|
||||
/** 交易时间 */
|
||||
/**
|
||||
* 交易时间
|
||||
*/
|
||||
private Date transactionTime;
|
||||
}
|
||||
|
@ -1,7 +1,6 @@
|
||||
package com.rymcu.forest.entity;
|
||||
|
||||
import com.alibaba.fastjson.annotation.JSONField;
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
import lombok.Data;
|
||||
import org.apache.ibatis.type.JdbcType;
|
||||
import tk.mybatis.mapper.annotation.ColumnType;
|
||||
@ -18,114 +17,113 @@ import java.util.Date;
|
||||
*/
|
||||
@Table(name = "forest_user")
|
||||
@Data
|
||||
public class User implements Serializable,Cloneable {
|
||||
public class User implements Serializable, Cloneable {
|
||||
@Id
|
||||
@Column(name = "id")
|
||||
@GeneratedValue(generator = "JDBC")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idUser;
|
||||
|
||||
/**
|
||||
* 登录账号
|
||||
* */
|
||||
*/
|
||||
@Column(name = "account")
|
||||
private String account;
|
||||
|
||||
/**
|
||||
* 密码
|
||||
* */
|
||||
*/
|
||||
@Column(name = "password")
|
||||
@JSONField(serialize=false)
|
||||
@JSONField(serialize = false)
|
||||
private String password;
|
||||
|
||||
/**
|
||||
* 昵称
|
||||
* */
|
||||
*/
|
||||
@Column(name = "nickname")
|
||||
private String nickname;
|
||||
|
||||
/**
|
||||
* 真实姓名
|
||||
* */
|
||||
*/
|
||||
@Column(name = "real_name")
|
||||
private String realName;
|
||||
|
||||
/**
|
||||
* 性别 1:男性 2:女性
|
||||
* */
|
||||
*/
|
||||
@Column(name = "sex")
|
||||
private String sex;
|
||||
|
||||
/**
|
||||
* 头像文件类型
|
||||
* */
|
||||
*/
|
||||
@Column(name = "avatar_type")
|
||||
private String avatarType;
|
||||
|
||||
/**
|
||||
* 头像路径
|
||||
* */
|
||||
*/
|
||||
@Column(name = "avatar_url")
|
||||
private String avatarUrl;
|
||||
|
||||
/**
|
||||
* 邮箱地址
|
||||
* */
|
||||
*/
|
||||
@ColumnType(column = "email",
|
||||
jdbcType = JdbcType.VARCHAR)
|
||||
private String email;
|
||||
|
||||
/**
|
||||
* 手机号码
|
||||
* */
|
||||
*/
|
||||
@ColumnType(column = "phone",
|
||||
jdbcType = JdbcType.VARCHAR)
|
||||
private String phone;
|
||||
|
||||
/**
|
||||
* 签名
|
||||
* */
|
||||
*/
|
||||
@ColumnType(column = "signature",
|
||||
jdbcType = JdbcType.VARCHAR)
|
||||
private String signature;
|
||||
|
||||
/**
|
||||
* 状态
|
||||
* */
|
||||
*/
|
||||
@Column(name = "status")
|
||||
private String status;
|
||||
|
||||
/**
|
||||
* 最后登录时间
|
||||
* */
|
||||
*/
|
||||
@Column(name = "last_login_time")
|
||||
@JSONField(format = "yyyy-MM-dd HH:mm:ss")
|
||||
private Date lastLoginTime;
|
||||
|
||||
/**
|
||||
* 创建时间
|
||||
* */
|
||||
*/
|
||||
@Column(name = "created_time")
|
||||
@JSONField(format = "yyyy-MM-dd HH:mm:ss")
|
||||
private Date createdTime;
|
||||
|
||||
/**
|
||||
* 更新时间
|
||||
* */
|
||||
*/
|
||||
@Column(name = "updated_time")
|
||||
@JSONField(format = "yyyy-MM-dd HH:mm:ss")
|
||||
private Date updatedTime;
|
||||
|
||||
/**
|
||||
* 最后在线时间
|
||||
* */
|
||||
*/
|
||||
@Column(name = "last_online_time")
|
||||
@JSONField(format = "yyyy-MM-dd HH:mm:ss")
|
||||
private Date lastOnlineTime;
|
||||
|
||||
/**
|
||||
* 个人中心背景图片
|
||||
* */
|
||||
*/
|
||||
@Column(name = "bg_img_url")
|
||||
private String bgImgUrl;
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.entity;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import javax.persistence.Id;
|
||||
@ -14,7 +14,6 @@ import javax.persistence.Table;
|
||||
public class UserExtend {
|
||||
|
||||
@Id
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idUser;
|
||||
|
||||
private String github;
|
||||
|
@ -1,7 +1,6 @@
|
||||
package com.rymcu.forest.entity;
|
||||
|
||||
import com.alibaba.fastjson.annotation.JSONField;
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
import lombok.Data;
|
||||
|
||||
import javax.persistence.Column;
|
||||
@ -13,45 +12,64 @@ import java.util.Date;
|
||||
|
||||
/**
|
||||
* 浏览表
|
||||
*
|
||||
* @author ronger
|
||||
*/
|
||||
@Data
|
||||
@Table(name="forest_visit")
|
||||
public class Visit implements Serializable,Cloneable {
|
||||
@Table(name = "forest_visit")
|
||||
public class Visit implements Serializable, Cloneable {
|
||||
|
||||
/** 主键 */
|
||||
/**
|
||||
* 主键
|
||||
*/
|
||||
@Id
|
||||
@GeneratedValue(generator = "JDBC")
|
||||
@Column(name = "id")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long id;
|
||||
/** 浏览链接 */
|
||||
/**
|
||||
* 浏览链接
|
||||
*/
|
||||
@Column(name = "visit_url")
|
||||
private String visitUrl;
|
||||
/** IP */
|
||||
/**
|
||||
* IP
|
||||
*/
|
||||
@Column(name = "visit_ip")
|
||||
private String visitIp;
|
||||
/** User-Agent */
|
||||
/**
|
||||
* User-Agent
|
||||
*/
|
||||
@Column(name = "visit_ua")
|
||||
private String visitUa;
|
||||
/** 城市 */
|
||||
/**
|
||||
* 城市
|
||||
*/
|
||||
@Column(name = "visit_city")
|
||||
private String visitCity;
|
||||
/** 设备唯一标识 */
|
||||
/**
|
||||
* 设备唯一标识
|
||||
*/
|
||||
@Column(name = "visit_device_id")
|
||||
private String visitDeviceId;
|
||||
/** 浏览者 id */
|
||||
/**
|
||||
* 浏览者 id
|
||||
*/
|
||||
@Column(name = "visit_user_id")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long visitUserId;
|
||||
/** 上游链接 */
|
||||
/**
|
||||
* 上游链接
|
||||
*/
|
||||
@Column(name = "visit_referer_url")
|
||||
private String visitRefererUrl;
|
||||
/** 创建时间 */
|
||||
/**
|
||||
* 创建时间
|
||||
*/
|
||||
@Column(name = "created_time")
|
||||
@JSONField(format = "yyyy-MM-dd HH:mm:ss")
|
||||
private Date createdTime;
|
||||
/** 过期时间 */
|
||||
/**
|
||||
* 过期时间
|
||||
*/
|
||||
@Column(name = "expired_time")
|
||||
@JSONField(format = "yyyy-MM-dd HH:mm:ss")
|
||||
private Date expiredTime;
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.entity;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import javax.persistence.Column;
|
||||
@ -18,7 +18,6 @@ public class WxUser {
|
||||
@Id
|
||||
@Column(name = "id")
|
||||
@GeneratedValue(generator = "JDBC")
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idWxUser;
|
||||
|
||||
private Boolean subscribe;
|
||||
|
@ -24,37 +24,38 @@ import java.util.Map;
|
||||
@RequestMapping("/api/v1/lucene/dic")
|
||||
public class UserDicController {
|
||||
|
||||
@Resource private UserDicService dicService;
|
||||
@Resource
|
||||
private UserDicService dicService;
|
||||
|
||||
@GetMapping("/getAll")
|
||||
public GlobalResult getAll(
|
||||
@RequestParam(defaultValue = "0") Integer page,
|
||||
@RequestParam(defaultValue = "10") Integer rows) {
|
||||
PageHelper.startPage(page, rows);
|
||||
List<UserDic> list = dicService.getAll();
|
||||
PageInfo<UserDic> pageInfo = new PageInfo<>(list);
|
||||
Map<String, Object> map = new HashMap<>(2);
|
||||
map.put("userDic", pageInfo.getList());
|
||||
Map pagination = Utils.getPagination(pageInfo);
|
||||
map.put("pagination", pagination);
|
||||
return GlobalResultGenerator.genSuccessResult(map);
|
||||
}
|
||||
@GetMapping("/getAll")
|
||||
public GlobalResult getAll(
|
||||
@RequestParam(defaultValue = "0") Integer page,
|
||||
@RequestParam(defaultValue = "10") Integer rows) {
|
||||
PageHelper.startPage(page, rows);
|
||||
List<UserDic> list = dicService.getAll();
|
||||
PageInfo<UserDic> pageInfo = new PageInfo<>(list);
|
||||
Map<String, Object> map = new HashMap<>(2);
|
||||
map.put("userDic", pageInfo.getList());
|
||||
Map pagination = Utils.getPagination(pageInfo);
|
||||
map.put("pagination", pagination);
|
||||
return GlobalResultGenerator.genSuccessResult(map);
|
||||
}
|
||||
|
||||
@PostMapping("/addDic/{dic}")
|
||||
public GlobalResult addDic(@PathVariable String dic) {
|
||||
dicService.addDic(dic);
|
||||
return GlobalResultGenerator.genSuccessResult("新增字典成功");
|
||||
}
|
||||
@PostMapping("/addDic/{dic}")
|
||||
public GlobalResult addDic(@PathVariable String dic) {
|
||||
dicService.addDic(dic);
|
||||
return GlobalResultGenerator.genSuccessResult("新增字典成功");
|
||||
}
|
||||
|
||||
@PutMapping("/editDic")
|
||||
public GlobalResult getAllDic(@RequestBody UserDic dic) {
|
||||
dicService.updateDic(dic);
|
||||
return GlobalResultGenerator.genSuccessResult("更新字典成功");
|
||||
}
|
||||
@PutMapping("/editDic")
|
||||
public GlobalResult getAllDic(@RequestBody UserDic dic) {
|
||||
dicService.updateDic(dic);
|
||||
return GlobalResultGenerator.genSuccessResult("更新字典成功");
|
||||
}
|
||||
|
||||
@DeleteMapping("/deleteDic/{id}")
|
||||
public GlobalResult deleteDic(@PathVariable String id) {
|
||||
dicService.deleteDic(id);
|
||||
return GlobalResultGenerator.genSuccessResult("删除字典成功");
|
||||
}
|
||||
@DeleteMapping("/deleteDic/{id}")
|
||||
public GlobalResult deleteDic(@PathVariable String id) {
|
||||
dicService.deleteDic(id);
|
||||
return GlobalResultGenerator.genSuccessResult("删除字典成功");
|
||||
}
|
||||
}
|
||||
|
@ -1,75 +1,74 @@
|
||||
/**
|
||||
* IK 中文分词 版本 5.0
|
||||
* IK Analyzer release 5.0
|
||||
*
|
||||
* <p>
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* <p>
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* <p>
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* <p>
|
||||
* 源代码由林良益(linliangyi2005@gmail.com)提供
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
*/
|
||||
package com.rymcu.forest.lucene.cfg;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* 配置管理类接口
|
||||
*
|
||||
*
|
||||
*/
|
||||
public interface Configuration {
|
||||
|
||||
/**
|
||||
* 返回useSmart标志位
|
||||
* useSmart =true ,分词器使用智能切分策略, =false则使用细粒度切分
|
||||
* @return useSmart
|
||||
*/
|
||||
boolean useSmart();
|
||||
/**
|
||||
* 返回useSmart标志位
|
||||
* useSmart =true ,分词器使用智能切分策略, =false则使用细粒度切分
|
||||
* @return useSmart
|
||||
*/
|
||||
boolean useSmart();
|
||||
|
||||
/**
|
||||
* 设置useSmart标志位
|
||||
* useSmart =true ,分词器使用智能切分策略, =false则使用细粒度切分
|
||||
* @param useSmart
|
||||
*/
|
||||
void setUseSmart(boolean useSmart);
|
||||
/**
|
||||
* 设置useSmart标志位
|
||||
* useSmart =true ,分词器使用智能切分策略, =false则使用细粒度切分
|
||||
* @param useSmart
|
||||
*/
|
||||
void setUseSmart(boolean useSmart);
|
||||
|
||||
/**
|
||||
* 获取主词典路径
|
||||
*
|
||||
* @return String 主词典路径
|
||||
*/
|
||||
String getMainDictionary();
|
||||
/**
|
||||
* 获取主词典路径
|
||||
*
|
||||
* @return String 主词典路径
|
||||
*/
|
||||
String getMainDictionary();
|
||||
|
||||
/**
|
||||
* 获取量词词典路径
|
||||
* @return String 量词词典路径
|
||||
*/
|
||||
String getQuantifierDictionary();
|
||||
/**
|
||||
* 获取量词词典路径
|
||||
* @return String 量词词典路径
|
||||
*/
|
||||
String getQuantifierDictionary();
|
||||
|
||||
/**
|
||||
* 获取扩展字典配置路径
|
||||
* @return List<String> 相对类加载器的路径
|
||||
*/
|
||||
List<String> getExtDictionary();
|
||||
/**
|
||||
* 获取扩展字典配置路径
|
||||
* @return List<String> 相对类加载器的路径
|
||||
*/
|
||||
List<String> getExtDictionary();
|
||||
|
||||
/**
|
||||
* 获取扩展停止词典配置路径
|
||||
* @return List<String> 相对类加载器的路径
|
||||
*/
|
||||
List<String> getExtStopWordDictionary();
|
||||
/**
|
||||
* 获取扩展停止词典配置路径
|
||||
* @return List<String> 相对类加载器的路径
|
||||
*/
|
||||
List<String> getExtStopWordDictionary();
|
||||
|
||||
}
|
||||
|
@ -24,110 +24,124 @@ import org.springframework.stereotype.Component;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/** Configuration 默认实现 2012-5-8 */
|
||||
/**
|
||||
* Configuration 默认实现 2012-5-8
|
||||
*/
|
||||
@Component
|
||||
public class DefaultConfig implements Configuration {
|
||||
|
||||
/** 分词器默认字典路径 */
|
||||
private static final String PATH_DIC_MAIN = "lucene/main2012.dic";
|
||||
/** 题词字典路径 */
|
||||
private static final String PATH_DIC_QUANTIFIER = "lucene/quantifier.dic";
|
||||
/** 用户自定义字典路径 */
|
||||
private static final String PATH_USER_DIC =
|
||||
System.getProperty("user.dir") + "/lucene/userDic/userDic.dic";
|
||||
/** 配置属性——扩展字典 */
|
||||
private String extDic = "lucene/ext.dic;" + PATH_USER_DIC;
|
||||
/** 配置属性——扩展停止词典 */
|
||||
private String extStopword = "lucene/stopword.dic";
|
||||
/** 是否使用smart方式分词 */
|
||||
private boolean useSmart;
|
||||
/**
|
||||
* 分词器默认字典路径
|
||||
*/
|
||||
private static final String PATH_DIC_MAIN = "lucene/main2012.dic";
|
||||
/**
|
||||
* 题词字典路径
|
||||
*/
|
||||
private static final String PATH_DIC_QUANTIFIER = "lucene/quantifier.dic";
|
||||
/**
|
||||
* 用户自定义字典路径
|
||||
*/
|
||||
private static final String PATH_USER_DIC =
|
||||
System.getProperty("user.dir") + "/lucene/userDic/userDic.dic";
|
||||
/**
|
||||
* 配置属性——扩展字典
|
||||
*/
|
||||
private String extDic = "lucene/ext.dic;" + PATH_USER_DIC;
|
||||
/**
|
||||
* 配置属性——扩展停止词典
|
||||
*/
|
||||
private String extStopword = "lucene/stopword.dic";
|
||||
/**
|
||||
* 是否使用smart方式分词
|
||||
*/
|
||||
private boolean useSmart;
|
||||
|
||||
/**
|
||||
* 返回单例
|
||||
*
|
||||
* @return Configuration单例
|
||||
*/
|
||||
public static Configuration getInstance() {
|
||||
return new DefaultConfig();
|
||||
}
|
||||
|
||||
/**
|
||||
* 返回useSmart标志位 useSmart =true ,分词器使用智能切分策略, =false则使用细粒度切分
|
||||
*
|
||||
* @return useSmart
|
||||
*/
|
||||
@Override
|
||||
public boolean useSmart() {
|
||||
return useSmart;
|
||||
}
|
||||
|
||||
/**
|
||||
* 设置useSmart标志位 useSmart =true ,分词器使用智能切分策略, =false则使用细粒度切分
|
||||
*
|
||||
* @param useSmart
|
||||
*/
|
||||
@Override
|
||||
public void setUseSmart(boolean useSmart) {
|
||||
this.useSmart = useSmart;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取主词典路径
|
||||
*
|
||||
* @return String 主词典路径
|
||||
*/
|
||||
@Override
|
||||
public String getMainDictionary() {
|
||||
return PATH_DIC_MAIN;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取量词词典路径
|
||||
*
|
||||
* @return String 量词词典路径
|
||||
*/
|
||||
@Override
|
||||
public String getQuantifierDictionary() {
|
||||
return PATH_DIC_QUANTIFIER;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取扩展字典配置路径
|
||||
*
|
||||
* @return List<String> 相对类加载器的路径
|
||||
*/
|
||||
@Override
|
||||
public List<String> getExtDictionary() {
|
||||
List<String> extDictFiles = new ArrayList<String>(2);
|
||||
if (extDic != null) {
|
||||
// 使用;分割多个扩展字典配置
|
||||
String[] filePaths = extDic.split(";");
|
||||
for (String filePath : filePaths) {
|
||||
if (filePath != null && !"".equals(filePath.trim())) {
|
||||
extDictFiles.add(filePath.trim());
|
||||
}
|
||||
}
|
||||
/**
|
||||
* 返回单例
|
||||
*
|
||||
* @return Configuration单例
|
||||
*/
|
||||
public static Configuration getInstance() {
|
||||
return new DefaultConfig();
|
||||
}
|
||||
return extDictFiles;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取扩展停止词典配置路径
|
||||
*
|
||||
* @return List<String> 相对类加载器的路径
|
||||
*/
|
||||
@Override
|
||||
public List<String> getExtStopWordDictionary() {
|
||||
List<String> extStopWordDictFiles = new ArrayList<>(2);
|
||||
if (extStopword != null) {
|
||||
// 使用;分割多个扩展字典配置
|
||||
String[] filePaths = extStopword.split(";");
|
||||
for (String filePath : filePaths) {
|
||||
if (filePath != null && !"".equals(filePath.trim())) {
|
||||
extStopWordDictFiles.add(filePath.trim());
|
||||
}
|
||||
}
|
||||
/**
|
||||
* 返回useSmart标志位 useSmart =true ,分词器使用智能切分策略, =false则使用细粒度切分
|
||||
*
|
||||
* @return useSmart
|
||||
*/
|
||||
@Override
|
||||
public boolean useSmart() {
|
||||
return useSmart;
|
||||
}
|
||||
|
||||
/**
|
||||
* 设置useSmart标志位 useSmart =true ,分词器使用智能切分策略, =false则使用细粒度切分
|
||||
*
|
||||
* @param useSmart
|
||||
*/
|
||||
@Override
|
||||
public void setUseSmart(boolean useSmart) {
|
||||
this.useSmart = useSmart;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取主词典路径
|
||||
*
|
||||
* @return String 主词典路径
|
||||
*/
|
||||
@Override
|
||||
public String getMainDictionary() {
|
||||
return PATH_DIC_MAIN;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取量词词典路径
|
||||
*
|
||||
* @return String 量词词典路径
|
||||
*/
|
||||
@Override
|
||||
public String getQuantifierDictionary() {
|
||||
return PATH_DIC_QUANTIFIER;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取扩展字典配置路径
|
||||
*
|
||||
* @return List<String> 相对类加载器的路径
|
||||
*/
|
||||
@Override
|
||||
public List<String> getExtDictionary() {
|
||||
List<String> extDictFiles = new ArrayList<String>(2);
|
||||
if (extDic != null) {
|
||||
// 使用;分割多个扩展字典配置
|
||||
String[] filePaths = extDic.split(";");
|
||||
for (String filePath : filePaths) {
|
||||
if (filePath != null && !"".equals(filePath.trim())) {
|
||||
extDictFiles.add(filePath.trim());
|
||||
}
|
||||
}
|
||||
}
|
||||
return extDictFiles;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取扩展停止词典配置路径
|
||||
*
|
||||
* @return List<String> 相对类加载器的路径
|
||||
*/
|
||||
@Override
|
||||
public List<String> getExtStopWordDictionary() {
|
||||
List<String> extStopWordDictFiles = new ArrayList<>(2);
|
||||
if (extStopword != null) {
|
||||
// 使用;分割多个扩展字典配置
|
||||
String[] filePaths = extStopword.split(";");
|
||||
for (String filePath : filePaths) {
|
||||
if (filePath != null && !"".equals(filePath.trim())) {
|
||||
extStopWordDictFiles.add(filePath.trim());
|
||||
}
|
||||
}
|
||||
}
|
||||
return extStopWordDictFiles;
|
||||
}
|
||||
return extStopWordDictFiles;
|
||||
}
|
||||
}
|
||||
|
@ -26,328 +26,364 @@ import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.*;
|
||||
|
||||
/** 分词器上下文状态 */
|
||||
/**
|
||||
* 分词器上下文状态
|
||||
*/
|
||||
class AnalyzeContext {
|
||||
|
||||
/** 默认缓冲区大小 */
|
||||
private static final int BUFF_SIZE = 4096;
|
||||
/** 缓冲区耗尽的临界值 */
|
||||
private static final int BUFF_EXHAUST_CRITICAL = 100;
|
||||
/** 字符窜读取缓冲 */
|
||||
private char[] segmentBuff;
|
||||
/** 字符类型数组 */
|
||||
private int[] charTypes;
|
||||
/** 记录Reader内已分析的字串总长度, 在分多段分析词元时,该变量累计当前的segmentBuff相对于reader起始位置的位移 */
|
||||
private int buffOffset;
|
||||
/** 当前缓冲区位置指针 */
|
||||
private int cursor;
|
||||
/** 最近一次读入的,可处理的字串长度 */
|
||||
private int available;
|
||||
/** 子分词器锁, 该集合非空,说明有子分词器在占用segmentBuff */
|
||||
private final Set<String> buffLocker;
|
||||
/** 原始分词结果集合,未经歧义处理 */
|
||||
private QuickSortSet orgLexemes;
|
||||
/** LexemePath位置索引表 */
|
||||
private final Map<Integer, LexemePath> pathMap;
|
||||
/** 最终分词结果集 */
|
||||
private final LinkedList<Lexeme> results;
|
||||
/** 分词器配置项 */
|
||||
private final Configuration cfg;
|
||||
/**
|
||||
* 默认缓冲区大小
|
||||
*/
|
||||
private static final int BUFF_SIZE = 4096;
|
||||
/**
|
||||
* 缓冲区耗尽的临界值
|
||||
*/
|
||||
private static final int BUFF_EXHAUST_CRITICAL = 100;
|
||||
/**
|
||||
* 字符窜读取缓冲
|
||||
*/
|
||||
private char[] segmentBuff;
|
||||
/**
|
||||
* 字符类型数组
|
||||
*/
|
||||
private int[] charTypes;
|
||||
/**
|
||||
* 记录Reader内已分析的字串总长度, 在分多段分析词元时,该变量累计当前的segmentBuff相对于reader起始位置的位移
|
||||
*/
|
||||
private int buffOffset;
|
||||
/**
|
||||
* 当前缓冲区位置指针
|
||||
*/
|
||||
private int cursor;
|
||||
/**
|
||||
* 最近一次读入的,可处理的字串长度
|
||||
*/
|
||||
private int available;
|
||||
/**
|
||||
* 子分词器锁, 该集合非空,说明有子分词器在占用segmentBuff
|
||||
*/
|
||||
private final Set<String> buffLocker;
|
||||
/**
|
||||
* 原始分词结果集合,未经歧义处理
|
||||
*/
|
||||
private QuickSortSet orgLexemes;
|
||||
/**
|
||||
* LexemePath位置索引表
|
||||
*/
|
||||
private final Map<Integer, LexemePath> pathMap;
|
||||
/**
|
||||
* 最终分词结果集
|
||||
*/
|
||||
private final LinkedList<Lexeme> results;
|
||||
/**
|
||||
* 分词器配置项
|
||||
*/
|
||||
private final Configuration cfg;
|
||||
|
||||
public AnalyzeContext(Configuration cfg) {
|
||||
this.cfg = cfg;
|
||||
this.segmentBuff = new char[BUFF_SIZE];
|
||||
this.charTypes = new int[BUFF_SIZE];
|
||||
this.buffLocker = new HashSet<>();
|
||||
this.orgLexemes = new QuickSortSet();
|
||||
this.pathMap = new HashMap<>();
|
||||
this.results = new LinkedList<>();
|
||||
}
|
||||
|
||||
int getCursor() {
|
||||
return this.cursor;
|
||||
}
|
||||
|
||||
char[] getSegmentBuff() {
|
||||
return this.segmentBuff;
|
||||
}
|
||||
|
||||
char getCurrentChar() {
|
||||
return this.segmentBuff[this.cursor];
|
||||
}
|
||||
|
||||
int getCurrentCharType() {
|
||||
return this.charTypes[this.cursor];
|
||||
}
|
||||
|
||||
int getBufferOffset() {
|
||||
return this.buffOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据context的上下文情况,填充segmentBuff
|
||||
*
|
||||
* @param reader
|
||||
* @return 返回待分析的(有效的)字串长度
|
||||
* @throws IOException
|
||||
*/
|
||||
int fillBuffer(Reader reader) throws IOException {
|
||||
int readCount = 0;
|
||||
if (this.buffOffset == 0) {
|
||||
// 首次读取reader
|
||||
readCount = reader.read(segmentBuff);
|
||||
} else {
|
||||
int offset = this.available - this.cursor;
|
||||
if (offset > 0) {
|
||||
// 最近一次读取的>最近一次处理的,将未处理的字串拷贝到segmentBuff头部
|
||||
System.arraycopy(this.segmentBuff, this.cursor, this.segmentBuff, 0, offset);
|
||||
readCount = offset;
|
||||
}
|
||||
// 继续读取reader ,以onceReadIn - onceAnalyzed为起始位置,继续填充segmentBuff剩余的部分
|
||||
readCount += reader.read(this.segmentBuff, offset, BUFF_SIZE - offset);
|
||||
public AnalyzeContext(Configuration cfg) {
|
||||
this.cfg = cfg;
|
||||
this.segmentBuff = new char[BUFF_SIZE];
|
||||
this.charTypes = new int[BUFF_SIZE];
|
||||
this.buffLocker = new HashSet<>();
|
||||
this.orgLexemes = new QuickSortSet();
|
||||
this.pathMap = new HashMap<>();
|
||||
this.results = new LinkedList<>();
|
||||
}
|
||||
// 记录最后一次从Reader中读入的可用字符长度
|
||||
this.available = readCount;
|
||||
// 重置当前指针
|
||||
this.cursor = 0;
|
||||
return readCount;
|
||||
}
|
||||
|
||||
/** 初始化buff指针,处理第一个字符 */
|
||||
void initCursor() {
|
||||
this.cursor = 0;
|
||||
this.segmentBuff[this.cursor] = CharacterUtil.regularize(this.segmentBuff[this.cursor]);
|
||||
this.charTypes[this.cursor] = CharacterUtil.identifyCharType(this.segmentBuff[this.cursor]);
|
||||
}
|
||||
|
||||
/** 指针+1 成功返回 true; 指针已经到了buff尾部,不能前进,返回false 并处理当前字符 */
|
||||
boolean moveCursor() {
|
||||
if (this.cursor < this.available - 1) {
|
||||
this.cursor++;
|
||||
this.segmentBuff[this.cursor] = CharacterUtil.regularize(this.segmentBuff[this.cursor]);
|
||||
this.charTypes[this.cursor] = CharacterUtil.identifyCharType(this.segmentBuff[this.cursor]);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
int getCursor() {
|
||||
return this.cursor;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 设置当前segmentBuff为锁定状态 加入占用segmentBuff的子分词器名称,表示占用segmentBuff
|
||||
*
|
||||
* @param segmenterName
|
||||
*/
|
||||
void lockBuffer(String segmenterName) {
|
||||
this.buffLocker.add(segmenterName);
|
||||
}
|
||||
|
||||
/**
|
||||
* 移除指定的子分词器名,释放对segmentBuff的占用
|
||||
*
|
||||
* @param segmenterName
|
||||
*/
|
||||
void unlockBuffer(String segmenterName) {
|
||||
this.buffLocker.remove(segmenterName);
|
||||
}
|
||||
|
||||
/**
|
||||
* 只要buffLocker中存在segmenterName 则buffer被锁定
|
||||
*
|
||||
* @return boolean 缓冲去是否被锁定
|
||||
*/
|
||||
boolean isBufferLocked() {
|
||||
return this.buffLocker.size() > 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* 判断当前segmentBuff是否已经用完 当前执针cursor移至segmentBuff末端this.available - 1
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
boolean isBufferConsumed() {
|
||||
return this.cursor == this.available - 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* 判断segmentBuff是否需要读取新数据
|
||||
*
|
||||
* <p>满足一下条件时, 1.available == BUFF_SIZE 表示buffer满载 2.buffIndex < available - 1 && buffIndex >
|
||||
* available - BUFF_EXHAUST_CRITICAL表示当前指针处于临界区内 3.!context.isBufferLocked()表示没有segmenter在占用buffer
|
||||
* 要中断当前循环(buffer要进行移位,并再读取数据的操作)
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
boolean needRefillBuffer() {
|
||||
return this.available == BUFF_SIZE
|
||||
&& this.cursor < this.available - 1
|
||||
&& this.cursor > this.available - BUFF_EXHAUST_CRITICAL
|
||||
&& !this.isBufferLocked();
|
||||
}
|
||||
|
||||
/** 累计当前的segmentBuff相对于reader起始位置的位移 */
|
||||
void markBufferOffset() {
|
||||
this.buffOffset += this.cursor;
|
||||
}
|
||||
|
||||
/**
|
||||
* 向分词结果集添加词元
|
||||
*
|
||||
* @param lexeme
|
||||
*/
|
||||
void addLexeme(Lexeme lexeme) {
|
||||
this.orgLexemes.addLexeme(lexeme);
|
||||
}
|
||||
|
||||
/**
|
||||
* 添加分词结果路径 路径起始位置 ---> 路径 映射表
|
||||
*
|
||||
* @param path
|
||||
*/
|
||||
void addLexemePath(LexemePath path) {
|
||||
if (path != null) {
|
||||
this.pathMap.put(path.getPathBegin(), path);
|
||||
char[] getSegmentBuff() {
|
||||
return this.segmentBuff;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 返回原始分词结果
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
QuickSortSet getOrgLexemes() {
|
||||
return this.orgLexemes;
|
||||
}
|
||||
char getCurrentChar() {
|
||||
return this.segmentBuff[this.cursor];
|
||||
}
|
||||
|
||||
/**
|
||||
* 推送分词结果到结果集合 1.从buff头部遍历到this.cursor已处理位置 2.将map中存在的分词结果推入results
|
||||
* 3.将map中不存在的CJDK字符以单字方式推入results
|
||||
*/
|
||||
void outputToResult() {
|
||||
int index = 0;
|
||||
while (index <= this.cursor) {
|
||||
// 跳过非CJK字符
|
||||
if (CharacterUtil.CHAR_USELESS == this.charTypes[index]) {
|
||||
index++;
|
||||
continue;
|
||||
}
|
||||
// 从pathMap找出对应index位置的LexemePath
|
||||
LexemePath path = this.pathMap.get(index);
|
||||
if (path != null) {
|
||||
// 输出LexemePath中的lexeme到results集合
|
||||
Lexeme l = path.pollFirst();
|
||||
while (l != null) {
|
||||
this.results.add(l);
|
||||
// 将index移至lexeme后
|
||||
index = l.getBegin() + l.getLength();
|
||||
l = path.pollFirst();
|
||||
if (l != null) {
|
||||
// 输出path内部,词元间遗漏的单字
|
||||
for (; index < l.getBegin(); index++) {
|
||||
this.outputSingleCJK(index);
|
||||
int getCurrentCharType() {
|
||||
return this.charTypes[this.cursor];
|
||||
}
|
||||
|
||||
int getBufferOffset() {
|
||||
return this.buffOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据context的上下文情况,填充segmentBuff
|
||||
*
|
||||
* @param reader
|
||||
* @return 返回待分析的(有效的)字串长度
|
||||
* @throws IOException
|
||||
*/
|
||||
int fillBuffer(Reader reader) throws IOException {
|
||||
int readCount = 0;
|
||||
if (this.buffOffset == 0) {
|
||||
// 首次读取reader
|
||||
readCount = reader.read(segmentBuff);
|
||||
} else {
|
||||
int offset = this.available - this.cursor;
|
||||
if (offset > 0) {
|
||||
// 最近一次读取的>最近一次处理的,将未处理的字串拷贝到segmentBuff头部
|
||||
System.arraycopy(this.segmentBuff, this.cursor, this.segmentBuff, 0, offset);
|
||||
readCount = offset;
|
||||
}
|
||||
}
|
||||
// 继续读取reader ,以onceReadIn - onceAnalyzed为起始位置,继续填充segmentBuff剩余的部分
|
||||
readCount += reader.read(this.segmentBuff, offset, BUFF_SIZE - offset);
|
||||
}
|
||||
} else { // pathMap中找不到index对应的LexemePath
|
||||
// 单字输出
|
||||
this.outputSingleCJK(index);
|
||||
index++;
|
||||
}
|
||||
// 记录最后一次从Reader中读入的可用字符长度
|
||||
this.available = readCount;
|
||||
// 重置当前指针
|
||||
this.cursor = 0;
|
||||
return readCount;
|
||||
}
|
||||
// 清空当前的Map
|
||||
this.pathMap.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* 对CJK字符进行单字输出
|
||||
*
|
||||
* @param index
|
||||
*/
|
||||
private void outputSingleCJK(int index) {
|
||||
if (CharacterUtil.CHAR_CHINESE == this.charTypes[index]) {
|
||||
Lexeme singleCharLexeme = new Lexeme(this.buffOffset, index, 1, Lexeme.TYPE_CNCHAR);
|
||||
this.results.add(singleCharLexeme);
|
||||
} else if (CharacterUtil.CHAR_OTHER_CJK == this.charTypes[index]) {
|
||||
Lexeme singleCharLexeme = new Lexeme(this.buffOffset, index, 1, Lexeme.TYPE_OTHER_CJK);
|
||||
this.results.add(singleCharLexeme);
|
||||
/**
|
||||
* 初始化buff指针,处理第一个字符
|
||||
*/
|
||||
void initCursor() {
|
||||
this.cursor = 0;
|
||||
this.segmentBuff[this.cursor] = CharacterUtil.regularize(this.segmentBuff[this.cursor]);
|
||||
this.charTypes[this.cursor] = CharacterUtil.identifyCharType(this.segmentBuff[this.cursor]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 返回lexeme
|
||||
*
|
||||
* <p>同时处理合并
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
Lexeme getNextLexeme() {
|
||||
// 从结果集取出,并移除第一个Lexme
|
||||
Lexeme result = this.results.pollFirst();
|
||||
while (result != null) {
|
||||
// 数量词合并
|
||||
this.compound(result);
|
||||
if (Dictionary.getSingleton()
|
||||
.isStopWord(this.segmentBuff, result.getBegin(), result.getLength())) {
|
||||
// 是停止词继续取列表的下一个
|
||||
result = this.results.pollFirst();
|
||||
} else {
|
||||
// 不是停止词, 生成lexeme的词元文本,输出
|
||||
result.setLexemeText(String.valueOf(segmentBuff, result.getBegin(), result.getLength()));
|
||||
break;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/** 重置分词上下文状态 */
|
||||
void reset() {
|
||||
this.buffLocker.clear();
|
||||
this.orgLexemes = new QuickSortSet();
|
||||
this.available = 0;
|
||||
this.buffOffset = 0;
|
||||
this.charTypes = new int[BUFF_SIZE];
|
||||
this.cursor = 0;
|
||||
this.results.clear();
|
||||
this.segmentBuff = new char[BUFF_SIZE];
|
||||
this.pathMap.clear();
|
||||
}
|
||||
|
||||
/** 组合词元 */
|
||||
private void compound(Lexeme result) {
|
||||
if (!this.cfg.useSmart()) {
|
||||
return;
|
||||
}
|
||||
// 数量词合并处理
|
||||
if (!this.results.isEmpty()) {
|
||||
|
||||
if (Lexeme.TYPE_ARABIC == result.getLexemeType()) {
|
||||
Lexeme nextLexeme = this.results.peekFirst();
|
||||
boolean appendOk = false;
|
||||
if (Lexeme.TYPE_CNUM == nextLexeme.getLexemeType()) {
|
||||
// 合并英文数词+中文数词
|
||||
appendOk = result.append(nextLexeme, Lexeme.TYPE_CNUM);
|
||||
} else if (Lexeme.TYPE_COUNT == nextLexeme.getLexemeType()) {
|
||||
// 合并英文数词+中文量词
|
||||
appendOk = result.append(nextLexeme, Lexeme.TYPE_CQUAN);
|
||||
/**
|
||||
* 指针+1 成功返回 true; 指针已经到了buff尾部,不能前进,返回false 并处理当前字符
|
||||
*/
|
||||
boolean moveCursor() {
|
||||
if (this.cursor < this.available - 1) {
|
||||
this.cursor++;
|
||||
this.segmentBuff[this.cursor] = CharacterUtil.regularize(this.segmentBuff[this.cursor]);
|
||||
this.charTypes[this.cursor] = CharacterUtil.identifyCharType(this.segmentBuff[this.cursor]);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 设置当前segmentBuff为锁定状态 加入占用segmentBuff的子分词器名称,表示占用segmentBuff
|
||||
*
|
||||
* @param segmenterName
|
||||
*/
|
||||
void lockBuffer(String segmenterName) {
|
||||
this.buffLocker.add(segmenterName);
|
||||
}
|
||||
|
||||
/**
|
||||
* 移除指定的子分词器名,释放对segmentBuff的占用
|
||||
*
|
||||
* @param segmenterName
|
||||
*/
|
||||
void unlockBuffer(String segmenterName) {
|
||||
this.buffLocker.remove(segmenterName);
|
||||
}
|
||||
|
||||
/**
|
||||
* 只要buffLocker中存在segmenterName 则buffer被锁定
|
||||
*
|
||||
* @return boolean 缓冲去是否被锁定
|
||||
*/
|
||||
boolean isBufferLocked() {
|
||||
return this.buffLocker.size() > 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* 判断当前segmentBuff是否已经用完 当前执针cursor移至segmentBuff末端this.available - 1
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
boolean isBufferConsumed() {
|
||||
return this.cursor == this.available - 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* 判断segmentBuff是否需要读取新数据
|
||||
*
|
||||
* <p>满足一下条件时, 1.available == BUFF_SIZE 表示buffer满载 2.buffIndex < available - 1 && buffIndex >
|
||||
* available - BUFF_EXHAUST_CRITICAL表示当前指针处于临界区内 3.!context.isBufferLocked()表示没有segmenter在占用buffer
|
||||
* 要中断当前循环(buffer要进行移位,并再读取数据的操作)
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
boolean needRefillBuffer() {
|
||||
return this.available == BUFF_SIZE
|
||||
&& this.cursor < this.available - 1
|
||||
&& this.cursor > this.available - BUFF_EXHAUST_CRITICAL
|
||||
&& !this.isBufferLocked();
|
||||
}
|
||||
|
||||
/**
|
||||
* 累计当前的segmentBuff相对于reader起始位置的位移
|
||||
*/
|
||||
void markBufferOffset() {
|
||||
this.buffOffset += this.cursor;
|
||||
}
|
||||
|
||||
/**
|
||||
* 向分词结果集添加词元
|
||||
*
|
||||
* @param lexeme
|
||||
*/
|
||||
void addLexeme(Lexeme lexeme) {
|
||||
this.orgLexemes.addLexeme(lexeme);
|
||||
}
|
||||
|
||||
/**
|
||||
* 添加分词结果路径 路径起始位置 ---> 路径 映射表
|
||||
*
|
||||
* @param path
|
||||
*/
|
||||
void addLexemePath(LexemePath path) {
|
||||
if (path != null) {
|
||||
this.pathMap.put(path.getPathBegin(), path);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 返回原始分词结果
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
QuickSortSet getOrgLexemes() {
|
||||
return this.orgLexemes;
|
||||
}
|
||||
|
||||
/**
|
||||
* 推送分词结果到结果集合 1.从buff头部遍历到this.cursor已处理位置 2.将map中存在的分词结果推入results
|
||||
* 3.将map中不存在的CJDK字符以单字方式推入results
|
||||
*/
|
||||
void outputToResult() {
|
||||
int index = 0;
|
||||
while (index <= this.cursor) {
|
||||
// 跳过非CJK字符
|
||||
if (CharacterUtil.CHAR_USELESS == this.charTypes[index]) {
|
||||
index++;
|
||||
continue;
|
||||
}
|
||||
// 从pathMap找出对应index位置的LexemePath
|
||||
LexemePath path = this.pathMap.get(index);
|
||||
if (path != null) {
|
||||
// 输出LexemePath中的lexeme到results集合
|
||||
Lexeme l = path.pollFirst();
|
||||
while (l != null) {
|
||||
this.results.add(l);
|
||||
// 将index移至lexeme后
|
||||
index = l.getBegin() + l.getLength();
|
||||
l = path.pollFirst();
|
||||
if (l != null) {
|
||||
// 输出path内部,词元间遗漏的单字
|
||||
for (; index < l.getBegin(); index++) {
|
||||
this.outputSingleCJK(index);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else { // pathMap中找不到index对应的LexemePath
|
||||
// 单字输出
|
||||
this.outputSingleCJK(index);
|
||||
index++;
|
||||
}
|
||||
}
|
||||
// 清空当前的Map
|
||||
this.pathMap.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* 对CJK字符进行单字输出
|
||||
*
|
||||
* @param index
|
||||
*/
|
||||
private void outputSingleCJK(int index) {
|
||||
if (CharacterUtil.CHAR_CHINESE == this.charTypes[index]) {
|
||||
Lexeme singleCharLexeme = new Lexeme(this.buffOffset, index, 1, Lexeme.TYPE_CNCHAR);
|
||||
this.results.add(singleCharLexeme);
|
||||
} else if (CharacterUtil.CHAR_OTHER_CJK == this.charTypes[index]) {
|
||||
Lexeme singleCharLexeme = new Lexeme(this.buffOffset, index, 1, Lexeme.TYPE_OTHER_CJK);
|
||||
this.results.add(singleCharLexeme);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 返回lexeme
|
||||
*
|
||||
* <p>同时处理合并
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
Lexeme getNextLexeme() {
|
||||
// 从结果集取出,并移除第一个Lexme
|
||||
Lexeme result = this.results.pollFirst();
|
||||
while (result != null) {
|
||||
// 数量词合并
|
||||
this.compound(result);
|
||||
if (Dictionary.getSingleton()
|
||||
.isStopWord(this.segmentBuff, result.getBegin(), result.getLength())) {
|
||||
// 是停止词继续取列表的下一个
|
||||
result = this.results.pollFirst();
|
||||
} else {
|
||||
// 不是停止词, 生成lexeme的词元文本,输出
|
||||
result.setLexemeText(String.valueOf(segmentBuff, result.getBegin(), result.getLength()));
|
||||
break;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* 重置分词上下文状态
|
||||
*/
|
||||
void reset() {
|
||||
this.buffLocker.clear();
|
||||
this.orgLexemes = new QuickSortSet();
|
||||
this.available = 0;
|
||||
this.buffOffset = 0;
|
||||
this.charTypes = new int[BUFF_SIZE];
|
||||
this.cursor = 0;
|
||||
this.results.clear();
|
||||
this.segmentBuff = new char[BUFF_SIZE];
|
||||
this.pathMap.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* 组合词元
|
||||
*/
|
||||
private void compound(Lexeme result) {
|
||||
if (!this.cfg.useSmart()) {
|
||||
return;
|
||||
}
|
||||
// 数量词合并处理
|
||||
if (!this.results.isEmpty()) {
|
||||
|
||||
if (Lexeme.TYPE_ARABIC == result.getLexemeType()) {
|
||||
Lexeme nextLexeme = this.results.peekFirst();
|
||||
boolean appendOk = false;
|
||||
if (Lexeme.TYPE_CNUM == nextLexeme.getLexemeType()) {
|
||||
// 合并英文数词+中文数词
|
||||
appendOk = result.append(nextLexeme, Lexeme.TYPE_CNUM);
|
||||
} else if (Lexeme.TYPE_COUNT == nextLexeme.getLexemeType()) {
|
||||
// 合并英文数词+中文量词
|
||||
appendOk = result.append(nextLexeme, Lexeme.TYPE_CQUAN);
|
||||
}
|
||||
if (appendOk) {
|
||||
// 弹出
|
||||
this.results.pollFirst();
|
||||
}
|
||||
}
|
||||
|
||||
// 可能存在第二轮合并
|
||||
if (Lexeme.TYPE_CNUM == result.getLexemeType() && !this.results.isEmpty()) {
|
||||
Lexeme nextLexeme = this.results.peekFirst();
|
||||
boolean appendOk = false;
|
||||
if (Lexeme.TYPE_COUNT == nextLexeme.getLexemeType()) {
|
||||
// 合并中文数词+中文量词
|
||||
appendOk = result.append(nextLexeme, Lexeme.TYPE_CQUAN);
|
||||
}
|
||||
if (appendOk) {
|
||||
// 弹出
|
||||
this.results.pollFirst();
|
||||
}
|
||||
}
|
||||
}
|
||||
if (appendOk) {
|
||||
// 弹出
|
||||
this.results.pollFirst();
|
||||
}
|
||||
}
|
||||
|
||||
// 可能存在第二轮合并
|
||||
if (Lexeme.TYPE_CNUM == result.getLexemeType() && !this.results.isEmpty()) {
|
||||
Lexeme nextLexeme = this.results.peekFirst();
|
||||
boolean appendOk = false;
|
||||
if (Lexeme.TYPE_COUNT == nextLexeme.getLexemeType()) {
|
||||
// 合并中文数词+中文量词
|
||||
appendOk = result.append(nextLexeme, Lexeme.TYPE_CQUAN);
|
||||
}
|
||||
if (appendOk) {
|
||||
// 弹出
|
||||
this.results.pollFirst();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -25,95 +25,101 @@ import com.rymcu.forest.lucene.dic.Hit;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/** 中文-日韩文子分词器 */
|
||||
/**
|
||||
* 中文-日韩文子分词器
|
||||
*/
|
||||
class CJKSegmenter implements ISegmenter {
|
||||
|
||||
/** 子分词器标签 */
|
||||
static final String SEGMENTER_NAME = "CJK_SEGMENTER";
|
||||
/** 待处理的分词hit队列 */
|
||||
private List<Hit> tmpHits;
|
||||
/**
|
||||
* 子分词器标签
|
||||
*/
|
||||
static final String SEGMENTER_NAME = "CJK_SEGMENTER";
|
||||
/**
|
||||
* 待处理的分词hit队列
|
||||
*/
|
||||
private List<Hit> tmpHits;
|
||||
|
||||
CJKSegmenter() {
|
||||
this.tmpHits = new LinkedList<Hit>();
|
||||
}
|
||||
CJKSegmenter() {
|
||||
this.tmpHits = new LinkedList<Hit>();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void analyze(AnalyzeContext context) {
|
||||
if (CharacterUtil.CHAR_USELESS != context.getCurrentCharType()) {
|
||||
@Override
|
||||
public void analyze(AnalyzeContext context) {
|
||||
if (CharacterUtil.CHAR_USELESS != context.getCurrentCharType()) {
|
||||
|
||||
// 优先处理tmpHits中的hit
|
||||
if (!this.tmpHits.isEmpty()) {
|
||||
// 处理词段队列
|
||||
Hit[] tmpArray = this.tmpHits.toArray(new Hit[this.tmpHits.size()]);
|
||||
for (Hit hit : tmpArray) {
|
||||
hit =
|
||||
Dictionary.getSingleton()
|
||||
.matchWithHit(context.getSegmentBuff(), context.getCursor(), hit);
|
||||
if (hit.isMatch()) {
|
||||
// 输出当前的词
|
||||
Lexeme newLexeme =
|
||||
new Lexeme(
|
||||
context.getBufferOffset(),
|
||||
hit.getBegin(),
|
||||
context.getCursor() - hit.getBegin() + 1,
|
||||
Lexeme.TYPE_CNWORD);
|
||||
context.addLexeme(newLexeme);
|
||||
// 优先处理tmpHits中的hit
|
||||
if (!this.tmpHits.isEmpty()) {
|
||||
// 处理词段队列
|
||||
Hit[] tmpArray = this.tmpHits.toArray(new Hit[this.tmpHits.size()]);
|
||||
for (Hit hit : tmpArray) {
|
||||
hit =
|
||||
Dictionary.getSingleton()
|
||||
.matchWithHit(context.getSegmentBuff(), context.getCursor(), hit);
|
||||
if (hit.isMatch()) {
|
||||
// 输出当前的词
|
||||
Lexeme newLexeme =
|
||||
new Lexeme(
|
||||
context.getBufferOffset(),
|
||||
hit.getBegin(),
|
||||
context.getCursor() - hit.getBegin() + 1,
|
||||
Lexeme.TYPE_CNWORD);
|
||||
context.addLexeme(newLexeme);
|
||||
|
||||
if (!hit.isPrefix()) { // 不是词前缀,hit不需要继续匹配,移除
|
||||
this.tmpHits.remove(hit);
|
||||
if (!hit.isPrefix()) { // 不是词前缀,hit不需要继续匹配,移除
|
||||
this.tmpHits.remove(hit);
|
||||
}
|
||||
|
||||
} else if (hit.isUnmatch()) {
|
||||
// hit不是词,移除
|
||||
this.tmpHits.remove(hit);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} else if (hit.isUnmatch()) {
|
||||
// hit不是词,移除
|
||||
this.tmpHits.remove(hit);
|
||||
}
|
||||
// 再对当前指针位置的字符进行单字匹配
|
||||
Hit singleCharHit =
|
||||
Dictionary.getSingleton()
|
||||
.matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
|
||||
if (singleCharHit.isMatch()) { // 首字成词
|
||||
// 输出当前的词
|
||||
Lexeme newLexeme =
|
||||
new Lexeme(context.getBufferOffset(), context.getCursor(), 1, Lexeme.TYPE_CNWORD);
|
||||
context.addLexeme(newLexeme);
|
||||
|
||||
// 同时也是词前缀
|
||||
if (singleCharHit.isPrefix()) {
|
||||
// 前缀匹配则放入hit列表
|
||||
this.tmpHits.add(singleCharHit);
|
||||
}
|
||||
} else if (singleCharHit.isPrefix()) { // 首字为词前缀
|
||||
// 前缀匹配则放入hit列表
|
||||
this.tmpHits.add(singleCharHit);
|
||||
}
|
||||
|
||||
} else {
|
||||
// 遇到CHAR_USELESS字符
|
||||
// 清空队列
|
||||
this.tmpHits.clear();
|
||||
}
|
||||
}
|
||||
|
||||
// 再对当前指针位置的字符进行单字匹配
|
||||
Hit singleCharHit =
|
||||
Dictionary.getSingleton()
|
||||
.matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
|
||||
if (singleCharHit.isMatch()) { // 首字成词
|
||||
// 输出当前的词
|
||||
Lexeme newLexeme =
|
||||
new Lexeme(context.getBufferOffset(), context.getCursor(), 1, Lexeme.TYPE_CNWORD);
|
||||
context.addLexeme(newLexeme);
|
||||
|
||||
// 同时也是词前缀
|
||||
if (singleCharHit.isPrefix()) {
|
||||
// 前缀匹配则放入hit列表
|
||||
this.tmpHits.add(singleCharHit);
|
||||
// 判断缓冲区是否已经读完
|
||||
if (context.isBufferConsumed()) {
|
||||
// 清空队列
|
||||
this.tmpHits.clear();
|
||||
}
|
||||
} else if (singleCharHit.isPrefix()) { // 首字为词前缀
|
||||
// 前缀匹配则放入hit列表
|
||||
this.tmpHits.add(singleCharHit);
|
||||
}
|
||||
|
||||
} else {
|
||||
// 遇到CHAR_USELESS字符
|
||||
// 清空队列
|
||||
this.tmpHits.clear();
|
||||
// 判断是否锁定缓冲区
|
||||
if (this.tmpHits.size() == 0) {
|
||||
context.unlockBuffer(SEGMENTER_NAME);
|
||||
|
||||
} else {
|
||||
context.lockBuffer(SEGMENTER_NAME);
|
||||
}
|
||||
}
|
||||
|
||||
// 判断缓冲区是否已经读完
|
||||
if (context.isBufferConsumed()) {
|
||||
// 清空队列
|
||||
this.tmpHits.clear();
|
||||
@Override
|
||||
public void reset() {
|
||||
// 清空队列
|
||||
this.tmpHits.clear();
|
||||
}
|
||||
|
||||
// 判断是否锁定缓冲区
|
||||
if (this.tmpHits.size() == 0) {
|
||||
context.unlockBuffer(SEGMENTER_NAME);
|
||||
|
||||
} else {
|
||||
context.lockBuffer(SEGMENTER_NAME);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() {
|
||||
// 清空队列
|
||||
this.tmpHits.clear();
|
||||
}
|
||||
}
|
||||
|
@ -27,200 +27,218 @@ import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
/** 中文数量词子分词器 */
|
||||
/**
|
||||
* 中文数量词子分词器
|
||||
*/
|
||||
class CN_QuantifierSegmenter implements ISegmenter {
|
||||
|
||||
/** 子分词器标签 */
|
||||
static final String SEGMENTER_NAME = "QUAN_SEGMENTER";
|
||||
/**
|
||||
* 子分词器标签
|
||||
*/
|
||||
static final String SEGMENTER_NAME = "QUAN_SEGMENTER";
|
||||
|
||||
/** 中文数词 */
|
||||
private static String Chn_Num = "一二两三四五六七八九十零壹贰叁肆伍陆柒捌玖拾百千万亿拾佰仟萬億兆卅廿";
|
||||
/**
|
||||
* 中文数词
|
||||
*/
|
||||
private static String Chn_Num = "一二两三四五六七八九十零壹贰叁肆伍陆柒捌玖拾百千万亿拾佰仟萬億兆卅廿";
|
||||
|
||||
private static Set<Character> ChnNumberChars = new HashSet<>();
|
||||
private static Set<Character> ChnNumberChars = new HashSet<>();
|
||||
|
||||
static {
|
||||
char[] ca = Chn_Num.toCharArray();
|
||||
for (char nChar : ca) {
|
||||
ChnNumberChars.add(nChar);
|
||||
static {
|
||||
char[] ca = Chn_Num.toCharArray();
|
||||
for (char nChar : ca) {
|
||||
ChnNumberChars.add(nChar);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** 词元的开始位置, 同时作为子分词器状态标识 当start > -1 时,标识当前的分词器正在处理字符 */
|
||||
private int nStart;
|
||||
/** 记录词元结束位置 end记录的是在词元中最后一个出现的合理的数词结束 */
|
||||
private int nEnd;
|
||||
/**
|
||||
* 词元的开始位置, 同时作为子分词器状态标识 当start > -1 时,标识当前的分词器正在处理字符
|
||||
*/
|
||||
private int nStart;
|
||||
/**
|
||||
* 记录词元结束位置 end记录的是在词元中最后一个出现的合理的数词结束
|
||||
*/
|
||||
private int nEnd;
|
||||
|
||||
/** 待处理的量词hit队列 */
|
||||
private final List<Hit> countHits;
|
||||
/**
|
||||
* 待处理的量词hit队列
|
||||
*/
|
||||
private final List<Hit> countHits;
|
||||
|
||||
CN_QuantifierSegmenter() {
|
||||
nStart = -1;
|
||||
nEnd = -1;
|
||||
this.countHits = new LinkedList<Hit>();
|
||||
}
|
||||
|
||||
/** 分词 */
|
||||
@Override
|
||||
public void analyze(AnalyzeContext context) {
|
||||
// 处理中文数词
|
||||
this.processCNumber(context);
|
||||
// 处理中文量词
|
||||
this.processCount(context);
|
||||
// 判断是否锁定缓冲区
|
||||
if (this.nStart == -1 && this.nEnd == -1 && countHits.isEmpty()) {
|
||||
// 对缓冲区解锁
|
||||
context.unlockBuffer(SEGMENTER_NAME);
|
||||
} else {
|
||||
context.lockBuffer(SEGMENTER_NAME);
|
||||
}
|
||||
}
|
||||
|
||||
/** 重置子分词器状态 */
|
||||
@Override
|
||||
public void reset() {
|
||||
nStart = -1;
|
||||
nEnd = -1;
|
||||
countHits.clear();
|
||||
}
|
||||
|
||||
/** 处理数词 */
|
||||
private void processCNumber(AnalyzeContext context) {
|
||||
if (nStart == -1 && nEnd == -1) { // 初始状态
|
||||
if (CharacterUtil.CHAR_CHINESE == context.getCurrentCharType()
|
||||
&& ChnNumberChars.contains(context.getCurrentChar())) {
|
||||
// 记录数词的起始、结束位置
|
||||
nStart = context.getCursor();
|
||||
nEnd = context.getCursor();
|
||||
}
|
||||
} else { // 正在处理状态
|
||||
if (CharacterUtil.CHAR_CHINESE == context.getCurrentCharType()
|
||||
&& ChnNumberChars.contains(context.getCurrentChar())) {
|
||||
// 记录数词的结束位置
|
||||
nEnd = context.getCursor();
|
||||
} else {
|
||||
// 输出数词
|
||||
this.outputNumLexeme(context);
|
||||
// 重置头尾指针
|
||||
CN_QuantifierSegmenter() {
|
||||
nStart = -1;
|
||||
nEnd = -1;
|
||||
}
|
||||
this.countHits = new LinkedList<Hit>();
|
||||
}
|
||||
|
||||
// 缓冲区已经用完,还有尚未输出的数词
|
||||
if (context.isBufferConsumed()) {
|
||||
if (nStart != -1 && nEnd != -1) {
|
||||
// 输出数词
|
||||
outputNumLexeme(context);
|
||||
// 重置头尾指针
|
||||
/**
|
||||
* 分词
|
||||
*/
|
||||
@Override
|
||||
public void analyze(AnalyzeContext context) {
|
||||
// 处理中文数词
|
||||
this.processCNumber(context);
|
||||
// 处理中文量词
|
||||
this.processCount(context);
|
||||
// 判断是否锁定缓冲区
|
||||
if (this.nStart == -1 && this.nEnd == -1 && countHits.isEmpty()) {
|
||||
// 对缓冲区解锁
|
||||
context.unlockBuffer(SEGMENTER_NAME);
|
||||
} else {
|
||||
context.lockBuffer(SEGMENTER_NAME);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 重置子分词器状态
|
||||
*/
|
||||
@Override
|
||||
public void reset() {
|
||||
nStart = -1;
|
||||
nEnd = -1;
|
||||
}
|
||||
countHits.clear();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 处理中文量词
|
||||
*
|
||||
* @param context
|
||||
*/
|
||||
private void processCount(AnalyzeContext context) {
|
||||
// 判断是否需要启动量词扫描
|
||||
if (!this.needCountScan(context)) {
|
||||
return;
|
||||
}
|
||||
if (CharacterUtil.CHAR_CHINESE == context.getCurrentCharType()) {
|
||||
// 优先处理countHits中的hit
|
||||
if (!this.countHits.isEmpty()) {
|
||||
// 处理词段队列
|
||||
Hit[] tmpArray = this.countHits.toArray(new Hit[this.countHits.size()]);
|
||||
for (Hit hit : tmpArray) {
|
||||
hit =
|
||||
Dictionary.getSingleton()
|
||||
.matchWithHit(context.getSegmentBuff(), context.getCursor(), hit);
|
||||
if (hit.isMatch()) {
|
||||
// 输出当前的词
|
||||
Lexeme newLexeme =
|
||||
new Lexeme(
|
||||
context.getBufferOffset(),
|
||||
hit.getBegin(),
|
||||
context.getCursor() - hit.getBegin() + 1,
|
||||
Lexeme.TYPE_COUNT);
|
||||
context.addLexeme(newLexeme);
|
||||
|
||||
if (!hit.isPrefix()) { // 不是词前缀,hit不需要继续匹配,移除
|
||||
this.countHits.remove(hit);
|
||||
/**
|
||||
* 处理数词
|
||||
*/
|
||||
private void processCNumber(AnalyzeContext context) {
|
||||
if (nStart == -1 && nEnd == -1) { // 初始状态
|
||||
if (CharacterUtil.CHAR_CHINESE == context.getCurrentCharType()
|
||||
&& ChnNumberChars.contains(context.getCurrentChar())) {
|
||||
// 记录数词的起始、结束位置
|
||||
nStart = context.getCursor();
|
||||
nEnd = context.getCursor();
|
||||
}
|
||||
} else { // 正在处理状态
|
||||
if (CharacterUtil.CHAR_CHINESE == context.getCurrentCharType()
|
||||
&& ChnNumberChars.contains(context.getCurrentChar())) {
|
||||
// 记录数词的结束位置
|
||||
nEnd = context.getCursor();
|
||||
} else {
|
||||
// 输出数词
|
||||
this.outputNumLexeme(context);
|
||||
// 重置头尾指针
|
||||
nStart = -1;
|
||||
nEnd = -1;
|
||||
}
|
||||
|
||||
} else if (hit.isUnmatch()) {
|
||||
// hit不是词,移除
|
||||
this.countHits.remove(hit);
|
||||
}
|
||||
}
|
||||
}
|
||||
// 对当前指针位置的字符进行单字匹配
|
||||
Hit singleCharHit =
|
||||
Dictionary.getSingleton()
|
||||
.matchInQuantifierDict(context.getSegmentBuff(), context.getCursor(), 1);
|
||||
if (singleCharHit.isMatch()) { // 首字成量词词
|
||||
// 输出当前的词
|
||||
Lexeme newLexeme =
|
||||
new Lexeme(context.getBufferOffset(), context.getCursor(), 1, Lexeme.TYPE_COUNT);
|
||||
context.addLexeme(newLexeme);
|
||||
// 同时也是词前缀
|
||||
if (singleCharHit.isPrefix()) {
|
||||
// 前缀匹配则放入hit列表
|
||||
this.countHits.add(singleCharHit);
|
||||
}
|
||||
} else if (singleCharHit.isPrefix()) { // 首字为量词前缀
|
||||
// 前缀匹配则放入hit列表
|
||||
this.countHits.add(singleCharHit);
|
||||
}
|
||||
} else {
|
||||
// 输入的不是中文字符
|
||||
// 清空未成形的量词
|
||||
this.countHits.clear();
|
||||
}
|
||||
// 缓冲区数据已经读完,还有尚未输出的量词
|
||||
if (context.isBufferConsumed()) {
|
||||
// 清空未成形的量词
|
||||
this.countHits.clear();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 判断是否需要扫描量词
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private boolean needCountScan(AnalyzeContext context) {
|
||||
if ((nStart != -1 && nEnd != -1) || !countHits.isEmpty()) {
|
||||
// 正在处理中文数词,或者正在处理量词
|
||||
return true;
|
||||
} else {
|
||||
// 找到一个相邻的数词
|
||||
if (!context.getOrgLexemes().isEmpty()) {
|
||||
Lexeme l = context.getOrgLexemes().peekLast();
|
||||
if (Lexeme.TYPE_CNUM == l.getLexemeType() || Lexeme.TYPE_ARABIC == l.getLexemeType()) {
|
||||
if (l.getBegin() + l.getLength() == context.getCursor()) {
|
||||
// 缓冲区已经用完,还有尚未输出的数词
|
||||
if (context.isBufferConsumed()) {
|
||||
if (nStart != -1 && nEnd != -1) {
|
||||
// 输出数词
|
||||
outputNumLexeme(context);
|
||||
// 重置头尾指针
|
||||
nStart = -1;
|
||||
nEnd = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 处理中文量词
|
||||
*
|
||||
* @param context
|
||||
*/
|
||||
private void processCount(AnalyzeContext context) {
|
||||
// 判断是否需要启动量词扫描
|
||||
if (!this.needCountScan(context)) {
|
||||
return;
|
||||
}
|
||||
if (CharacterUtil.CHAR_CHINESE == context.getCurrentCharType()) {
|
||||
// 优先处理countHits中的hit
|
||||
if (!this.countHits.isEmpty()) {
|
||||
// 处理词段队列
|
||||
Hit[] tmpArray = this.countHits.toArray(new Hit[this.countHits.size()]);
|
||||
for (Hit hit : tmpArray) {
|
||||
hit =
|
||||
Dictionary.getSingleton()
|
||||
.matchWithHit(context.getSegmentBuff(), context.getCursor(), hit);
|
||||
if (hit.isMatch()) {
|
||||
// 输出当前的词
|
||||
Lexeme newLexeme =
|
||||
new Lexeme(
|
||||
context.getBufferOffset(),
|
||||
hit.getBegin(),
|
||||
context.getCursor() - hit.getBegin() + 1,
|
||||
Lexeme.TYPE_COUNT);
|
||||
context.addLexeme(newLexeme);
|
||||
|
||||
if (!hit.isPrefix()) { // 不是词前缀,hit不需要继续匹配,移除
|
||||
this.countHits.remove(hit);
|
||||
}
|
||||
|
||||
} else if (hit.isUnmatch()) {
|
||||
// hit不是词,移除
|
||||
this.countHits.remove(hit);
|
||||
}
|
||||
}
|
||||
}
|
||||
// 对当前指针位置的字符进行单字匹配
|
||||
Hit singleCharHit =
|
||||
Dictionary.getSingleton()
|
||||
.matchInQuantifierDict(context.getSegmentBuff(), context.getCursor(), 1);
|
||||
if (singleCharHit.isMatch()) { // 首字成量词词
|
||||
// 输出当前的词
|
||||
Lexeme newLexeme =
|
||||
new Lexeme(context.getBufferOffset(), context.getCursor(), 1, Lexeme.TYPE_COUNT);
|
||||
context.addLexeme(newLexeme);
|
||||
// 同时也是词前缀
|
||||
if (singleCharHit.isPrefix()) {
|
||||
// 前缀匹配则放入hit列表
|
||||
this.countHits.add(singleCharHit);
|
||||
}
|
||||
} else if (singleCharHit.isPrefix()) { // 首字为量词前缀
|
||||
// 前缀匹配则放入hit列表
|
||||
this.countHits.add(singleCharHit);
|
||||
}
|
||||
} else {
|
||||
// 输入的不是中文字符
|
||||
// 清空未成形的量词
|
||||
this.countHits.clear();
|
||||
}
|
||||
// 缓冲区数据已经读完,还有尚未输出的量词
|
||||
if (context.isBufferConsumed()) {
|
||||
// 清空未成形的量词
|
||||
this.countHits.clear();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 判断是否需要扫描量词
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private boolean needCountScan(AnalyzeContext context) {
|
||||
if ((nStart != -1 && nEnd != -1) || !countHits.isEmpty()) {
|
||||
// 正在处理中文数词,或者正在处理量词
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
// 找到一个相邻的数词
|
||||
if (!context.getOrgLexemes().isEmpty()) {
|
||||
Lexeme l = context.getOrgLexemes().peekLast();
|
||||
if (Lexeme.TYPE_CNUM == l.getLexemeType() || Lexeme.TYPE_ARABIC == l.getLexemeType()) {
|
||||
if (l.getBegin() + l.getLength() == context.getCursor()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* 添加数词词元到结果集
|
||||
*
|
||||
* @param context
|
||||
*/
|
||||
private void outputNumLexeme(AnalyzeContext context) {
|
||||
if (nStart > -1 && nEnd > -1) {
|
||||
// 输出数词
|
||||
Lexeme newLexeme =
|
||||
new Lexeme(context.getBufferOffset(), nStart, nEnd - nStart + 1, Lexeme.TYPE_CNUM);
|
||||
context.addLexeme(newLexeme);
|
||||
/**
|
||||
* 添加数词词元到结果集
|
||||
*
|
||||
* @param context
|
||||
*/
|
||||
private void outputNumLexeme(AnalyzeContext context) {
|
||||
if (nStart > -1 && nEnd > -1) {
|
||||
// 输出数词
|
||||
Lexeme newLexeme =
|
||||
new Lexeme(context.getBufferOffset(), nStart, nEnd - nStart + 1, Lexeme.TYPE_CNUM);
|
||||
context.addLexeme(newLexeme);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -21,74 +21,76 @@
|
||||
*/
|
||||
package com.rymcu.forest.lucene.core;
|
||||
|
||||
/** 字符集识别工具类 */
|
||||
/**
|
||||
* 字符集识别工具类
|
||||
*/
|
||||
class CharacterUtil {
|
||||
|
||||
public static final int CHAR_USELESS = 0;
|
||||
public static final int CHAR_USELESS = 0;
|
||||
|
||||
public static final int CHAR_ARABIC = 0X00000001;
|
||||
public static final int CHAR_ARABIC = 0X00000001;
|
||||
|
||||
public static final int CHAR_ENGLISH = 0X00000002;
|
||||
public static final int CHAR_ENGLISH = 0X00000002;
|
||||
|
||||
public static final int CHAR_CHINESE = 0X00000004;
|
||||
public static final int CHAR_CHINESE = 0X00000004;
|
||||
|
||||
public static final int CHAR_OTHER_CJK = 0X00000008;
|
||||
public static final int CHAR_OTHER_CJK = 0X00000008;
|
||||
|
||||
/**
|
||||
* 识别字符类型
|
||||
*
|
||||
* @param input
|
||||
* @return int CharacterUtil定义的字符类型常量
|
||||
*/
|
||||
static int identifyCharType(char input) {
|
||||
if (input >= '0' && input <= '9') {
|
||||
return CHAR_ARABIC;
|
||||
/**
|
||||
* 识别字符类型
|
||||
*
|
||||
* @param input
|
||||
* @return int CharacterUtil定义的字符类型常量
|
||||
*/
|
||||
static int identifyCharType(char input) {
|
||||
if (input >= '0' && input <= '9') {
|
||||
return CHAR_ARABIC;
|
||||
|
||||
} else if ((input >= 'a' && input <= 'z') || (input >= 'A' && input <= 'Z')) {
|
||||
return CHAR_ENGLISH;
|
||||
} else if ((input >= 'a' && input <= 'z') || (input >= 'A' && input <= 'Z')) {
|
||||
return CHAR_ENGLISH;
|
||||
|
||||
} else {
|
||||
Character.UnicodeBlock ub = Character.UnicodeBlock.of(input);
|
||||
} else {
|
||||
Character.UnicodeBlock ub = Character.UnicodeBlock.of(input);
|
||||
|
||||
if (ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
|
||||
|| ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
|
||||
|| ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A) {
|
||||
// 目前已知的中文字符UTF-8集合
|
||||
return CHAR_CHINESE;
|
||||
if (ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
|
||||
|| ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
|
||||
|| ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A) {
|
||||
// 目前已知的中文字符UTF-8集合
|
||||
return CHAR_CHINESE;
|
||||
|
||||
} else if (ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS // 全角数字字符和日韩字符
|
||||
// 韩文字符集
|
||||
|| ub == Character.UnicodeBlock.HANGUL_SYLLABLES
|
||||
|| ub == Character.UnicodeBlock.HANGUL_JAMO
|
||||
|| ub == Character.UnicodeBlock.HANGUL_COMPATIBILITY_JAMO
|
||||
// 日文字符集
|
||||
|| ub == Character.UnicodeBlock.HIRAGANA // 平假名
|
||||
|| ub == Character.UnicodeBlock.KATAKANA // 片假名
|
||||
|| ub == Character.UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS) {
|
||||
return CHAR_OTHER_CJK;
|
||||
}
|
||||
}
|
||||
// 其他的不做处理的字符
|
||||
return CHAR_USELESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* 进行字符规格化(全角转半角,大写转小写处理)
|
||||
*
|
||||
* @param input
|
||||
* @return char
|
||||
*/
|
||||
static char regularize(char input) {
|
||||
if (input == 12288) {
|
||||
input = (char) 32;
|
||||
|
||||
} else if (input > 65280 && input < 65375) {
|
||||
input = (char) (input - 65248);
|
||||
|
||||
} else if (input >= 'A' && input <= 'Z') {
|
||||
input += 32;
|
||||
} else if (ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS // 全角数字字符和日韩字符
|
||||
// 韩文字符集
|
||||
|| ub == Character.UnicodeBlock.HANGUL_SYLLABLES
|
||||
|| ub == Character.UnicodeBlock.HANGUL_JAMO
|
||||
|| ub == Character.UnicodeBlock.HANGUL_COMPATIBILITY_JAMO
|
||||
// 日文字符集
|
||||
|| ub == Character.UnicodeBlock.HIRAGANA // 平假名
|
||||
|| ub == Character.UnicodeBlock.KATAKANA // 片假名
|
||||
|| ub == Character.UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS) {
|
||||
return CHAR_OTHER_CJK;
|
||||
}
|
||||
}
|
||||
// 其他的不做处理的字符
|
||||
return CHAR_USELESS;
|
||||
}
|
||||
|
||||
return input;
|
||||
}
|
||||
/**
|
||||
* 进行字符规格化(全角转半角,大写转小写处理)
|
||||
*
|
||||
* @param input
|
||||
* @return char
|
||||
*/
|
||||
static char regularize(char input) {
|
||||
if (input == 12288) {
|
||||
input = (char) 32;
|
||||
|
||||
} else if (input > 65280 && input < 65375) {
|
||||
input = (char) (input - 65248);
|
||||
|
||||
} else if (input >= 'A' && input <= 'Z') {
|
||||
input += 32;
|
||||
}
|
||||
|
||||
return input;
|
||||
}
|
||||
}
|
||||
|
@ -22,123 +22,126 @@ package com.rymcu.forest.lucene.core;
|
||||
import java.util.Stack;
|
||||
import java.util.TreeSet;
|
||||
|
||||
/** IK分词歧义裁决器 */
|
||||
/**
|
||||
* IK分词歧义裁决器
|
||||
*/
|
||||
class IKArbitrator {
|
||||
|
||||
IKArbitrator() {}
|
||||
IKArbitrator() {
|
||||
}
|
||||
|
||||
/**
|
||||
* 分词歧义处理
|
||||
*
|
||||
* @param context
|
||||
* @param useSmart
|
||||
*/
|
||||
void process(AnalyzeContext context, boolean useSmart) {
|
||||
QuickSortSet orgLexemes = context.getOrgLexemes();
|
||||
Lexeme orgLexeme = orgLexemes.pollFirst();
|
||||
/**
|
||||
* 分词歧义处理
|
||||
*
|
||||
* @param context
|
||||
* @param useSmart
|
||||
*/
|
||||
void process(AnalyzeContext context, boolean useSmart) {
|
||||
QuickSortSet orgLexemes = context.getOrgLexemes();
|
||||
Lexeme orgLexeme = orgLexemes.pollFirst();
|
||||
|
||||
LexemePath crossPath = new LexemePath();
|
||||
while (orgLexeme != null) {
|
||||
if (!crossPath.addCrossLexeme(orgLexeme)) {
|
||||
// 找到与crossPath不相交的下一个crossPath
|
||||
if (crossPath.size() == 1 || !useSmart) {
|
||||
// crossPath没有歧义 或者 不做歧义处理
|
||||
// 直接输出当前crossPath
|
||||
context.addLexemePath(crossPath);
|
||||
} else {
|
||||
// 对当前的crossPath进行歧义处理
|
||||
QuickSortSet.Cell headCell = crossPath.getHead();
|
||||
LexemePath judgeResult = this.judge(headCell, crossPath.getPathLength());
|
||||
// 输出歧义处理结果judgeResult
|
||||
context.addLexemePath(judgeResult);
|
||||
LexemePath crossPath = new LexemePath();
|
||||
while (orgLexeme != null) {
|
||||
if (!crossPath.addCrossLexeme(orgLexeme)) {
|
||||
// 找到与crossPath不相交的下一个crossPath
|
||||
if (crossPath.size() == 1 || !useSmart) {
|
||||
// crossPath没有歧义 或者 不做歧义处理
|
||||
// 直接输出当前crossPath
|
||||
context.addLexemePath(crossPath);
|
||||
} else {
|
||||
// 对当前的crossPath进行歧义处理
|
||||
QuickSortSet.Cell headCell = crossPath.getHead();
|
||||
LexemePath judgeResult = this.judge(headCell, crossPath.getPathLength());
|
||||
// 输出歧义处理结果judgeResult
|
||||
context.addLexemePath(judgeResult);
|
||||
}
|
||||
|
||||
// 把orgLexeme加入新的crossPath中
|
||||
crossPath = new LexemePath();
|
||||
crossPath.addCrossLexeme(orgLexeme);
|
||||
}
|
||||
orgLexeme = orgLexemes.pollFirst();
|
||||
}
|
||||
|
||||
// 把orgLexeme加入新的crossPath中
|
||||
crossPath = new LexemePath();
|
||||
crossPath.addCrossLexeme(orgLexeme);
|
||||
}
|
||||
orgLexeme = orgLexemes.pollFirst();
|
||||
// 处理最后的path
|
||||
if (crossPath.size() == 1 || !useSmart) {
|
||||
// crossPath没有歧义 或者 不做歧义处理
|
||||
// 直接输出当前crossPath
|
||||
context.addLexemePath(crossPath);
|
||||
} else {
|
||||
// 对当前的crossPath进行歧义处理
|
||||
QuickSortSet.Cell headCell = crossPath.getHead();
|
||||
LexemePath judgeResult = this.judge(headCell, crossPath.getPathLength());
|
||||
// 输出歧义处理结果judgeResult
|
||||
context.addLexemePath(judgeResult);
|
||||
}
|
||||
}
|
||||
|
||||
// 处理最后的path
|
||||
if (crossPath.size() == 1 || !useSmart) {
|
||||
// crossPath没有歧义 或者 不做歧义处理
|
||||
// 直接输出当前crossPath
|
||||
context.addLexemePath(crossPath);
|
||||
} else {
|
||||
// 对当前的crossPath进行歧义处理
|
||||
QuickSortSet.Cell headCell = crossPath.getHead();
|
||||
LexemePath judgeResult = this.judge(headCell, crossPath.getPathLength());
|
||||
// 输出歧义处理结果judgeResult
|
||||
context.addLexemePath(judgeResult);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* 歧义识别
|
||||
*
|
||||
* @param lexemeCell 歧义路径链表头
|
||||
* @param fullTextLength 歧义路径文本长度
|
||||
* @param fullTextLength 候选结果路径
|
||||
* @return
|
||||
*/
|
||||
private LexemePath judge(QuickSortSet.Cell lexemeCell, int fullTextLength) {
|
||||
// 候选路径集合
|
||||
TreeSet<LexemePath> pathOptions = new TreeSet<LexemePath>();
|
||||
// 候选结果路径
|
||||
LexemePath option = new LexemePath();
|
||||
|
||||
/**
|
||||
* 歧义识别
|
||||
*
|
||||
* @param lexemeCell 歧义路径链表头
|
||||
* @param fullTextLength 歧义路径文本长度(候选结果路径 option 由方法内部创建,并非参数)
|
||||
* @return
|
||||
*/
|
||||
private LexemePath judge(QuickSortSet.Cell lexemeCell, int fullTextLength) {
|
||||
// 候选路径集合
|
||||
TreeSet<LexemePath> pathOptions = new TreeSet<LexemePath>();
|
||||
// 候选结果路径
|
||||
LexemePath option = new LexemePath();
|
||||
// 对crossPath进行一次遍历,同时返回本次遍历中有冲突的Lexeme栈
|
||||
Stack<QuickSortSet.Cell> lexemeStack = this.forwardPath(lexemeCell, option);
|
||||
|
||||
// 对crossPath进行一次遍历,同时返回本次遍历中有冲突的Lexeme栈
|
||||
Stack<QuickSortSet.Cell> lexemeStack = this.forwardPath(lexemeCell, option);
|
||||
// 当前词元链并非最理想的,加入候选路径集合
|
||||
pathOptions.add(option.copy());
|
||||
|
||||
// 当前词元链并非最理想的,加入候选路径集合
|
||||
pathOptions.add(option.copy());
|
||||
// 存在歧义词,处理
|
||||
QuickSortSet.Cell c = null;
|
||||
while (!lexemeStack.isEmpty()) {
|
||||
c = lexemeStack.pop();
|
||||
// 回滚词元链
|
||||
this.backPath(c.getLexeme(), option);
|
||||
// 从歧义词位置开始,递归,生成可选方案
|
||||
this.forwardPath(c, option);
|
||||
pathOptions.add(option.copy());
|
||||
}
|
||||
|
||||
// 存在歧义词,处理
|
||||
QuickSortSet.Cell c = null;
|
||||
while (!lexemeStack.isEmpty()) {
|
||||
c = lexemeStack.pop();
|
||||
// 回滚词元链
|
||||
this.backPath(c.getLexeme(), option);
|
||||
// 从歧义词位置开始,递归,生成可选方案
|
||||
this.forwardPath(c, option);
|
||||
pathOptions.add(option.copy());
|
||||
// 返回集合中的最优方案
|
||||
return pathOptions.first();
|
||||
}
|
||||
|
||||
// 返回集合中的最优方案
|
||||
return pathOptions.first();
|
||||
}
|
||||
|
||||
/**
|
||||
* 向前遍历,添加词元,构造一个无歧义词元组合
|
||||
*
|
||||
* @param option path
|
||||
* @return
|
||||
*/
|
||||
private Stack<QuickSortSet.Cell> forwardPath(QuickSortSet.Cell lexemeCell, LexemePath option) {
|
||||
// 发生冲突的Lexeme栈
|
||||
Stack<QuickSortSet.Cell> conflictStack = new Stack<QuickSortSet.Cell>();
|
||||
QuickSortSet.Cell c = lexemeCell;
|
||||
// 迭代遍历Lexeme链表
|
||||
while (c != null && c.getLexeme() != null) {
|
||||
if (!option.addNotCrossLexeme(c.getLexeme())) {
|
||||
// 词元交叉,添加失败则加入lexemeStack栈
|
||||
conflictStack.push(c);
|
||||
}
|
||||
c = c.getNext();
|
||||
/**
|
||||
* 向前遍历,添加词元,构造一个无歧义词元组合
|
||||
*
|
||||
* @param option path
|
||||
* @return
|
||||
*/
|
||||
private Stack<QuickSortSet.Cell> forwardPath(QuickSortSet.Cell lexemeCell, LexemePath option) {
|
||||
// 发生冲突的Lexeme栈
|
||||
Stack<QuickSortSet.Cell> conflictStack = new Stack<QuickSortSet.Cell>();
|
||||
QuickSortSet.Cell c = lexemeCell;
|
||||
// 迭代遍历Lexeme链表
|
||||
while (c != null && c.getLexeme() != null) {
|
||||
if (!option.addNotCrossLexeme(c.getLexeme())) {
|
||||
// 词元交叉,添加失败则加入lexemeStack栈
|
||||
conflictStack.push(c);
|
||||
}
|
||||
c = c.getNext();
|
||||
}
|
||||
return conflictStack;
|
||||
}
|
||||
return conflictStack;
|
||||
}
|
||||
|
||||
/**
|
||||
* 回滚词元链,直到它能够接受指定的词元
|
||||
*
|
||||
* @param option
|
||||
* @param l
|
||||
*/
|
||||
private void backPath(Lexeme l, LexemePath option) {
|
||||
while (option.checkCross(l)) {
|
||||
option.removeTail();
|
||||
/**
|
||||
* 回滚词元链,直到它能够接受指定的词元
|
||||
*
|
||||
* @param option
|
||||
* @param l
|
||||
*/
|
||||
private void backPath(Lexeme l, LexemePath option) {
|
||||
while (option.checkCross(l)) {
|
||||
option.removeTail();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -28,131 +28,145 @@ import java.io.Reader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/** IK分词器主类 */
|
||||
/**
|
||||
* IK分词器主类
|
||||
*/
|
||||
public final class IKSegmenter {
|
||||
|
||||
/** 字符串reader */
|
||||
private Reader input;
|
||||
/** 分词器配置项 */
|
||||
private Configuration cfg;
|
||||
/** 分词器上下文 */
|
||||
private AnalyzeContext context;
|
||||
/** 分词处理器列表 */
|
||||
private List<ISegmenter> segmenters;
|
||||
/** 分词歧义裁决器 */
|
||||
private IKArbitrator arbitrator;
|
||||
/**
|
||||
* 字符串reader
|
||||
*/
|
||||
private Reader input;
|
||||
/**
|
||||
* 分词器配置项
|
||||
*/
|
||||
private Configuration cfg;
|
||||
/**
|
||||
* 分词器上下文
|
||||
*/
|
||||
private AnalyzeContext context;
|
||||
/**
|
||||
* 分词处理器列表
|
||||
*/
|
||||
private List<ISegmenter> segmenters;
|
||||
/**
|
||||
* 分词歧义裁决器
|
||||
*/
|
||||
private IKArbitrator arbitrator;
|
||||
|
||||
/**
|
||||
* IK分词器构造函数
|
||||
*
|
||||
* @param input
|
||||
* @param useSmart 为true,使用智能分词策略
|
||||
* <p>非智能分词:细粒度输出所有可能的切分结果 智能分词: 合并数词和量词,对分词结果进行歧义判断
|
||||
*/
|
||||
public IKSegmenter(Reader input, boolean useSmart) {
|
||||
this.input = input;
|
||||
this.cfg = DefaultConfig.getInstance();
|
||||
this.cfg.setUseSmart(useSmart);
|
||||
this.init();
|
||||
}
|
||||
/**
 * Creates a segmenter that uses the configuration returned by
 * {@code DefaultConfig.getInstance()}.
 *
 * <p>NOTE(review): {@code setUseSmart} is called on the object returned by
 * {@code getInstance()}; if that is a shared instance, the setting affects every other user
 * of it — confirm against {@code DefaultConfig}.
 *
 * @param input the reader supplying the text to segment
 * @param useSmart {@code true} to use the smart segmentation strategy. Non-smart mode outputs
 *     every possible segmentation at fine granularity; smart mode merges numerals with
 *     quantifiers and disambiguates the result.
 */
public IKSegmenter(Reader input, boolean useSmart) {
    Configuration defaults = DefaultConfig.getInstance();
    defaults.setUseSmart(useSmart);
    this.input = input;
    this.cfg = defaults;
    init();
}
|
||||
|
||||
/**
|
||||
* IK分词器构造函数
|
||||
*
|
||||
* @param input
|
||||
* @param cfg 使用自定义的Configuration构造分词器
|
||||
*/
|
||||
public IKSegmenter(Reader input, Configuration cfg) {
|
||||
this.input = input;
|
||||
this.cfg = cfg;
|
||||
this.init();
|
||||
}
|
||||
/**
 * Creates a segmenter that uses a caller-supplied configuration.
 *
 * @param input the reader supplying the text to segment
 * @param cfg the configuration to build the segmenter with
 */
public IKSegmenter(Reader input, Configuration cfg) {
    this.cfg = cfg;
    this.input = input;
    init();
}
|
||||
|
||||
/** 初始化 */
|
||||
private void init() {
|
||||
// 初始化词典单例
|
||||
Dictionary.initial(this.cfg);
|
||||
// 初始化分词上下文
|
||||
this.context = new AnalyzeContext(this.cfg);
|
||||
// 加载子分词器
|
||||
this.segmenters = this.loadSegmenters();
|
||||
// 加载歧义裁决器
|
||||
this.arbitrator = new IKArbitrator();
|
||||
}
|
||||
/**
|
||||
* 初始化
|
||||
*/
|
||||
private void init() {
|
||||
// 初始化词典单例
|
||||
Dictionary.initial(this.cfg);
|
||||
// 初始化分词上下文
|
||||
this.context = new AnalyzeContext(this.cfg);
|
||||
// 加载子分词器
|
||||
this.segmenters = this.loadSegmenters();
|
||||
// 加载歧义裁决器
|
||||
this.arbitrator = new IKArbitrator();
|
||||
}
|
||||
|
||||
/**
|
||||
* 初始化词典,加载子分词器实现
|
||||
*
|
||||
* @return List<ISegmenter>
|
||||
*/
|
||||
private List<ISegmenter> loadSegmenters() {
|
||||
List<ISegmenter> segmenters = new ArrayList<ISegmenter>(4);
|
||||
// 处理字母的子分词器
|
||||
segmenters.add(new LetterSegmenter());
|
||||
// 处理中文数量词的子分词器
|
||||
segmenters.add(new CN_QuantifierSegmenter());
|
||||
// 处理中文词的子分词器
|
||||
segmenters.add(new CJKSegmenter());
|
||||
return segmenters;
|
||||
}
|
||||
/**
|
||||
* 初始化词典,加载子分词器实现
|
||||
*
|
||||
* @return List<ISegmenter>
|
||||
*/
|
||||
private List<ISegmenter> loadSegmenters() {
|
||||
List<ISegmenter> segmenters = new ArrayList<ISegmenter>(4);
|
||||
// 处理字母的子分词器
|
||||
segmenters.add(new LetterSegmenter());
|
||||
// 处理中文数量词的子分词器
|
||||
segmenters.add(new CN_QuantifierSegmenter());
|
||||
// 处理中文词的子分词器
|
||||
segmenters.add(new CJKSegmenter());
|
||||
return segmenters;
|
||||
}
|
||||
|
||||
/**
|
||||
* 分词,获取下一个词元
|
||||
*
|
||||
* @return Lexeme 词元对象
|
||||
* @throws IOException
|
||||
*/
|
||||
public synchronized Lexeme next() throws IOException {
|
||||
Lexeme l = null;
|
||||
while ((l = context.getNextLexeme()) == null) {
|
||||
/*
|
||||
* 从reader中读取数据,填充buffer 如果reader是分次读入buffer的,那么buffer要 进行移位处理 移位处理上次读入的但未处理的数据
|
||||
*/
|
||||
int available = context.fillBuffer(this.input);
|
||||
if (available <= 0) {
|
||||
// reader已经读完
|
||||
context.reset();
|
||||
return null;
|
||||
/**
|
||||
* 分词,获取下一个词元
|
||||
*
|
||||
* @return Lexeme 词元对象
|
||||
* @throws IOException
|
||||
*/
|
||||
public synchronized Lexeme next() throws IOException {
|
||||
Lexeme l = null;
|
||||
while ((l = context.getNextLexeme()) == null) {
|
||||
/*
|
||||
* 从reader中读取数据,填充buffer 如果reader是分次读入buffer的,那么buffer要 进行移位处理 移位处理上次读入的但未处理的数据
|
||||
*/
|
||||
int available = context.fillBuffer(this.input);
|
||||
if (available <= 0) {
|
||||
// reader已经读完
|
||||
context.reset();
|
||||
return null;
|
||||
|
||||
} else {
|
||||
// 初始化指针
|
||||
context.initCursor();
|
||||
do {
|
||||
// 遍历子分词器
|
||||
for (ISegmenter segmenter : segmenters) {
|
||||
segmenter.analyze(context);
|
||||
}
|
||||
// 字符缓冲区接近读完,需要读入新的字符
|
||||
if (context.needRefillBuffer()) {
|
||||
break;
|
||||
}
|
||||
// 向前移动指针
|
||||
} while (context.moveCursor());
|
||||
// 重置子分词器,为下轮循环进行初始化
|
||||
for (ISegmenter segmenter : segmenters) {
|
||||
segmenter.reset();
|
||||
} else {
|
||||
// 初始化指针
|
||||
context.initCursor();
|
||||
do {
|
||||
// 遍历子分词器
|
||||
for (ISegmenter segmenter : segmenters) {
|
||||
segmenter.analyze(context);
|
||||
}
|
||||
// 字符缓冲区接近读完,需要读入新的字符
|
||||
if (context.needRefillBuffer()) {
|
||||
break;
|
||||
}
|
||||
// 向前移动指针
|
||||
} while (context.moveCursor());
|
||||
// 重置子分词器,为下轮循环进行初始化
|
||||
for (ISegmenter segmenter : segmenters) {
|
||||
segmenter.reset();
|
||||
}
|
||||
}
|
||||
// 对分词进行歧义处理
|
||||
this.arbitrator.process(context, this.cfg.useSmart());
|
||||
// 将分词结果输出到结果集,并处理未切分的单个CJK字符
|
||||
context.outputToResult();
|
||||
// 记录本次分词的缓冲区位移
|
||||
context.markBufferOffset();
|
||||
}
|
||||
}
|
||||
// 对分词进行歧义处理
|
||||
this.arbitrator.process(context, this.cfg.useSmart());
|
||||
// 将分词结果输出到结果集,并处理未切分的单个CJK字符
|
||||
context.outputToResult();
|
||||
// 记录本次分词的缓冲区位移
|
||||
context.markBufferOffset();
|
||||
return l;
|
||||
}
|
||||
return l;
|
||||
}
|
||||
|
||||
/**
|
||||
* 重置分词器到初始状态
|
||||
*
|
||||
* @param input
|
||||
*/
|
||||
public synchronized void reset(Reader input) {
|
||||
this.input = input;
|
||||
context.reset();
|
||||
for (ISegmenter segmenter : segmenters) {
|
||||
segmenter.reset();
|
||||
/**
|
||||
* 重置分词器到初始状态
|
||||
*
|
||||
* @param input
|
||||
*/
|
||||
public synchronized void reset(Reader input) {
|
||||
this.input = input;
|
||||
context.reset();
|
||||
for (ISegmenter segmenter : segmenters) {
|
||||
segmenter.reset();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,44 +1,43 @@
|
||||
/**
|
||||
* IK 中文分词 版本 5.0
|
||||
* IK Analyzer release 5.0
|
||||
*
|
||||
* <p>
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* <p>
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* <p>
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* <p>
|
||||
* 源代码由林良益(linliangyi2005@gmail.com)提供
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
*/
|
||||
package com.rymcu.forest.lucene.core;
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* 子分词器接口
|
||||
*/
|
||||
interface ISegmenter {
|
||||
|
||||
/**
|
||||
* 从分析器读取下一个可能分解的词元对象
|
||||
* @param context 分词算法上下文
|
||||
*/
|
||||
void analyze(AnalyzeContext context);
|
||||
/**
|
||||
* 从分析器读取下一个可能分解的词元对象
|
||||
* @param context 分词算法上下文
|
||||
*/
|
||||
void analyze(AnalyzeContext context);
|
||||
|
||||
/**
|
||||
* 重置子分析器状态
|
||||
*/
|
||||
void reset();
|
||||
/**
|
||||
* 重置子分析器状态
|
||||
*/
|
||||
void reset();
|
||||
|
||||
}
|
||||
|
@ -21,263 +21,283 @@ package com.rymcu.forest.lucene.core;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/** 英文字符及阿拉伯数字子分词器 */
|
||||
/**
|
||||
* 英文字符及阿拉伯数字子分词器
|
||||
*/
|
||||
class LetterSegmenter implements ISegmenter {
|
||||
|
||||
/** 子分词器标签 */
|
||||
static final String SEGMENTER_NAME = "LETTER_SEGMENTER";
|
||||
/** 链接符号 */
|
||||
private static final char[] Letter_Connector = new char[] {'#', '&', '+', '-', '.', '@', '_'};
|
||||
/** 数字符号 */
|
||||
private static final char[] Num_Connector = new char[] {',', '.'};
|
||||
/** 词元的开始位置, 同时作为子分词器状态标识 当start > -1 时,标识当前的分词器正在处理字符 */
|
||||
private int start;
|
||||
/** 记录词元结束位置 end记录的是在词元中最后一个出现的Letter但非Sign_Connector的字符的位置 */
|
||||
private int end;
|
||||
/** 字母起始位置 */
|
||||
private int englishStart;
|
||||
/** 字母结束位置 */
|
||||
private int englishEnd;
|
||||
/** 阿拉伯数字起始位置 */
|
||||
private int arabicStart;
|
||||
/** 阿拉伯数字结束位置 */
|
||||
private int arabicEnd;
|
||||
/**
|
||||
* 子分词器标签
|
||||
*/
|
||||
static final String SEGMENTER_NAME = "LETTER_SEGMENTER";
|
||||
/**
|
||||
* 链接符号
|
||||
*/
|
||||
private static final char[] Letter_Connector = new char[]{'#', '&', '+', '-', '.', '@', '_'};
|
||||
/**
|
||||
* 数字符号
|
||||
*/
|
||||
private static final char[] Num_Connector = new char[]{',', '.'};
|
||||
/**
|
||||
* 词元的开始位置, 同时作为子分词器状态标识 当start > -1 时,标识当前的分词器正在处理字符
|
||||
*/
|
||||
private int start;
|
||||
/**
|
||||
* 记录词元结束位置 end记录的是在词元中最后一个出现的Letter但非Sign_Connector的字符的位置
|
||||
*/
|
||||
private int end;
|
||||
/**
|
||||
* 字母起始位置
|
||||
*/
|
||||
private int englishStart;
|
||||
/**
|
||||
* 字母结束位置
|
||||
*/
|
||||
private int englishEnd;
|
||||
/**
|
||||
* 阿拉伯数字起始位置
|
||||
*/
|
||||
private int arabicStart;
|
||||
/**
|
||||
* 阿拉伯数字结束位置
|
||||
*/
|
||||
private int arabicEnd;
|
||||
|
||||
LetterSegmenter() {
|
||||
Arrays.sort(Letter_Connector);
|
||||
Arrays.sort(Num_Connector);
|
||||
this.start = -1;
|
||||
this.end = -1;
|
||||
this.englishStart = -1;
|
||||
this.englishEnd = -1;
|
||||
this.arabicStart = -1;
|
||||
this.arabicEnd = -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void analyze(AnalyzeContext context) {
|
||||
boolean bufferLockFlag = false;
|
||||
// 处理英文字母
|
||||
bufferLockFlag = this.processEnglishLetter(context) || bufferLockFlag;
|
||||
// 处理阿拉伯字母
|
||||
bufferLockFlag = this.processArabicLetter(context) || bufferLockFlag;
|
||||
// 处理混合字母(这个要放最后处理,可以通过QuickSortSet排除重复)
|
||||
bufferLockFlag = this.processMixLetter(context) || bufferLockFlag;
|
||||
|
||||
// 判断是否锁定缓冲区
|
||||
if (bufferLockFlag) {
|
||||
context.lockBuffer(SEGMENTER_NAME);
|
||||
} else {
|
||||
// 对缓冲区解锁
|
||||
context.unlockBuffer(SEGMENTER_NAME);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() {
|
||||
this.start = -1;
|
||||
this.end = -1;
|
||||
this.englishStart = -1;
|
||||
this.englishEnd = -1;
|
||||
this.arabicStart = -1;
|
||||
this.arabicEnd = -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* 处理数字字母混合输出 如:windos2000 | linliangyi2005@gmail.com
|
||||
*
|
||||
* @param context
|
||||
* @return
|
||||
*/
|
||||
private boolean processMixLetter(AnalyzeContext context) {
|
||||
boolean needLock = false;
|
||||
|
||||
if (this.start == -1) { // 当前的分词器尚未开始处理字符
|
||||
if (CharacterUtil.CHAR_ARABIC == context.getCurrentCharType()
|
||||
|| CharacterUtil.CHAR_ENGLISH == context.getCurrentCharType()) {
|
||||
// 记录起始指针的位置,标明分词器进入处理状态
|
||||
this.start = context.getCursor();
|
||||
this.end = start;
|
||||
}
|
||||
|
||||
} else { // 当前的分词器正在处理字符
|
||||
if (CharacterUtil.CHAR_ARABIC == context.getCurrentCharType()
|
||||
|| CharacterUtil.CHAR_ENGLISH == context.getCurrentCharType()) {
|
||||
// 记录下可能的结束位置
|
||||
this.end = context.getCursor();
|
||||
|
||||
} else if (CharacterUtil.CHAR_USELESS == context.getCurrentCharType()
|
||||
&& this.isLetterConnector(context.getCurrentChar())) {
|
||||
// 记录下可能的结束位置
|
||||
this.end = context.getCursor();
|
||||
} else {
|
||||
// 遇到非Letter字符,输出词元
|
||||
Lexeme newLexeme =
|
||||
new Lexeme(
|
||||
context.getBufferOffset(),
|
||||
this.start,
|
||||
this.end - this.start + 1,
|
||||
Lexeme.TYPE_LETTER);
|
||||
context.addLexeme(newLexeme);
|
||||
LetterSegmenter() {
|
||||
Arrays.sort(Letter_Connector);
|
||||
Arrays.sort(Num_Connector);
|
||||
this.start = -1;
|
||||
this.end = -1;
|
||||
}
|
||||
this.englishStart = -1;
|
||||
this.englishEnd = -1;
|
||||
this.arabicStart = -1;
|
||||
this.arabicEnd = -1;
|
||||
}
|
||||
|
||||
// 判断缓冲区是否已经读完
|
||||
if (context.isBufferConsumed()) {
|
||||
if (this.start != -1 && this.end != -1) {
|
||||
// 缓冲以读完,输出词元
|
||||
Lexeme newLexeme =
|
||||
new Lexeme(
|
||||
context.getBufferOffset(),
|
||||
this.start,
|
||||
this.end - this.start + 1,
|
||||
Lexeme.TYPE_LETTER);
|
||||
context.addLexeme(newLexeme);
|
||||
@Override
|
||||
public void analyze(AnalyzeContext context) {
|
||||
boolean bufferLockFlag = false;
|
||||
// 处理英文字母
|
||||
bufferLockFlag = this.processEnglishLetter(context) || bufferLockFlag;
|
||||
// 处理阿拉伯字母
|
||||
bufferLockFlag = this.processArabicLetter(context) || bufferLockFlag;
|
||||
// 处理混合字母(这个要放最后处理,可以通过QuickSortSet排除重复)
|
||||
bufferLockFlag = this.processMixLetter(context) || bufferLockFlag;
|
||||
|
||||
// 判断是否锁定缓冲区
|
||||
if (bufferLockFlag) {
|
||||
context.lockBuffer(SEGMENTER_NAME);
|
||||
} else {
|
||||
// 对缓冲区解锁
|
||||
context.unlockBuffer(SEGMENTER_NAME);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() {
|
||||
this.start = -1;
|
||||
this.end = -1;
|
||||
}
|
||||
}
|
||||
|
||||
// 判断是否锁定缓冲区
|
||||
// 对缓冲区解锁
|
||||
needLock = this.start != -1 || this.end != -1;
|
||||
return needLock;
|
||||
}
|
||||
|
||||
/**
|
||||
* 处理纯英文字母输出
|
||||
*
|
||||
* @param context
|
||||
* @return
|
||||
*/
|
||||
private boolean processEnglishLetter(AnalyzeContext context) {
|
||||
boolean needLock = false;
|
||||
|
||||
if (this.englishStart == -1) { // 当前的分词器尚未开始处理英文字符
|
||||
if (CharacterUtil.CHAR_ENGLISH == context.getCurrentCharType()) {
|
||||
// 记录起始指针的位置,标明分词器进入处理状态
|
||||
this.englishStart = context.getCursor();
|
||||
this.englishEnd = this.englishStart;
|
||||
}
|
||||
} else { // 当前的分词器正在处理英文字符
|
||||
if (CharacterUtil.CHAR_ENGLISH == context.getCurrentCharType()) {
|
||||
// 记录当前指针位置为结束位置
|
||||
this.englishEnd = context.getCursor();
|
||||
} else {
|
||||
// 遇到非English字符,输出词元
|
||||
Lexeme newLexeme =
|
||||
new Lexeme(
|
||||
context.getBufferOffset(),
|
||||
this.englishStart,
|
||||
this.englishEnd - this.englishStart + 1,
|
||||
Lexeme.TYPE_ENGLISH);
|
||||
context.addLexeme(newLexeme);
|
||||
this.englishStart = -1;
|
||||
this.englishEnd = -1;
|
||||
}
|
||||
}
|
||||
|
||||
// 判断缓冲区是否已经读完
|
||||
if (context.isBufferConsumed()) {
|
||||
if (this.englishStart != -1 && this.englishEnd != -1) {
|
||||
// 缓冲以读完,输出词元
|
||||
Lexeme newLexeme =
|
||||
new Lexeme(
|
||||
context.getBufferOffset(),
|
||||
this.englishStart,
|
||||
this.englishEnd - this.englishStart + 1,
|
||||
Lexeme.TYPE_ENGLISH);
|
||||
context.addLexeme(newLexeme);
|
||||
this.englishStart = -1;
|
||||
this.englishEnd = -1;
|
||||
}
|
||||
}
|
||||
|
||||
// 判断是否锁定缓冲区
|
||||
// 对缓冲区解锁
|
||||
needLock = this.englishStart != -1 || this.englishEnd != -1;
|
||||
return needLock;
|
||||
}
|
||||
|
||||
/**
|
||||
* 处理阿拉伯数字输出
|
||||
*
|
||||
* @param context
|
||||
* @return
|
||||
*/
|
||||
private boolean processArabicLetter(AnalyzeContext context) {
|
||||
boolean needLock = false;
|
||||
|
||||
if (this.arabicStart == -1) { // 当前的分词器尚未开始处理数字字符
|
||||
if (CharacterUtil.CHAR_ARABIC == context.getCurrentCharType()) {
|
||||
// 记录起始指针的位置,标明分词器进入处理状态
|
||||
this.arabicStart = context.getCursor();
|
||||
this.arabicEnd = this.arabicStart;
|
||||
}
|
||||
} else { // 当前的分词器正在处理数字字符
|
||||
if (CharacterUtil.CHAR_ARABIC == context.getCurrentCharType()) {
|
||||
// 记录当前指针位置为结束位置
|
||||
this.arabicEnd = context.getCursor();
|
||||
} else if (CharacterUtil.CHAR_USELESS == context.getCurrentCharType()
|
||||
&& this.isNumConnector(context.getCurrentChar())) {
|
||||
// 不输出数字,但不标记结束
|
||||
} else {
|
||||
// //遇到非Arabic字符,输出词元
|
||||
Lexeme newLexeme =
|
||||
new Lexeme(
|
||||
context.getBufferOffset(),
|
||||
this.arabicStart,
|
||||
this.arabicEnd - this.arabicStart + 1,
|
||||
Lexeme.TYPE_ARABIC);
|
||||
context.addLexeme(newLexeme);
|
||||
this.arabicStart = -1;
|
||||
this.arabicEnd = -1;
|
||||
}
|
||||
}
|
||||
|
||||
// 判断缓冲区是否已经读完
|
||||
if (context.isBufferConsumed()) {
|
||||
if (this.arabicStart != -1 && this.arabicEnd != -1) {
|
||||
// 生成已切分的词元
|
||||
Lexeme newLexeme =
|
||||
new Lexeme(
|
||||
context.getBufferOffset(),
|
||||
this.arabicStart,
|
||||
this.arabicEnd - this.arabicStart + 1,
|
||||
Lexeme.TYPE_ARABIC);
|
||||
context.addLexeme(newLexeme);
|
||||
this.arabicStart = -1;
|
||||
this.arabicEnd = -1;
|
||||
}
|
||||
/**
|
||||
* 处理数字字母混合输出 如:windos2000 | linliangyi2005@gmail.com
|
||||
*
|
||||
* @param context
|
||||
* @return
|
||||
*/
|
||||
private boolean processMixLetter(AnalyzeContext context) {
|
||||
boolean needLock = false;
|
||||
|
||||
if (this.start == -1) { // 当前的分词器尚未开始处理字符
|
||||
if (CharacterUtil.CHAR_ARABIC == context.getCurrentCharType()
|
||||
|| CharacterUtil.CHAR_ENGLISH == context.getCurrentCharType()) {
|
||||
// 记录起始指针的位置,标明分词器进入处理状态
|
||||
this.start = context.getCursor();
|
||||
this.end = start;
|
||||
}
|
||||
|
||||
} else { // 当前的分词器正在处理字符
|
||||
if (CharacterUtil.CHAR_ARABIC == context.getCurrentCharType()
|
||||
|| CharacterUtil.CHAR_ENGLISH == context.getCurrentCharType()) {
|
||||
// 记录下可能的结束位置
|
||||
this.end = context.getCursor();
|
||||
|
||||
} else if (CharacterUtil.CHAR_USELESS == context.getCurrentCharType()
|
||||
&& this.isLetterConnector(context.getCurrentChar())) {
|
||||
// 记录下可能的结束位置
|
||||
this.end = context.getCursor();
|
||||
} else {
|
||||
// 遇到非Letter字符,输出词元
|
||||
Lexeme newLexeme =
|
||||
new Lexeme(
|
||||
context.getBufferOffset(),
|
||||
this.start,
|
||||
this.end - this.start + 1,
|
||||
Lexeme.TYPE_LETTER);
|
||||
context.addLexeme(newLexeme);
|
||||
this.start = -1;
|
||||
this.end = -1;
|
||||
}
|
||||
}
|
||||
|
||||
// 判断缓冲区是否已经读完
|
||||
if (context.isBufferConsumed()) {
|
||||
if (this.start != -1 && this.end != -1) {
|
||||
// 缓冲以读完,输出词元
|
||||
Lexeme newLexeme =
|
||||
new Lexeme(
|
||||
context.getBufferOffset(),
|
||||
this.start,
|
||||
this.end - this.start + 1,
|
||||
Lexeme.TYPE_LETTER);
|
||||
context.addLexeme(newLexeme);
|
||||
this.start = -1;
|
||||
this.end = -1;
|
||||
}
|
||||
}
|
||||
|
||||
// 判断是否锁定缓冲区
|
||||
// 对缓冲区解锁
|
||||
needLock = this.start != -1 || this.end != -1;
|
||||
return needLock;
|
||||
}
|
||||
|
||||
// 判断是否锁定缓冲区
|
||||
// 对缓冲区解锁
|
||||
needLock = this.arabicStart != -1 || this.arabicEnd != -1;
|
||||
return needLock;
|
||||
}
|
||||
/**
|
||||
* 处理纯英文字母输出
|
||||
*
|
||||
* @param context
|
||||
* @return
|
||||
*/
|
||||
private boolean processEnglishLetter(AnalyzeContext context) {
|
||||
boolean needLock = false;
|
||||
|
||||
/**
|
||||
* 判断是否是字母连接符号
|
||||
*
|
||||
* @param input
|
||||
* @return
|
||||
*/
|
||||
private boolean isLetterConnector(char input) {
|
||||
int index = Arrays.binarySearch(Letter_Connector, input);
|
||||
return index >= 0;
|
||||
}
|
||||
if (this.englishStart == -1) { // 当前的分词器尚未开始处理英文字符
|
||||
if (CharacterUtil.CHAR_ENGLISH == context.getCurrentCharType()) {
|
||||
// 记录起始指针的位置,标明分词器进入处理状态
|
||||
this.englishStart = context.getCursor();
|
||||
this.englishEnd = this.englishStart;
|
||||
}
|
||||
} else { // 当前的分词器正在处理英文字符
|
||||
if (CharacterUtil.CHAR_ENGLISH == context.getCurrentCharType()) {
|
||||
// 记录当前指针位置为结束位置
|
||||
this.englishEnd = context.getCursor();
|
||||
} else {
|
||||
// 遇到非English字符,输出词元
|
||||
Lexeme newLexeme =
|
||||
new Lexeme(
|
||||
context.getBufferOffset(),
|
||||
this.englishStart,
|
||||
this.englishEnd - this.englishStart + 1,
|
||||
Lexeme.TYPE_ENGLISH);
|
||||
context.addLexeme(newLexeme);
|
||||
this.englishStart = -1;
|
||||
this.englishEnd = -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 判断是否是数字连接符号
|
||||
*
|
||||
* @param input
|
||||
* @return
|
||||
*/
|
||||
private boolean isNumConnector(char input) {
|
||||
int index = Arrays.binarySearch(Num_Connector, input);
|
||||
return index >= 0;
|
||||
}
|
||||
// 判断缓冲区是否已经读完
|
||||
if (context.isBufferConsumed()) {
|
||||
if (this.englishStart != -1 && this.englishEnd != -1) {
|
||||
// 缓冲以读完,输出词元
|
||||
Lexeme newLexeme =
|
||||
new Lexeme(
|
||||
context.getBufferOffset(),
|
||||
this.englishStart,
|
||||
this.englishEnd - this.englishStart + 1,
|
||||
Lexeme.TYPE_ENGLISH);
|
||||
context.addLexeme(newLexeme);
|
||||
this.englishStart = -1;
|
||||
this.englishEnd = -1;
|
||||
}
|
||||
}
|
||||
|
||||
// 判断是否锁定缓冲区
|
||||
// 对缓冲区解锁
|
||||
needLock = this.englishStart != -1 || this.englishEnd != -1;
|
||||
return needLock;
|
||||
}
|
||||
|
||||
/**
|
||||
* 处理阿拉伯数字输出
|
||||
*
|
||||
* @param context
|
||||
* @return
|
||||
*/
|
||||
private boolean processArabicLetter(AnalyzeContext context) {
|
||||
boolean needLock = false;
|
||||
|
||||
if (this.arabicStart == -1) { // 当前的分词器尚未开始处理数字字符
|
||||
if (CharacterUtil.CHAR_ARABIC == context.getCurrentCharType()) {
|
||||
// 记录起始指针的位置,标明分词器进入处理状态
|
||||
this.arabicStart = context.getCursor();
|
||||
this.arabicEnd = this.arabicStart;
|
||||
}
|
||||
} else { // 当前的分词器正在处理数字字符
|
||||
if (CharacterUtil.CHAR_ARABIC == context.getCurrentCharType()) {
|
||||
// 记录当前指针位置为结束位置
|
||||
this.arabicEnd = context.getCursor();
|
||||
} else if (CharacterUtil.CHAR_USELESS == context.getCurrentCharType()
|
||||
&& this.isNumConnector(context.getCurrentChar())) {
|
||||
// 不输出数字,但不标记结束
|
||||
} else {
|
||||
// //遇到非Arabic字符,输出词元
|
||||
Lexeme newLexeme =
|
||||
new Lexeme(
|
||||
context.getBufferOffset(),
|
||||
this.arabicStart,
|
||||
this.arabicEnd - this.arabicStart + 1,
|
||||
Lexeme.TYPE_ARABIC);
|
||||
context.addLexeme(newLexeme);
|
||||
this.arabicStart = -1;
|
||||
this.arabicEnd = -1;
|
||||
}
|
||||
}
|
||||
|
||||
// 判断缓冲区是否已经读完
|
||||
if (context.isBufferConsumed()) {
|
||||
if (this.arabicStart != -1 && this.arabicEnd != -1) {
|
||||
// 生成已切分的词元
|
||||
Lexeme newLexeme =
|
||||
new Lexeme(
|
||||
context.getBufferOffset(),
|
||||
this.arabicStart,
|
||||
this.arabicEnd - this.arabicStart + 1,
|
||||
Lexeme.TYPE_ARABIC);
|
||||
context.addLexeme(newLexeme);
|
||||
this.arabicStart = -1;
|
||||
this.arabicEnd = -1;
|
||||
}
|
||||
}
|
||||
|
||||
// 判断是否锁定缓冲区
|
||||
// 对缓冲区解锁
|
||||
needLock = this.arabicStart != -1 || this.arabicEnd != -1;
|
||||
return needLock;
|
||||
}
|
||||
|
||||
/**
|
||||
* 判断是否是字母连接符号
|
||||
*
|
||||
* @param input
|
||||
* @return
|
||||
*/
|
||||
private boolean isLetterConnector(char input) {
|
||||
int index = Arrays.binarySearch(Letter_Connector, input);
|
||||
return index >= 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* 判断是否是数字连接符号
|
||||
*
|
||||
* @param input
|
||||
* @return
|
||||
*/
|
||||
private boolean isNumConnector(char input) {
|
||||
int index = Arrays.binarySearch(Num_Connector, input);
|
||||
return index >= 0;
|
||||
}
|
||||
}
|
||||
|
@ -1,26 +1,25 @@
|
||||
/**
|
||||
* IK 中文分词 版本 5.0
|
||||
* IK Analyzer release 5.0
|
||||
*
|
||||
* <p>
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* <p>
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* <p>
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* <p>
|
||||
* 源代码由林良益(linliangyi2005@gmail.com)提供
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
*/
|
||||
package com.rymcu.forest.lucene.core;
|
||||
|
||||
@ -28,252 +27,252 @@ package com.rymcu.forest.lucene.core;
|
||||
* IK词元对象
|
||||
*/
|
||||
public class Lexeme implements Comparable<Lexeme> {
|
||||
// lexemeType常量
|
||||
// 未知
|
||||
public static final int TYPE_UNKNOWN = 0;
|
||||
// 英文
|
||||
public static final int TYPE_ENGLISH = 1;
|
||||
// 数字
|
||||
public static final int TYPE_ARABIC = 2;
|
||||
// 英文数字混合
|
||||
public static final int TYPE_LETTER = 3;
|
||||
// 中文词元
|
||||
public static final int TYPE_CNWORD = 4;
|
||||
// 中文单字
|
||||
public static final int TYPE_CNCHAR = 64;
|
||||
// 日韩文字
|
||||
public static final int TYPE_OTHER_CJK = 8;
|
||||
// 中文数词
|
||||
public static final int TYPE_CNUM = 16;
|
||||
// 中文量词
|
||||
public static final int TYPE_COUNT = 32;
|
||||
// 中文数量词
|
||||
public static final int TYPE_CQUAN = 48;
|
||||
// lexemeType常量
|
||||
// 未知
|
||||
public static final int TYPE_UNKNOWN = 0;
|
||||
// 英文
|
||||
public static final int TYPE_ENGLISH = 1;
|
||||
// 数字
|
||||
public static final int TYPE_ARABIC = 2;
|
||||
// 英文数字混合
|
||||
public static final int TYPE_LETTER = 3;
|
||||
// 中文词元
|
||||
public static final int TYPE_CNWORD = 4;
|
||||
// 中文单字
|
||||
public static final int TYPE_CNCHAR = 64;
|
||||
// 日韩文字
|
||||
public static final int TYPE_OTHER_CJK = 8;
|
||||
// 中文数词
|
||||
public static final int TYPE_CNUM = 16;
|
||||
// 中文量词
|
||||
public static final int TYPE_COUNT = 32;
|
||||
// 中文数量词
|
||||
public static final int TYPE_CQUAN = 48;
|
||||
|
||||
// 词元的起始位移
|
||||
private int offset;
|
||||
// 词元的相对起始位置
|
||||
private int begin;
|
||||
// 词元的长度
|
||||
private int length;
|
||||
// 词元文本
|
||||
private String lexemeText;
|
||||
// 词元类型
|
||||
private int lexemeType;
|
||||
// 词元的起始位移
|
||||
private int offset;
|
||||
// 词元的相对起始位置
|
||||
private int begin;
|
||||
// 词元的长度
|
||||
private int length;
|
||||
// 词元文本
|
||||
private String lexemeText;
|
||||
// 词元类型
|
||||
private int lexemeType;
|
||||
|
||||
public Lexeme(int offset, int begin, int length, int lexemeType) {
|
||||
this.offset = offset;
|
||||
this.begin = begin;
|
||||
if (length < 0) {
|
||||
throw new IllegalArgumentException("length < 0");
|
||||
}
|
||||
this.length = length;
|
||||
this.lexemeType = lexemeType;
|
||||
}
|
||||
|
||||
/*
|
||||
* 判断词元相等算法 起始位置偏移、起始位置、终止位置相同
|
||||
* @see java.lang.Object#equals(Object o)
|
||||
*/
|
||||
public boolean equals(Object o) {
|
||||
if (o == null) {
|
||||
return false;
|
||||
public Lexeme(int offset, int begin, int length, int lexemeType) {
|
||||
this.offset = offset;
|
||||
this.begin = begin;
|
||||
if (length < 0) {
|
||||
throw new IllegalArgumentException("length < 0");
|
||||
}
|
||||
this.length = length;
|
||||
this.lexemeType = lexemeType;
|
||||
}
|
||||
|
||||
if (this == o) {
|
||||
return true;
|
||||
/*
|
||||
* 判断词元相等算法 起始位置偏移、起始位置、终止位置相同
|
||||
* @see java.lang.Object#equals(Object o)
|
||||
*/
|
||||
public boolean equals(Object o) {
|
||||
if (o == null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (o instanceof Lexeme) {
|
||||
Lexeme other = (Lexeme) o;
|
||||
if (this.offset == other.getOffset() && this.begin == other.getBegin()
|
||||
&& this.length == other.getLength()) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (o instanceof Lexeme) {
|
||||
Lexeme other = (Lexeme) o;
|
||||
if (this.offset == other.getOffset() && this.begin == other.getBegin()
|
||||
&& this.length == other.getLength()) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
/*
|
||||
* 词元哈希编码算法
|
||||
* @see java.lang.Object#hashCode()
|
||||
*/
|
||||
public int hashCode() {
|
||||
int absBegin = getBeginPosition();
|
||||
int absEnd = getEndPosition();
|
||||
return (absBegin * 37) + (absEnd * 31) + ((absBegin * absEnd) % getLength()) * 11;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* 词元哈希编码算法
|
||||
* @see java.lang.Object#hashCode()
|
||||
*/
|
||||
public int hashCode() {
|
||||
int absBegin = getBeginPosition();
|
||||
int absEnd = getEndPosition();
|
||||
return (absBegin * 37) + (absEnd * 31) + ((absBegin * absEnd) % getLength()) * 11;
|
||||
}
|
||||
/*
|
||||
* 词元在排序集合中的比较算法
|
||||
* @see java.lang.Comparable#compareTo(java.lang.Object)
|
||||
*/
|
||||
public int compareTo(Lexeme other) {
|
||||
// 起始位置优先
|
||||
if (this.begin < other.getBegin()) {
|
||||
return -1;
|
||||
} else if (this.begin == other.getBegin()) {
|
||||
// 词元长度优先
|
||||
if (this.length > other.getLength()) {
|
||||
return -1;
|
||||
} else if (this.length == other.getLength()) {
|
||||
return 0;
|
||||
} else {// this.length < other.getLength()
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* 词元在排序集合中的比较算法
|
||||
* @see java.lang.Comparable#compareTo(java.lang.Object)
|
||||
*/
|
||||
public int compareTo(Lexeme other) {
|
||||
// 起始位置优先
|
||||
if (this.begin < other.getBegin()) {
|
||||
return -1;
|
||||
} else if (this.begin == other.getBegin()) {
|
||||
// 词元长度优先
|
||||
if (this.length > other.getLength()) {
|
||||
return -1;
|
||||
} else if (this.length == other.getLength()) {
|
||||
return 0;
|
||||
} else {// this.length < other.getLength()
|
||||
return 1;
|
||||
}
|
||||
|
||||
} else {// this.begin > other.getBegin()
|
||||
return 1;
|
||||
} else {// this.begin > other.getBegin()
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public int getOffset() {
|
||||
return offset;
|
||||
}
|
||||
|
||||
public void setOffset(int offset) {
|
||||
this.offset = offset;
|
||||
}
|
||||
|
||||
public int getBegin() {
|
||||
return begin;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取词元在文本中的起始位置
|
||||
* @return int
|
||||
*/
|
||||
public int getBeginPosition() {
|
||||
return offset + begin;
|
||||
}
|
||||
|
||||
public void setBegin(int begin) {
|
||||
this.begin = begin;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取词元在文本中的结束位置
|
||||
* @return int
|
||||
*/
|
||||
public int getEndPosition() {
|
||||
return offset + begin + length;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取词元的字符长度
|
||||
* @return int
|
||||
*/
|
||||
public int getLength() {
|
||||
return this.length;
|
||||
}
|
||||
|
||||
public void setLength(int length) {
|
||||
if (this.length < 0) {
|
||||
throw new IllegalArgumentException("length < 0");
|
||||
public int getOffset() {
|
||||
return offset;
|
||||
}
|
||||
this.length = length;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取词元的文本内容
|
||||
* @return String
|
||||
*/
|
||||
public String getLexemeText() {
|
||||
if (lexemeText == null) {
|
||||
return "";
|
||||
public void setOffset(int offset) {
|
||||
this.offset = offset;
|
||||
}
|
||||
return lexemeText;
|
||||
}
|
||||
|
||||
public void setLexemeText(String lexemeText) {
|
||||
if (lexemeText == null) {
|
||||
this.lexemeText = "";
|
||||
this.length = 0;
|
||||
} else {
|
||||
this.lexemeText = lexemeText;
|
||||
this.length = lexemeText.length();
|
||||
public int getBegin() {
|
||||
return begin;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取词元类型
|
||||
* @return int
|
||||
*/
|
||||
public int getLexemeType() {
|
||||
return lexemeType;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取词元类型标示字符串
|
||||
* @return String
|
||||
*/
|
||||
public String getLexemeTypeString() {
|
||||
switch (lexemeType) {
|
||||
|
||||
case TYPE_ENGLISH:
|
||||
return "ENGLISH";
|
||||
|
||||
case TYPE_ARABIC:
|
||||
return "ARABIC";
|
||||
|
||||
case TYPE_LETTER:
|
||||
return "LETTER";
|
||||
|
||||
case TYPE_CNWORD:
|
||||
return "CN_WORD";
|
||||
|
||||
case TYPE_CNCHAR:
|
||||
return "CN_CHAR";
|
||||
|
||||
case TYPE_OTHER_CJK:
|
||||
return "OTHER_CJK";
|
||||
|
||||
case TYPE_COUNT:
|
||||
return "COUNT";
|
||||
|
||||
case TYPE_CNUM:
|
||||
return "TYPE_CNUM";
|
||||
|
||||
case TYPE_CQUAN:
|
||||
return "TYPE_CQUAN";
|
||||
|
||||
default:
|
||||
return "UNKONW";
|
||||
/**
|
||||
* 获取词元在文本中的起始位置
|
||||
* @return int
|
||||
*/
|
||||
public int getBeginPosition() {
|
||||
return offset + begin;
|
||||
}
|
||||
}
|
||||
|
||||
public void setLexemeType(int lexemeType) {
|
||||
this.lexemeType = lexemeType;
|
||||
}
|
||||
|
||||
/**
|
||||
* 合并两个相邻的词元
|
||||
* @param l
|
||||
* @param lexemeType
|
||||
* @return boolean 词元是否成功合并
|
||||
*/
|
||||
public boolean append(Lexeme l, int lexemeType) {
|
||||
if (l != null && this.getEndPosition() == l.getBeginPosition()) {
|
||||
this.length += l.getLength();
|
||||
this.lexemeType = lexemeType;
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
public void setBegin(int begin) {
|
||||
this.begin = begin;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public String toString() {
|
||||
StringBuffer strbuf = new StringBuffer();
|
||||
strbuf.append(this.getBeginPosition()).append("-").append(this.getEndPosition());
|
||||
strbuf.append(" : ").append(this.lexemeText).append(" : \t");
|
||||
strbuf.append(this.getLexemeTypeString());
|
||||
return strbuf.toString();
|
||||
}
|
||||
/**
|
||||
* 获取词元在文本中的结束位置
|
||||
* @return int
|
||||
*/
|
||||
public int getEndPosition() {
|
||||
return offset + begin + length;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取词元的字符长度
|
||||
* @return int
|
||||
*/
|
||||
public int getLength() {
|
||||
return this.length;
|
||||
}
|
||||
|
||||
public void setLength(int length) {
|
||||
if (this.length < 0) {
|
||||
throw new IllegalArgumentException("length < 0");
|
||||
}
|
||||
this.length = length;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取词元的文本内容
|
||||
* @return String
|
||||
*/
|
||||
public String getLexemeText() {
|
||||
if (lexemeText == null) {
|
||||
return "";
|
||||
}
|
||||
return lexemeText;
|
||||
}
|
||||
|
||||
public void setLexemeText(String lexemeText) {
|
||||
if (lexemeText == null) {
|
||||
this.lexemeText = "";
|
||||
this.length = 0;
|
||||
} else {
|
||||
this.lexemeText = lexemeText;
|
||||
this.length = lexemeText.length();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取词元类型
|
||||
* @return int
|
||||
*/
|
||||
public int getLexemeType() {
|
||||
return lexemeType;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取词元类型标示字符串
|
||||
* @return String
|
||||
*/
|
||||
public String getLexemeTypeString() {
|
||||
switch (lexemeType) {
|
||||
|
||||
case TYPE_ENGLISH:
|
||||
return "ENGLISH";
|
||||
|
||||
case TYPE_ARABIC:
|
||||
return "ARABIC";
|
||||
|
||||
case TYPE_LETTER:
|
||||
return "LETTER";
|
||||
|
||||
case TYPE_CNWORD:
|
||||
return "CN_WORD";
|
||||
|
||||
case TYPE_CNCHAR:
|
||||
return "CN_CHAR";
|
||||
|
||||
case TYPE_OTHER_CJK:
|
||||
return "OTHER_CJK";
|
||||
|
||||
case TYPE_COUNT:
|
||||
return "COUNT";
|
||||
|
||||
case TYPE_CNUM:
|
||||
return "TYPE_CNUM";
|
||||
|
||||
case TYPE_CQUAN:
|
||||
return "TYPE_CQUAN";
|
||||
|
||||
default:
|
||||
return "UNKONW";
|
||||
}
|
||||
}
|
||||
|
||||
public void setLexemeType(int lexemeType) {
|
||||
this.lexemeType = lexemeType;
|
||||
}
|
||||
|
||||
/**
|
||||
* 合并两个相邻的词元
|
||||
* @param l
|
||||
* @param lexemeType
|
||||
* @return boolean 词元是否成功合并
|
||||
*/
|
||||
public boolean append(Lexeme l, int lexemeType) {
|
||||
if (l != null && this.getEndPosition() == l.getBeginPosition()) {
|
||||
this.length += l.getLength();
|
||||
this.lexemeType = lexemeType;
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public String toString() {
|
||||
StringBuffer strbuf = new StringBuffer();
|
||||
strbuf.append(this.getBeginPosition()).append("-").append(this.getEndPosition());
|
||||
strbuf.append(" : ").append(this.lexemeText).append(" : \t");
|
||||
strbuf.append(this.getLexemeTypeString());
|
||||
return strbuf.toString();
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,26 +1,25 @@
|
||||
/**
|
||||
* IK 中文分词 版本 5.0
|
||||
* IK Analyzer release 5.0
|
||||
*
|
||||
* <p>
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* <p>
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* <p>
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* <p>
|
||||
* 源代码由林良益(linliangyi2005@gmail.com)提供
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
*/
|
||||
package com.rymcu.forest.lucene.core;
|
||||
|
||||
@ -29,227 +28,227 @@ package com.rymcu.forest.lucene.core;
|
||||
*/
|
||||
class LexemePath extends QuickSortSet implements Comparable<LexemePath> {
|
||||
|
||||
// 起始位置
|
||||
private int pathBegin;
|
||||
// 结束
|
||||
private int pathEnd;
|
||||
// 词元链的有效字符长度
|
||||
private int payloadLength;
|
||||
|
||||
// Creates an empty path: both bounds are -1 and the payload length is 0.
LexemePath() {
    this.payloadLength = 0;
    this.pathEnd = -1;
    this.pathBegin = -1;
}
|
||||
|
||||
/**
|
||||
* 向LexemePath追加相交的Lexeme
|
||||
* @param lexeme
|
||||
* @return
|
||||
*/
|
||||
boolean addCrossLexeme(Lexeme lexeme) {
|
||||
if (this.isEmpty()) {
|
||||
this.addLexeme(lexeme);
|
||||
this.pathBegin = lexeme.getBegin();
|
||||
this.pathEnd = lexeme.getBegin() + lexeme.getLength();
|
||||
this.payloadLength += lexeme.getLength();
|
||||
return true;
|
||||
|
||||
} else if (this.checkCross(lexeme)) {
|
||||
this.addLexeme(lexeme);
|
||||
if (lexeme.getBegin() + lexeme.getLength() > this.pathEnd) {
|
||||
this.pathEnd = lexeme.getBegin() + lexeme.getLength();
|
||||
}
|
||||
this.payloadLength = this.pathEnd - this.pathBegin;
|
||||
return true;
|
||||
|
||||
} else {
|
||||
return false;
|
||||
// 起始位置
|
||||
private int pathBegin;
|
||||
// 结束
|
||||
private int pathEnd;
|
||||
// 词元链的有效字符长度
|
||||
private int payloadLength;
|
||||
|
||||
// Creates an empty path: both bounds are -1 and the payload length is 0.
LexemePath() {
    this.payloadLength = 0;
    this.pathEnd = -1;
    this.pathBegin = -1;
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 向LexemePath追加不相交的Lexeme
|
||||
* @param lexeme
|
||||
* @return
|
||||
*/
|
||||
boolean addNotCrossLexeme(Lexeme lexeme) {
|
||||
if (this.isEmpty()) {
|
||||
this.addLexeme(lexeme);
|
||||
this.pathBegin = lexeme.getBegin();
|
||||
this.pathEnd = lexeme.getBegin() + lexeme.getLength();
|
||||
this.payloadLength += lexeme.getLength();
|
||||
return true;
|
||||
|
||||
} else if (this.checkCross(lexeme)) {
|
||||
return false;
|
||||
|
||||
} else {
|
||||
this.addLexeme(lexeme);
|
||||
this.payloadLength += lexeme.getLength();
|
||||
Lexeme head = this.peekFirst();
|
||||
this.pathBegin = head.getBegin();
|
||||
Lexeme tail = this.peekLast();
|
||||
this.pathEnd = tail.getBegin() + tail.getLength();
|
||||
return true;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 移除尾部的Lexeme
|
||||
* @return
|
||||
*/
|
||||
Lexeme removeTail() {
|
||||
Lexeme tail = this.pollLast();
|
||||
if (this.isEmpty()) {
|
||||
this.pathBegin = -1;
|
||||
this.pathEnd = -1;
|
||||
this.payloadLength = 0;
|
||||
} else {
|
||||
this.payloadLength -= tail.getLength();
|
||||
Lexeme newTail = this.peekLast();
|
||||
this.pathEnd = newTail.getBegin() + newTail.getLength();
|
||||
}
|
||||
return tail;
|
||||
}
|
||||
|
||||
/**
|
||||
* 检测词元位置交叉(有歧义的切分)
|
||||
* @param lexeme
|
||||
* @return
|
||||
*/
|
||||
boolean checkCross(Lexeme lexeme) {
|
||||
return (lexeme.getBegin() >= this.pathBegin && lexeme.getBegin() < this.pathEnd)
|
||||
|| (this.pathBegin >= lexeme.getBegin() && this.pathBegin < lexeme.getBegin()
|
||||
+ lexeme.getLength());
|
||||
}
|
||||
|
||||
int getPathBegin() {
|
||||
return pathBegin;
|
||||
}
|
||||
|
||||
int getPathEnd() {
|
||||
return pathEnd;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取Path的有效词长
|
||||
* @return
|
||||
*/
|
||||
int getPayloadLength() {
|
||||
return this.payloadLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取LexemePath的路径长度
|
||||
* @return
|
||||
*/
|
||||
int getPathLength() {
|
||||
return this.pathEnd - this.pathBegin;
|
||||
}
|
||||
|
||||
/**
|
||||
* X权重(词元长度积)
|
||||
* @return
|
||||
*/
|
||||
int getXWeight() {
|
||||
int product = 1;
|
||||
Cell c = this.getHead();
|
||||
while (c != null && c.getLexeme() != null) {
|
||||
product *= c.getLexeme().getLength();
|
||||
c = c.getNext();
|
||||
}
|
||||
return product;
|
||||
}
|
||||
|
||||
/**
|
||||
* 词元位置权重
|
||||
* @return
|
||||
*/
|
||||
int getPWeight() {
|
||||
int pWeight = 0;
|
||||
int p = 0;
|
||||
Cell c = this.getHead();
|
||||
while (c != null && c.getLexeme() != null) {
|
||||
p++;
|
||||
pWeight += p * c.getLexeme().getLength();
|
||||
c = c.getNext();
|
||||
}
|
||||
return pWeight;
|
||||
}
|
||||
|
||||
LexemePath copy() {
|
||||
LexemePath theCopy = new LexemePath();
|
||||
theCopy.pathBegin = this.pathBegin;
|
||||
theCopy.pathEnd = this.pathEnd;
|
||||
theCopy.payloadLength = this.payloadLength;
|
||||
Cell c = this.getHead();
|
||||
while (c != null && c.getLexeme() != null) {
|
||||
theCopy.addLexeme(c.getLexeme());
|
||||
c = c.getNext();
|
||||
}
|
||||
return theCopy;
|
||||
}
|
||||
|
||||
public int compareTo(LexemePath o) {
|
||||
// 比较有效文本长度
|
||||
if (this.payloadLength > o.payloadLength) {
|
||||
return -1;
|
||||
} else if (this.payloadLength < o.payloadLength) {
|
||||
return 1;
|
||||
} else {
|
||||
// 比较词元个数,越少越好
|
||||
if (this.size() < o.size()) {
|
||||
return -1;
|
||||
} else if (this.size() > o.size()) {
|
||||
return 1;
|
||||
} else {
|
||||
// 路径跨度越大越好
|
||||
if (this.getPathLength() > o.getPathLength()) {
|
||||
return -1;
|
||||
} else if (this.getPathLength() < o.getPathLength()) {
|
||||
return 1;
|
||||
} else {
|
||||
// 根据统计学结论,逆向切分概率高于正向切分,因此位置越靠后的优先
|
||||
if (this.pathEnd > o.pathEnd) {
|
||||
return -1;
|
||||
} else if (pathEnd < o.pathEnd) {
|
||||
return 1;
|
||||
} else {
|
||||
// 词长越平均越好
|
||||
if (this.getXWeight() > o.getXWeight()) {
|
||||
return -1;
|
||||
} else if (this.getXWeight() < o.getXWeight()) {
|
||||
return 1;
|
||||
} else {
|
||||
// 词元位置权重比较
|
||||
if (this.getPWeight() > o.getPWeight()) {
|
||||
return -1;
|
||||
} else if (this.getPWeight() < o.getPWeight()) {
|
||||
return 1;
|
||||
}
|
||||
/**
|
||||
* 向LexemePath追加相交的Lexeme
|
||||
* @param lexeme
|
||||
* @return
|
||||
*/
|
||||
boolean addCrossLexeme(Lexeme lexeme) {
|
||||
if (this.isEmpty()) {
|
||||
this.addLexeme(lexeme);
|
||||
this.pathBegin = lexeme.getBegin();
|
||||
this.pathEnd = lexeme.getBegin() + lexeme.getLength();
|
||||
this.payloadLength += lexeme.getLength();
|
||||
return true;
|
||||
|
||||
} else if (this.checkCross(lexeme)) {
|
||||
this.addLexeme(lexeme);
|
||||
if (lexeme.getBegin() + lexeme.getLength() > this.pathEnd) {
|
||||
this.pathEnd = lexeme.getBegin() + lexeme.getLength();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
this.payloadLength = this.pathEnd - this.pathBegin;
|
||||
return true;
|
||||
|
||||
public String toString() {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append("pathBegin : ").append(pathBegin).append("\r\n");
|
||||
sb.append("pathEnd : ").append(pathEnd).append("\r\n");
|
||||
sb.append("payloadLength : ").append(payloadLength).append("\r\n");
|
||||
Cell head = this.getHead();
|
||||
while (head != null) {
|
||||
sb.append("lexeme : ").append(head.getLexeme()).append("\r\n");
|
||||
head = head.getNext();
|
||||
} else {
|
||||
return false;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 向LexemePath追加不相交的Lexeme
|
||||
* @param lexeme
|
||||
* @return
|
||||
*/
|
||||
boolean addNotCrossLexeme(Lexeme lexeme) {
|
||||
if (this.isEmpty()) {
|
||||
this.addLexeme(lexeme);
|
||||
this.pathBegin = lexeme.getBegin();
|
||||
this.pathEnd = lexeme.getBegin() + lexeme.getLength();
|
||||
this.payloadLength += lexeme.getLength();
|
||||
return true;
|
||||
|
||||
} else if (this.checkCross(lexeme)) {
|
||||
return false;
|
||||
|
||||
} else {
|
||||
this.addLexeme(lexeme);
|
||||
this.payloadLength += lexeme.getLength();
|
||||
Lexeme head = this.peekFirst();
|
||||
this.pathBegin = head.getBegin();
|
||||
Lexeme tail = this.peekLast();
|
||||
this.pathEnd = tail.getBegin() + tail.getLength();
|
||||
return true;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 移除尾部的Lexeme
|
||||
* @return
|
||||
*/
|
||||
Lexeme removeTail() {
|
||||
Lexeme tail = this.pollLast();
|
||||
if (this.isEmpty()) {
|
||||
this.pathBegin = -1;
|
||||
this.pathEnd = -1;
|
||||
this.payloadLength = 0;
|
||||
} else {
|
||||
this.payloadLength -= tail.getLength();
|
||||
Lexeme newTail = this.peekLast();
|
||||
this.pathEnd = newTail.getBegin() + newTail.getLength();
|
||||
}
|
||||
return tail;
|
||||
}
|
||||
|
||||
/**
|
||||
* 检测词元位置交叉(有歧义的切分)
|
||||
* @param lexeme
|
||||
* @return
|
||||
*/
|
||||
boolean checkCross(Lexeme lexeme) {
|
||||
return (lexeme.getBegin() >= this.pathBegin && lexeme.getBegin() < this.pathEnd)
|
||||
|| (this.pathBegin >= lexeme.getBegin() && this.pathBegin < lexeme.getBegin()
|
||||
+ lexeme.getLength());
|
||||
}
|
||||
|
||||
int getPathBegin() {
|
||||
return pathBegin;
|
||||
}
|
||||
|
||||
int getPathEnd() {
|
||||
return pathEnd;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取Path的有效词长
|
||||
* @return
|
||||
*/
|
||||
int getPayloadLength() {
|
||||
return this.payloadLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取LexemePath的路径长度
|
||||
* @return
|
||||
*/
|
||||
int getPathLength() {
|
||||
return this.pathEnd - this.pathBegin;
|
||||
}
|
||||
|
||||
/**
|
||||
* X权重(词元长度积)
|
||||
* @return
|
||||
*/
|
||||
int getXWeight() {
|
||||
int product = 1;
|
||||
Cell c = this.getHead();
|
||||
while (c != null && c.getLexeme() != null) {
|
||||
product *= c.getLexeme().getLength();
|
||||
c = c.getNext();
|
||||
}
|
||||
return product;
|
||||
}
|
||||
|
||||
/**
|
||||
* 词元位置权重
|
||||
* @return
|
||||
*/
|
||||
int getPWeight() {
|
||||
int pWeight = 0;
|
||||
int p = 0;
|
||||
Cell c = this.getHead();
|
||||
while (c != null && c.getLexeme() != null) {
|
||||
p++;
|
||||
pWeight += p * c.getLexeme().getLength();
|
||||
c = c.getNext();
|
||||
}
|
||||
return pWeight;
|
||||
}
|
||||
|
||||
LexemePath copy() {
|
||||
LexemePath theCopy = new LexemePath();
|
||||
theCopy.pathBegin = this.pathBegin;
|
||||
theCopy.pathEnd = this.pathEnd;
|
||||
theCopy.payloadLength = this.payloadLength;
|
||||
Cell c = this.getHead();
|
||||
while (c != null && c.getLexeme() != null) {
|
||||
theCopy.addLexeme(c.getLexeme());
|
||||
c = c.getNext();
|
||||
}
|
||||
return theCopy;
|
||||
}
|
||||
|
||||
public int compareTo(LexemePath o) {
|
||||
// 比较有效文本长度
|
||||
if (this.payloadLength > o.payloadLength) {
|
||||
return -1;
|
||||
} else if (this.payloadLength < o.payloadLength) {
|
||||
return 1;
|
||||
} else {
|
||||
// 比较词元个数,越少越好
|
||||
if (this.size() < o.size()) {
|
||||
return -1;
|
||||
} else if (this.size() > o.size()) {
|
||||
return 1;
|
||||
} else {
|
||||
// 路径跨度越大越好
|
||||
if (this.getPathLength() > o.getPathLength()) {
|
||||
return -1;
|
||||
} else if (this.getPathLength() < o.getPathLength()) {
|
||||
return 1;
|
||||
} else {
|
||||
// 根据统计学结论,逆向切分概率高于正向切分,因此位置越靠后的优先
|
||||
if (this.pathEnd > o.pathEnd) {
|
||||
return -1;
|
||||
} else if (pathEnd < o.pathEnd) {
|
||||
return 1;
|
||||
} else {
|
||||
// 词长越平均越好
|
||||
if (this.getXWeight() > o.getXWeight()) {
|
||||
return -1;
|
||||
} else if (this.getXWeight() < o.getXWeight()) {
|
||||
return 1;
|
||||
} else {
|
||||
// 词元位置权重比较
|
||||
if (this.getPWeight() > o.getPWeight()) {
|
||||
return -1;
|
||||
} else if (this.getPWeight() < o.getPWeight()) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append("pathBegin : ").append(pathBegin).append("\r\n");
|
||||
sb.append("pathEnd : ").append(pathEnd).append("\r\n");
|
||||
sb.append("payloadLength : ").append(payloadLength).append("\r\n");
|
||||
Cell head = this.getHead();
|
||||
while (head != null) {
|
||||
sb.append("lexeme : ").append(head.getLexeme()).append("\r\n");
|
||||
head = head.getNext();
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,26 +1,25 @@
|
||||
/**
|
||||
* IK 中文分词 版本 5.0
|
||||
* IK Analyzer release 5.0
|
||||
*
|
||||
* <p>
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* <p>
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* <p>
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* <p>
|
||||
* 源代码由林良益(linliangyi2005@gmail.com)提供
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
*/
|
||||
package com.rymcu.forest.lucene.core;
|
||||
|
||||
@ -28,212 +27,212 @@ package com.rymcu.forest.lucene.core;
|
||||
* IK分词器专用的Lexem快速排序集合
|
||||
*/
|
||||
class QuickSortSet {
|
||||
// 链表头
|
||||
private Cell head;
|
||||
// 链表尾
|
||||
private Cell tail;
|
||||
// 链表的实际大小
|
||||
private int size;
|
||||
// 链表头
|
||||
private Cell head;
|
||||
// 链表尾
|
||||
private Cell tail;
|
||||
// 链表的实际大小
|
||||
private int size;
|
||||
|
||||
// Creates an empty set; the element counter starts at zero.
QuickSortSet() {
    size = 0;
}
|
||||
// Creates an empty set; the element counter starts at zero.
QuickSortSet() {
    size = 0;
}
|
||||
|
||||
/**
|
||||
* 向链表集合添加词元
|
||||
* @param lexeme
|
||||
*/
|
||||
boolean addLexeme(Lexeme lexeme) {
|
||||
Cell newCell = new Cell(lexeme);
|
||||
if (this.size == 0) {
|
||||
this.head = newCell;
|
||||
this.tail = newCell;
|
||||
this.size++;
|
||||
return true;
|
||||
/**
|
||||
* 向链表集合添加词元
|
||||
* @param lexeme
|
||||
*/
|
||||
boolean addLexeme(Lexeme lexeme) {
|
||||
Cell newCell = new Cell(lexeme);
|
||||
if (this.size == 0) {
|
||||
this.head = newCell;
|
||||
this.tail = newCell;
|
||||
this.size++;
|
||||
return true;
|
||||
|
||||
} else {
|
||||
if (this.tail.compareTo(newCell) == 0) {// 词元与尾部词元相同,不放入集合
|
||||
} else {
|
||||
if (this.tail.compareTo(newCell) == 0) {// 词元与尾部词元相同,不放入集合
|
||||
return false;
|
||||
|
||||
} else if (this.tail.compareTo(newCell) < 0) {// 词元接入链表尾部
|
||||
this.tail.next = newCell;
|
||||
newCell.prev = this.tail;
|
||||
this.tail = newCell;
|
||||
this.size++;
|
||||
return true;
|
||||
|
||||
} else if (this.head.compareTo(newCell) > 0) {// 词元接入链表头部
|
||||
this.head.prev = newCell;
|
||||
newCell.next = this.head;
|
||||
this.head = newCell;
|
||||
this.size++;
|
||||
return true;
|
||||
|
||||
} else {
|
||||
// 从尾部上逆
|
||||
Cell index = this.tail;
|
||||
while (index != null && index.compareTo(newCell) > 0) {
|
||||
index = index.prev;
|
||||
}
|
||||
if (index.compareTo(newCell) == 0) {// 词元与集合中的词元重复,不放入集合
|
||||
return false;
|
||||
|
||||
} else if (index.compareTo(newCell) < 0) {// 词元插入链表中的某个位置
|
||||
newCell.prev = index;
|
||||
newCell.next = index.next;
|
||||
index.next.prev = newCell;
|
||||
index.next = newCell;
|
||||
this.size++;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
} else if (this.tail.compareTo(newCell) < 0) {// 词元接入链表尾部
|
||||
this.tail.next = newCell;
|
||||
newCell.prev = this.tail;
|
||||
this.tail = newCell;
|
||||
this.size++;
|
||||
return true;
|
||||
|
||||
} else if (this.head.compareTo(newCell) > 0) {// 词元接入链表头部
|
||||
this.head.prev = newCell;
|
||||
newCell.next = this.head;
|
||||
this.head = newCell;
|
||||
this.size++;
|
||||
return true;
|
||||
|
||||
} else {
|
||||
// 从尾部上逆
|
||||
Cell index = this.tail;
|
||||
while (index != null && index.compareTo(newCell) > 0) {
|
||||
index = index.prev;
|
||||
/**
|
||||
* 返回链表头部元素
|
||||
* @return
|
||||
*/
|
||||
Lexeme peekFirst() {
|
||||
if (this.head != null) {
|
||||
return this.head.lexeme;
|
||||
}
|
||||
if (index.compareTo(newCell) == 0) {// 词元与集合中的词元重复,不放入集合
|
||||
return false;
|
||||
return null;
|
||||
}
|
||||
|
||||
} else if (index.compareTo(newCell) < 0) {// 词元插入链表中的某个位置
|
||||
newCell.prev = index;
|
||||
newCell.next = index.next;
|
||||
index.next.prev = newCell;
|
||||
index.next = newCell;
|
||||
this.size++;
|
||||
return true;
|
||||
/**
|
||||
* 取出链表集合的第一个元素
|
||||
* @return Lexeme
|
||||
*/
|
||||
Lexeme pollFirst() {
|
||||
if (this.size == 1) {
|
||||
Lexeme first = this.head.lexeme;
|
||||
this.head = null;
|
||||
this.tail = null;
|
||||
this.size--;
|
||||
return first;
|
||||
} else if (this.size > 1) {
|
||||
Lexeme first = this.head.lexeme;
|
||||
this.head = this.head.next;
|
||||
this.size--;
|
||||
return first;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* 返回链表头部元素
|
||||
* @return
|
||||
*/
|
||||
Lexeme peekFirst() {
|
||||
if (this.head != null) {
|
||||
return this.head.lexeme;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* 取出链表集合的第一个元素
|
||||
* @return Lexeme
|
||||
*/
|
||||
Lexeme pollFirst() {
|
||||
if (this.size == 1) {
|
||||
Lexeme first = this.head.lexeme;
|
||||
this.head = null;
|
||||
this.tail = null;
|
||||
this.size--;
|
||||
return first;
|
||||
} else if (this.size > 1) {
|
||||
Lexeme first = this.head.lexeme;
|
||||
this.head = this.head.next;
|
||||
this.size--;
|
||||
return first;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 返回链表尾部元素
|
||||
* @return
|
||||
*/
|
||||
Lexeme peekLast() {
|
||||
if (this.tail != null) {
|
||||
return this.tail.lexeme;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* 取出链表集合的最后一个元素
|
||||
* @return Lexeme
|
||||
*/
|
||||
Lexeme pollLast() {
|
||||
if (this.size == 1) {
|
||||
Lexeme last = this.head.lexeme;
|
||||
this.head = null;
|
||||
this.tail = null;
|
||||
this.size--;
|
||||
return last;
|
||||
|
||||
} else if (this.size > 1) {
|
||||
Lexeme last = this.tail.lexeme;
|
||||
this.tail = this.tail.prev;
|
||||
this.size--;
|
||||
return last;
|
||||
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 返回集合大小
|
||||
* @return
|
||||
*/
|
||||
int size() {
|
||||
return this.size;
|
||||
}
|
||||
|
||||
/**
|
||||
* 判断集合是否为空
|
||||
* @return
|
||||
*/
|
||||
boolean isEmpty() {
|
||||
return this.size == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* 返回lexeme链的头部
|
||||
* @return
|
||||
*/
|
||||
Cell getHead() {
|
||||
return this.head;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* IK 中文分词 版本 5.0
|
||||
* IK Analyzer release 5.0
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* 源代码由林良益(linliangyi2005@gmail.com)提供
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* QuickSortSet集合单元
|
||||
*
|
||||
*/
|
||||
class Cell implements Comparable<Cell> {
|
||||
private Cell prev;
|
||||
private Cell next;
|
||||
private Lexeme lexeme;
|
||||
|
||||
Cell(Lexeme lexeme) {
|
||||
if (lexeme == null) {
|
||||
throw new IllegalArgumentException("lexeme must not be null");
|
||||
}
|
||||
this.lexeme = lexeme;
|
||||
}
|
||||
|
||||
public int compareTo(Cell o) {
|
||||
return this.lexeme.compareTo(o.lexeme);
|
||||
/**
|
||||
* 返回链表尾部元素
|
||||
* @return
|
||||
*/
|
||||
Lexeme peekLast() {
|
||||
if (this.tail != null) {
|
||||
return this.tail.lexeme;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public Cell getPrev() {
|
||||
return this.prev;
|
||||
/**
|
||||
* 取出链表集合的最后一个元素
|
||||
* @return Lexeme
|
||||
*/
|
||||
Lexeme pollLast() {
|
||||
if (this.size == 1) {
|
||||
Lexeme last = this.head.lexeme;
|
||||
this.head = null;
|
||||
this.tail = null;
|
||||
this.size--;
|
||||
return last;
|
||||
|
||||
} else if (this.size > 1) {
|
||||
Lexeme last = this.tail.lexeme;
|
||||
this.tail = this.tail.prev;
|
||||
this.size--;
|
||||
return last;
|
||||
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public Cell getNext() {
|
||||
return this.next;
|
||||
/**
|
||||
* 返回集合大小
|
||||
* @return
|
||||
*/
|
||||
int size() {
|
||||
return this.size;
|
||||
}
|
||||
|
||||
public Lexeme getLexeme() {
|
||||
return this.lexeme;
|
||||
/**
|
||||
* 判断集合是否为空
|
||||
* @return
|
||||
*/
|
||||
boolean isEmpty() {
|
||||
return this.size == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* 返回lexeme链的头部
|
||||
* @return
|
||||
*/
|
||||
Cell getHead() {
|
||||
return this.head;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* IK 中文分词 版本 5.0
|
||||
* IK Analyzer release 5.0
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* 源代码由林良益(linliangyi2005@gmail.com)提供
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* QuickSortSet集合单元
|
||||
*
|
||||
*/
|
||||
class Cell implements Comparable<Cell> {
|
||||
private Cell prev;
|
||||
private Cell next;
|
||||
private Lexeme lexeme;
|
||||
|
||||
Cell(Lexeme lexeme) {
|
||||
if (lexeme == null) {
|
||||
throw new IllegalArgumentException("lexeme must not be null");
|
||||
}
|
||||
this.lexeme = lexeme;
|
||||
}
|
||||
|
||||
public int compareTo(Cell o) {
|
||||
return this.lexeme.compareTo(o.lexeme);
|
||||
}
|
||||
|
||||
public Cell getPrev() {
|
||||
return this.prev;
|
||||
}
|
||||
|
||||
public Cell getNext() {
|
||||
return this.next;
|
||||
}
|
||||
|
||||
public Lexeme getLexeme() {
|
||||
return this.lexeme;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,27 +1,25 @@
|
||||
/**
|
||||
*
|
||||
* IK 中文分词 版本 5.0
|
||||
* IK Analyzer release 5.0
|
||||
*
|
||||
* <p>
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* <p>
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* <p>
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* <p>
|
||||
* 源代码由林良益(linliangyi2005@gmail.com)提供
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
*/
|
||||
package com.rymcu.forest.lucene.dic;
|
||||
|
||||
@ -34,295 +32,295 @@ import java.util.Map;
|
||||
*/
|
||||
class DictSegment implements Comparable<DictSegment> {
|
||||
|
||||
// 公用字典表,存储汉字
|
||||
private static final Map<Character, Character> charMap = new HashMap<Character, Character>(16,
|
||||
0.95f);
|
||||
// 数组大小上限
|
||||
private static final int ARRAY_LENGTH_LIMIT = 3;
|
||||
// 公用字典表,存储汉字
|
||||
private static final Map<Character, Character> charMap = new HashMap<Character, Character>(16,
|
||||
0.95f);
|
||||
// 数组大小上限
|
||||
private static final int ARRAY_LENGTH_LIMIT = 3;
|
||||
|
||||
// Map存储结构
|
||||
private Map<Character, DictSegment> childrenMap;
|
||||
// 数组方式存储结构
|
||||
private DictSegment[] childrenArray;
|
||||
// Map存储结构
|
||||
private Map<Character, DictSegment> childrenMap;
|
||||
// 数组方式存储结构
|
||||
private DictSegment[] childrenArray;
|
||||
|
||||
// 当前节点上存储的字符
|
||||
private Character nodeChar;
|
||||
// 当前节点存储的Segment数目
|
||||
// storeSize <=ARRAY_LENGTH_LIMIT ,使用数组存储, storeSize >ARRAY_LENGTH_LIMIT ,则使用Map存储
|
||||
private int storeSize = 0;
|
||||
// 当前DictSegment状态 ,默认 0 , 1表示从根节点到当前节点的路径表示一个词
|
||||
private int nodeState = 0;
|
||||
// 当前节点上存储的字符
|
||||
private Character nodeChar;
|
||||
// 当前节点存储的Segment数目
|
||||
// storeSize <=ARRAY_LENGTH_LIMIT ,使用数组存储, storeSize >ARRAY_LENGTH_LIMIT ,则使用Map存储
|
||||
private int storeSize = 0;
|
||||
// 当前DictSegment状态 ,默认 0 , 1表示从根节点到当前节点的路径表示一个词
|
||||
private int nodeState = 0;
|
||||
|
||||
// Creates a dictionary node for the given character.
// Throws IllegalArgumentException when nodeChar is null.
DictSegment(Character nodeChar) {
    if (null == nodeChar) {
        throw new IllegalArgumentException("参数为空异常,字符不能为空");
    }
    this.nodeChar = nodeChar;
}
|
||||
|
||||
Character getNodeChar() {
|
||||
return nodeChar;
|
||||
}
|
||||
|
||||
/*
|
||||
* 判断是否有下一个节点
|
||||
*/
|
||||
boolean hasNextNode() {
|
||||
return this.storeSize > 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* 匹配词段
|
||||
* @param charArray
|
||||
* @return Hit
|
||||
*/
|
||||
Hit match(char[] charArray) {
|
||||
return this.match(charArray, 0, charArray.length, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* 匹配词段
|
||||
* @param charArray
|
||||
* @param begin
|
||||
* @param length
|
||||
* @return Hit
|
||||
*/
|
||||
Hit match(char[] charArray, int begin, int length) {
|
||||
return this.match(charArray, begin, length, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* 匹配词段
|
||||
* @param charArray
|
||||
* @param begin
|
||||
* @param length
|
||||
* @param searchHit
|
||||
* @return Hit
|
||||
*/
|
||||
Hit match(char[] charArray, int begin, int length, Hit searchHit) {
|
||||
|
||||
if (searchHit == null) {
|
||||
// 如果hit为空,新建
|
||||
searchHit = new Hit();
|
||||
// 设置hit的其实文本位置
|
||||
searchHit.setBegin(begin);
|
||||
} else {
|
||||
// 否则要将HIT状态重置
|
||||
searchHit.setUnmatch();
|
||||
}
|
||||
// 设置hit的当前处理位置
|
||||
searchHit.setEnd(begin);
|
||||
|
||||
Character keyChar = new Character(charArray[begin]);
|
||||
DictSegment ds = null;
|
||||
|
||||
// 引用实例变量为本地变量,避免查询时遇到更新的同步问题
|
||||
DictSegment[] segmentArray = this.childrenArray;
|
||||
Map<Character, DictSegment> segmentMap = this.childrenMap;
|
||||
|
||||
// STEP1 在节点中查找keyChar对应的DictSegment
|
||||
if (segmentArray != null) {
|
||||
// 在数组中查找
|
||||
DictSegment keySegment = new DictSegment(keyChar);
|
||||
int position = Arrays.binarySearch(segmentArray, 0, this.storeSize, keySegment);
|
||||
if (position >= 0) {
|
||||
ds = segmentArray[position];
|
||||
}
|
||||
|
||||
} else if (segmentMap != null) {
|
||||
// 在map中查找
|
||||
ds = (DictSegment) segmentMap.get(keyChar);
|
||||
}
|
||||
|
||||
// STEP2 找到DictSegment,判断词的匹配状态,是否继续递归,还是返回结果
|
||||
if (ds != null) {
|
||||
if (length > 1) {
|
||||
// 词未匹配完,继续往下搜索
|
||||
return ds.match(charArray, begin + 1, length - 1, searchHit);
|
||||
} else if (length == 1) {
|
||||
|
||||
// 搜索最后一个char
|
||||
if (ds.nodeState == 1) {
|
||||
// 添加HIT状态为完全匹配
|
||||
searchHit.setMatch();
|
||||
DictSegment(Character nodeChar) {
|
||||
if (nodeChar == null) {
|
||||
throw new IllegalArgumentException("参数为空异常,字符不能为空");
|
||||
}
|
||||
if (ds.hasNextNode()) {
|
||||
// 添加HIT状态为前缀匹配
|
||||
searchHit.setPrefix();
|
||||
// 记录当前位置的DictSegment
|
||||
searchHit.setMatchedDictSegment(ds);
|
||||
this.nodeChar = nodeChar;
|
||||
}
|
||||
|
||||
Character getNodeChar() {
|
||||
return nodeChar;
|
||||
}
|
||||
|
||||
/*
|
||||
* 判断是否有下一个节点
|
||||
*/
|
||||
boolean hasNextNode() {
|
||||
return this.storeSize > 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* 匹配词段
|
||||
* @param charArray
|
||||
* @return Hit
|
||||
*/
|
||||
Hit match(char[] charArray) {
|
||||
return this.match(charArray, 0, charArray.length, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* 匹配词段
|
||||
* @param charArray
|
||||
* @param begin
|
||||
* @param length
|
||||
* @return Hit
|
||||
*/
|
||||
Hit match(char[] charArray, int begin, int length) {
|
||||
return this.match(charArray, begin, length, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* 匹配词段
|
||||
* @param charArray
|
||||
* @param begin
|
||||
* @param length
|
||||
* @param searchHit
|
||||
* @return Hit
|
||||
*/
|
||||
Hit match(char[] charArray, int begin, int length, Hit searchHit) {
|
||||
|
||||
if (searchHit == null) {
|
||||
// 如果hit为空,新建
|
||||
searchHit = new Hit();
|
||||
// 设置hit的其实文本位置
|
||||
searchHit.setBegin(begin);
|
||||
} else {
|
||||
// 否则要将HIT状态重置
|
||||
searchHit.setUnmatch();
|
||||
}
|
||||
// 设置hit的当前处理位置
|
||||
searchHit.setEnd(begin);
|
||||
|
||||
Character keyChar = new Character(charArray[begin]);
|
||||
DictSegment ds = null;
|
||||
|
||||
// 引用实例变量为本地变量,避免查询时遇到更新的同步问题
|
||||
DictSegment[] segmentArray = this.childrenArray;
|
||||
Map<Character, DictSegment> segmentMap = this.childrenMap;
|
||||
|
||||
// STEP1 在节点中查找keyChar对应的DictSegment
|
||||
if (segmentArray != null) {
|
||||
// 在数组中查找
|
||||
DictSegment keySegment = new DictSegment(keyChar);
|
||||
int position = Arrays.binarySearch(segmentArray, 0, this.storeSize, keySegment);
|
||||
if (position >= 0) {
|
||||
ds = segmentArray[position];
|
||||
}
|
||||
|
||||
} else if (segmentMap != null) {
|
||||
// 在map中查找
|
||||
ds = (DictSegment) segmentMap.get(keyChar);
|
||||
}
|
||||
|
||||
// STEP2 找到DictSegment,判断词的匹配状态,是否继续递归,还是返回结果
|
||||
if (ds != null) {
|
||||
if (length > 1) {
|
||||
// 词未匹配完,继续往下搜索
|
||||
return ds.match(charArray, begin + 1, length - 1, searchHit);
|
||||
} else if (length == 1) {
|
||||
|
||||
// 搜索最后一个char
|
||||
if (ds.nodeState == 1) {
|
||||
// 添加HIT状态为完全匹配
|
||||
searchHit.setMatch();
|
||||
}
|
||||
if (ds.hasNextNode()) {
|
||||
// 添加HIT状态为前缀匹配
|
||||
searchHit.setPrefix();
|
||||
// 记录当前位置的DictSegment
|
||||
searchHit.setMatchedDictSegment(ds);
|
||||
}
|
||||
return searchHit;
|
||||
}
|
||||
|
||||
}
|
||||
// STEP3 没有找到DictSegment, 将HIT设置为不匹配
|
||||
return searchHit;
|
||||
}
|
||||
|
||||
}
|
||||
// STEP3 没有找到DictSegment, 将HIT设置为不匹配
|
||||
return searchHit;
|
||||
}
|
||||
|
||||
/**
|
||||
* 加载填充词典片段
|
||||
* @param charArray
|
||||
*/
|
||||
void fillSegment(char[] charArray) {
|
||||
this.fillSegment(charArray, 0, charArray.length, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* 屏蔽词典中的一个词
|
||||
* @param charArray
|
||||
*/
|
||||
void disableSegment(char[] charArray) {
|
||||
this.fillSegment(charArray, 0, charArray.length, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* 加载填充词典片段
|
||||
* @param charArray
|
||||
* @param begin
|
||||
* @param length
|
||||
* @param enabled
|
||||
*/
|
||||
private synchronized void fillSegment(char[] charArray, int begin, int length, int enabled) {
|
||||
// 获取字典表中的汉字对象
|
||||
Character beginChar = new Character(charArray[begin]);
|
||||
Character keyChar = charMap.get(beginChar);
|
||||
// 字典中没有该字,则将其添加入字典
|
||||
if (keyChar == null) {
|
||||
charMap.put(beginChar, beginChar);
|
||||
keyChar = beginChar;
|
||||
}
|
||||
|
||||
// 搜索当前节点的存储,查询对应keyChar的keyChar,如果没有则创建
|
||||
DictSegment ds = lookforSegment(keyChar, enabled);
|
||||
if (ds != null) {
|
||||
// 处理keyChar对应的segment
|
||||
if (length > 1) {
|
||||
// 词元还没有完全加入词典树
|
||||
ds.fillSegment(charArray, begin + 1, length - 1, enabled);
|
||||
} else if (length == 1) {
|
||||
// 已经是词元的最后一个char,设置当前节点状态为enabled,
|
||||
// enabled=1表明一个完整的词,enabled=0表示从词典中屏蔽当前词
|
||||
ds.nodeState = enabled;
|
||||
}
|
||||
/**
|
||||
* 加载填充词典片段
|
||||
* @param charArray
|
||||
*/
|
||||
void fillSegment(char[] charArray) {
|
||||
this.fillSegment(charArray, 0, charArray.length, 1);
|
||||
}
|
||||
|
||||
}
|
||||
/**
|
||||
* 屏蔽词典中的一个词
|
||||
* @param charArray
|
||||
*/
|
||||
void disableSegment(char[] charArray) {
|
||||
this.fillSegment(charArray, 0, charArray.length, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* 查找本节点下对应的keyChar的segment *
|
||||
* @param keyChar
|
||||
* @param create =1如果没有找到,则创建新的segment ; =0如果没有找到,不创建,返回null
|
||||
* @return
|
||||
*/
|
||||
private DictSegment lookforSegment(Character keyChar, int create) {
|
||||
/**
|
||||
* 加载填充词典片段
|
||||
* @param charArray
|
||||
* @param begin
|
||||
* @param length
|
||||
* @param enabled
|
||||
*/
|
||||
private synchronized void fillSegment(char[] charArray, int begin, int length, int enabled) {
|
||||
// 获取字典表中的汉字对象
|
||||
Character beginChar = new Character(charArray[begin]);
|
||||
Character keyChar = charMap.get(beginChar);
|
||||
// 字典中没有该字,则将其添加入字典
|
||||
if (keyChar == null) {
|
||||
charMap.put(beginChar, beginChar);
|
||||
keyChar = beginChar;
|
||||
}
|
||||
|
||||
DictSegment ds = null;
|
||||
// 搜索当前节点的存储,查询对应keyChar的keyChar,如果没有则创建
|
||||
DictSegment ds = lookforSegment(keyChar, enabled);
|
||||
if (ds != null) {
|
||||
// 处理keyChar对应的segment
|
||||
if (length > 1) {
|
||||
// 词元还没有完全加入词典树
|
||||
ds.fillSegment(charArray, begin + 1, length - 1, enabled);
|
||||
} else if (length == 1) {
|
||||
// 已经是词元的最后一个char,设置当前节点状态为enabled,
|
||||
// enabled=1表明一个完整的词,enabled=0表示从词典中屏蔽当前词
|
||||
ds.nodeState = enabled;
|
||||
}
|
||||
}
|
||||
|
||||
if (this.storeSize <= ARRAY_LENGTH_LIMIT) {
|
||||
// 获取数组容器,如果数组未创建则创建数组
|
||||
DictSegment[] segmentArray = getChildrenArray();
|
||||
// 搜寻数组
|
||||
DictSegment keySegment = new DictSegment(keyChar);
|
||||
int position = Arrays.binarySearch(segmentArray, 0, this.storeSize, keySegment);
|
||||
if (position >= 0) {
|
||||
ds = segmentArray[position];
|
||||
}
|
||||
}
|
||||
|
||||
// 遍历数组后没有找到对应的segment
|
||||
if (ds == null && create == 1) {
|
||||
ds = keySegment;
|
||||
if (this.storeSize < ARRAY_LENGTH_LIMIT) {
|
||||
// 数组容量未满,使用数组存储
|
||||
segmentArray[this.storeSize] = ds;
|
||||
// segment数目+1
|
||||
this.storeSize++;
|
||||
Arrays.sort(segmentArray, 0, this.storeSize);
|
||||
/**
|
||||
* 查找本节点下对应的keyChar的segment *
|
||||
* @param keyChar
|
||||
* @param create =1如果没有找到,则创建新的segment ; =0如果没有找到,不创建,返回null
|
||||
* @return
|
||||
*/
|
||||
private DictSegment lookforSegment(Character keyChar, int create) {
|
||||
|
||||
DictSegment ds = null;
|
||||
|
||||
if (this.storeSize <= ARRAY_LENGTH_LIMIT) {
|
||||
// 获取数组容器,如果数组未创建则创建数组
|
||||
DictSegment[] segmentArray = getChildrenArray();
|
||||
// 搜寻数组
|
||||
DictSegment keySegment = new DictSegment(keyChar);
|
||||
int position = Arrays.binarySearch(segmentArray, 0, this.storeSize, keySegment);
|
||||
if (position >= 0) {
|
||||
ds = segmentArray[position];
|
||||
}
|
||||
|
||||
// 遍历数组后没有找到对应的segment
|
||||
if (ds == null && create == 1) {
|
||||
ds = keySegment;
|
||||
if (this.storeSize < ARRAY_LENGTH_LIMIT) {
|
||||
// 数组容量未满,使用数组存储
|
||||
segmentArray[this.storeSize] = ds;
|
||||
// segment数目+1
|
||||
this.storeSize++;
|
||||
Arrays.sort(segmentArray, 0, this.storeSize);
|
||||
|
||||
} else {
|
||||
// 数组容量已满,切换Map存储
|
||||
// 获取Map容器,如果Map未创建,则创建Map
|
||||
Map<Character, DictSegment> segmentMap = getChildrenMap();
|
||||
// 将数组中的segment迁移到Map中
|
||||
migrate(segmentArray, segmentMap);
|
||||
// 存储新的segment
|
||||
segmentMap.put(keyChar, ds);
|
||||
// segment数目+1 , 必须在释放数组前执行storeSize++ , 确保极端情况下,不会取到空的数组
|
||||
this.storeSize++;
|
||||
// 释放当前的数组引用
|
||||
this.childrenArray = null;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
} else {
|
||||
// 数组容量已满,切换Map存储
|
||||
// 获取Map容器,如果Map未创建,则创建Map
|
||||
Map<Character, DictSegment> segmentMap = getChildrenMap();
|
||||
// 将数组中的segment迁移到Map中
|
||||
migrate(segmentArray, segmentMap);
|
||||
// 存储新的segment
|
||||
segmentMap.put(keyChar, ds);
|
||||
// segment数目+1 , 必须在释放数组前执行storeSize++ , 确保极端情况下,不会取到空的数组
|
||||
this.storeSize++;
|
||||
// 释放当前的数组引用
|
||||
this.childrenArray = null;
|
||||
// 获取Map容器,如果Map未创建,则创建Map
|
||||
Map<Character, DictSegment> segmentMap = getChildrenMap();
|
||||
// 搜索Map
|
||||
ds = (DictSegment) segmentMap.get(keyChar);
|
||||
if (ds == null && create == 1) {
|
||||
// 构造新的segment
|
||||
ds = new DictSegment(keyChar);
|
||||
segmentMap.put(keyChar, ds);
|
||||
// 当前节点存储segment数目+1
|
||||
this.storeSize++;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
} else {
|
||||
// 获取Map容器,如果Map未创建,则创建Map
|
||||
Map<Character, DictSegment> segmentMap = getChildrenMap();
|
||||
// 搜索Map
|
||||
ds = (DictSegment) segmentMap.get(keyChar);
|
||||
if (ds == null && create == 1) {
|
||||
// 构造新的segment
|
||||
ds = new DictSegment(keyChar);
|
||||
segmentMap.put(keyChar, ds);
|
||||
// 当前节点存储segment数目+1
|
||||
this.storeSize++;
|
||||
}
|
||||
return ds;
|
||||
}
|
||||
|
||||
return ds;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取数组容器
|
||||
* 线程同步方法
|
||||
*/
|
||||
private DictSegment[] getChildrenArray() {
|
||||
if (this.childrenArray == null) {
|
||||
synchronized (this) {
|
||||
/**
|
||||
* 获取数组容器
|
||||
* 线程同步方法
|
||||
*/
|
||||
private DictSegment[] getChildrenArray() {
|
||||
if (this.childrenArray == null) {
|
||||
this.childrenArray = new DictSegment[ARRAY_LENGTH_LIMIT];
|
||||
synchronized (this) {
|
||||
if (this.childrenArray == null) {
|
||||
this.childrenArray = new DictSegment[ARRAY_LENGTH_LIMIT];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return this.childrenArray;
|
||||
}
|
||||
return this.childrenArray;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取Map容器
|
||||
* 线程同步方法
|
||||
*/
|
||||
private Map<Character, DictSegment> getChildrenMap() {
|
||||
if (this.childrenMap == null) {
|
||||
synchronized (this) {
|
||||
/**
|
||||
* 获取Map容器
|
||||
* 线程同步方法
|
||||
*/
|
||||
private Map<Character, DictSegment> getChildrenMap() {
|
||||
if (this.childrenMap == null) {
|
||||
this.childrenMap = new HashMap<Character, DictSegment>(ARRAY_LENGTH_LIMIT * 2, 0.8f);
|
||||
synchronized (this) {
|
||||
if (this.childrenMap == null) {
|
||||
this.childrenMap = new HashMap<Character, DictSegment>(ARRAY_LENGTH_LIMIT * 2, 0.8f);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return this.childrenMap;
|
||||
}
|
||||
return this.childrenMap;
|
||||
}
|
||||
|
||||
/**
|
||||
* 将数组中的segment迁移到Map中
|
||||
* @param segmentArray
|
||||
*/
|
||||
private void migrate(DictSegment[] segmentArray, Map<Character, DictSegment> segmentMap) {
|
||||
for (DictSegment segment : segmentArray) {
|
||||
if (segment != null) {
|
||||
segmentMap.put(segment.nodeChar, segment);
|
||||
}
|
||||
/**
|
||||
* 将数组中的segment迁移到Map中
|
||||
* @param segmentArray
|
||||
*/
|
||||
private void migrate(DictSegment[] segmentArray, Map<Character, DictSegment> segmentMap) {
|
||||
for (DictSegment segment : segmentArray) {
|
||||
if (segment != null) {
|
||||
segmentMap.put(segment.nodeChar, segment);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 实现Comparable接口
|
||||
* @param o
|
||||
* @return int
|
||||
*/
|
||||
@Override
|
||||
public int compareTo(DictSegment o) {
|
||||
// 对当前节点存储的char进行比较
|
||||
return this.nodeChar.compareTo(o.nodeChar);
|
||||
}
|
||||
/**
|
||||
* 实现Comparable接口
|
||||
* @param o
|
||||
* @return int
|
||||
*/
|
||||
@Override
|
||||
public int compareTo(DictSegment o) {
|
||||
// 对当前节点存储的char进行比较
|
||||
return this.nodeChar.compareTo(o.nodeChar);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -28,320 +28,344 @@ import java.nio.charset.StandardCharsets;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
/** 词典管理类,单例模式 */
|
||||
/**
|
||||
* 词典管理类,单例模式
|
||||
*/
|
||||
public class Dictionary {
|
||||
|
||||
/** 词典单例 */
|
||||
private static Dictionary singleton;
|
||||
/** 主词典对象 */
|
||||
private DictSegment _MainDict;
|
||||
/** 停止词词典 */
|
||||
private DictSegment _StopWordDict;
|
||||
/** 量词词典 */
|
||||
private DictSegment _QuantifierDict;
|
||||
/** 用户自定义词典路径 */
|
||||
private static final String PATH_USER_DIC =
|
||||
System.getProperty("user.dir") + "/lucene/userDic/userDic.dic";
|
||||
/** 配置对象 */
|
||||
private final Configuration cfg;
|
||||
/**
|
||||
* 词典单例
|
||||
*/
|
||||
private static Dictionary singleton;
|
||||
/**
|
||||
* 主词典对象
|
||||
*/
|
||||
private DictSegment _MainDict;
|
||||
/**
|
||||
* 停止词词典
|
||||
*/
|
||||
private DictSegment _StopWordDict;
|
||||
/**
|
||||
* 量词词典
|
||||
*/
|
||||
private DictSegment _QuantifierDict;
|
||||
/**
|
||||
* 用户自定义词典路径
|
||||
*/
|
||||
private static final String PATH_USER_DIC =
|
||||
System.getProperty("user.dir") + "/lucene/userDic/userDic.dic";
|
||||
/**
|
||||
* 配置对象
|
||||
*/
|
||||
private final Configuration cfg;
|
||||
|
||||
private Dictionary(Configuration cfg) {
|
||||
this.cfg = cfg;
|
||||
this.loadMainDict();
|
||||
this.loadStopWordDict();
|
||||
this.loadQuantifierDict();
|
||||
}
|
||||
private Dictionary(Configuration cfg) {
|
||||
this.cfg = cfg;
|
||||
this.loadMainDict();
|
||||
this.loadStopWordDict();
|
||||
this.loadQuantifierDict();
|
||||
}
|
||||
|
||||
/**
|
||||
* 词典初始化 由于IK Analyzer的词典采用Dictionary类的静态方法进行词典初始化 只有当Dictionary类被实际调用时,才会开始载入词典, 这将延长首次分词操作的时间
|
||||
* 该方法提供了一个在应用加载阶段就初始化字典的手段
|
||||
*/
|
||||
public static void initial(Configuration cfg) {
|
||||
if (singleton == null) {
|
||||
synchronized (Dictionary.class) {
|
||||
/**
|
||||
* 词典初始化 由于IK Analyzer的词典采用Dictionary类的静态方法进行词典初始化 只有当Dictionary类被实际调用时,才会开始载入词典, 这将延长首次分词操作的时间
|
||||
* 该方法提供了一个在应用加载阶段就初始化字典的手段
|
||||
*/
|
||||
public static void initial(Configuration cfg) {
|
||||
if (singleton == null) {
|
||||
singleton = new Dictionary(cfg);
|
||||
synchronized (Dictionary.class) {
|
||||
if (singleton == null) {
|
||||
singleton = new Dictionary(cfg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取词典单子实例
|
||||
*
|
||||
* @return Dictionary 单例对象
|
||||
*/
|
||||
public static Dictionary getSingleton() {
|
||||
if (singleton == null) {
|
||||
throw new IllegalStateException("词典尚未初始化,请先调用initial方法");
|
||||
}
|
||||
return singleton;
|
||||
}
|
||||
|
||||
/**
|
||||
* 批量加载新词条
|
||||
*
|
||||
* @param words Collection<String>词条列表
|
||||
*/
|
||||
public void addWords(Collection<String> words) {
|
||||
if (words != null) {
|
||||
for (String word : words) {
|
||||
if (word != null) {
|
||||
// 批量加载词条到主内存词典中
|
||||
singleton._MainDict.fillSegment(word.trim().toLowerCase().toCharArray());
|
||||
/**
|
||||
* 获取词典单子实例
|
||||
*
|
||||
* @return Dictionary 单例对象
|
||||
*/
|
||||
public static Dictionary getSingleton() {
|
||||
if (singleton == null) {
|
||||
throw new IllegalStateException("词典尚未初始化,请先调用initial方法");
|
||||
}
|
||||
}
|
||||
return singleton;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 批量移除(屏蔽)词条
|
||||
*
|
||||
* @param words
|
||||
*/
|
||||
public void disableWords(Collection<String> words) {
|
||||
if (words != null) {
|
||||
for (String word : words) {
|
||||
if (word != null) {
|
||||
// 批量屏蔽词条
|
||||
singleton._MainDict.disableSegment(word.trim().toLowerCase().toCharArray());
|
||||
/**
|
||||
* 批量加载新词条
|
||||
*
|
||||
* @param words Collection<String>词条列表
|
||||
*/
|
||||
public void addWords(Collection<String> words) {
|
||||
if (words != null) {
|
||||
for (String word : words) {
|
||||
if (word != null) {
|
||||
// 批量加载词条到主内存词典中
|
||||
singleton._MainDict.fillSegment(word.trim().toLowerCase().toCharArray());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 检索匹配主词典
|
||||
*
|
||||
* @param charArray
|
||||
* @return Hit 匹配结果描述
|
||||
*/
|
||||
public Hit matchInMainDict(char[] charArray) {
|
||||
return singleton._MainDict.match(charArray);
|
||||
}
|
||||
|
||||
/**
|
||||
* 检索匹配主词典
|
||||
*
|
||||
* @param charArray
|
||||
* @param begin
|
||||
* @param length
|
||||
* @return Hit 匹配结果描述
|
||||
*/
|
||||
public Hit matchInMainDict(char[] charArray, int begin, int length) {
|
||||
return singleton._MainDict.match(charArray, begin, length);
|
||||
}
|
||||
|
||||
/**
|
||||
* 检索匹配量词词典
|
||||
*
|
||||
* @param charArray
|
||||
* @param begin
|
||||
* @param length
|
||||
* @return Hit 匹配结果描述
|
||||
*/
|
||||
public Hit matchInQuantifierDict(char[] charArray, int begin, int length) {
|
||||
return singleton._QuantifierDict.match(charArray, begin, length);
|
||||
}
|
||||
|
||||
/**
|
||||
* 从已匹配的Hit中直接取出DictSegment,继续向下匹配
|
||||
*
|
||||
* @param charArray
|
||||
* @param currentIndex
|
||||
* @param matchedHit
|
||||
* @return Hit
|
||||
*/
|
||||
public Hit matchWithHit(char[] charArray, int currentIndex, Hit matchedHit) {
|
||||
DictSegment ds = matchedHit.getMatchedDictSegment();
|
||||
return ds.match(charArray, currentIndex, 1, matchedHit);
|
||||
}
|
||||
|
||||
/**
|
||||
* 判断是否是停止词
|
||||
*
|
||||
* @param charArray
|
||||
* @param begin
|
||||
* @param length
|
||||
* @return boolean
|
||||
*/
|
||||
public boolean isStopWord(char[] charArray, int begin, int length) {
|
||||
return singleton._StopWordDict.match(charArray, begin, length).isMatch();
|
||||
}
|
||||
|
||||
/** 加载主词典及扩展词典 */
|
||||
private void loadMainDict() {
|
||||
// 建立一个主词典实例
|
||||
_MainDict = new DictSegment((char) 0);
|
||||
// 读取主词典文件
|
||||
Resource resource = new ClassPathResource(cfg.getMainDictionary());
|
||||
try {
|
||||
InputStream is = resource.getInputStream();
|
||||
BufferedReader br =
|
||||
new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8), 512);
|
||||
String theWord;
|
||||
do {
|
||||
theWord = br.readLine();
|
||||
if (theWord != null && !"".equals(theWord.trim())) {
|
||||
_MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
|
||||
/**
|
||||
* 批量移除(屏蔽)词条
|
||||
*
|
||||
* @param words
|
||||
*/
|
||||
public void disableWords(Collection<String> words) {
|
||||
if (words != null) {
|
||||
for (String word : words) {
|
||||
if (word != null) {
|
||||
// 批量屏蔽词条
|
||||
singleton._MainDict.disableSegment(word.trim().toLowerCase().toCharArray());
|
||||
}
|
||||
}
|
||||
}
|
||||
} while (theWord != null);
|
||||
} catch (IOException e) {
|
||||
System.err.println("Main Dictionary loading exception.");
|
||||
e.printStackTrace();
|
||||
}
|
||||
// 加载扩展词典
|
||||
this.loadExtDict();
|
||||
}
|
||||
|
||||
/** 加载用户配置的扩展词典到主词库表 */
|
||||
private void loadExtDict() {
|
||||
// 加载扩展词典配置
|
||||
List<String> extDictFiles = cfg.getExtDictionary();
|
||||
if (extDictFiles != null) {
|
||||
InputStream is;
|
||||
for (String extDictName : extDictFiles) {
|
||||
// 读取扩展词典文件
|
||||
System.out.println("加载扩展词典:" + extDictName);
|
||||
is = this.getClass().getClassLoader().getResourceAsStream(extDictName);
|
||||
// 如果找不到扩展的字典,则忽略
|
||||
/**
|
||||
* 检索匹配主词典
|
||||
*
|
||||
* @param charArray
|
||||
* @return Hit 匹配结果描述
|
||||
*/
|
||||
public Hit matchInMainDict(char[] charArray) {
|
||||
return singleton._MainDict.match(charArray);
|
||||
}
|
||||
|
||||
/**
|
||||
* 检索匹配主词典
|
||||
*
|
||||
* @param charArray
|
||||
* @param begin
|
||||
* @param length
|
||||
* @return Hit 匹配结果描述
|
||||
*/
|
||||
public Hit matchInMainDict(char[] charArray, int begin, int length) {
|
||||
return singleton._MainDict.match(charArray, begin, length);
|
||||
}
|
||||
|
||||
/**
|
||||
* 检索匹配量词词典
|
||||
*
|
||||
* @param charArray
|
||||
* @param begin
|
||||
* @param length
|
||||
* @return Hit 匹配结果描述
|
||||
*/
|
||||
public Hit matchInQuantifierDict(char[] charArray, int begin, int length) {
|
||||
return singleton._QuantifierDict.match(charArray, begin, length);
|
||||
}
|
||||
|
||||
/**
|
||||
* 从已匹配的Hit中直接取出DictSegment,继续向下匹配
|
||||
*
|
||||
* @param charArray
|
||||
* @param currentIndex
|
||||
* @param matchedHit
|
||||
* @return Hit
|
||||
*/
|
||||
public Hit matchWithHit(char[] charArray, int currentIndex, Hit matchedHit) {
|
||||
DictSegment ds = matchedHit.getMatchedDictSegment();
|
||||
return ds.match(charArray, currentIndex, 1, matchedHit);
|
||||
}
|
||||
|
||||
/**
|
||||
* 判断是否是停止词
|
||||
*
|
||||
* @param charArray
|
||||
* @param begin
|
||||
* @param length
|
||||
* @return boolean
|
||||
*/
|
||||
public boolean isStopWord(char[] charArray, int begin, int length) {
|
||||
return singleton._StopWordDict.match(charArray, begin, length).isMatch();
|
||||
}
|
||||
|
||||
/**
|
||||
* 加载主词典及扩展词典
|
||||
*/
|
||||
private void loadMainDict() {
|
||||
// 建立一个主词典实例
|
||||
_MainDict = new DictSegment((char) 0);
|
||||
// 读取主词典文件
|
||||
Resource resource = new ClassPathResource(cfg.getMainDictionary());
|
||||
try {
|
||||
InputStream is = resource.getInputStream();
|
||||
BufferedReader br =
|
||||
new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8), 512);
|
||||
String theWord;
|
||||
do {
|
||||
theWord = br.readLine();
|
||||
if (theWord != null && !"".equals(theWord.trim())) {
|
||||
_MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
|
||||
}
|
||||
} while (theWord != null);
|
||||
} catch (IOException e) {
|
||||
System.err.println("Main Dictionary loading exception.");
|
||||
e.printStackTrace();
|
||||
}
|
||||
// 加载扩展词典
|
||||
this.loadExtDict();
|
||||
}
|
||||
|
||||
/**
|
||||
* 加载用户配置的扩展词典到主词库表
|
||||
*/
|
||||
private void loadExtDict() {
|
||||
// 加载扩展词典配置
|
||||
List<String> extDictFiles = cfg.getExtDictionary();
|
||||
if (extDictFiles != null) {
|
||||
InputStream is;
|
||||
for (String extDictName : extDictFiles) {
|
||||
// 读取扩展词典文件
|
||||
System.out.println("加载扩展词典:" + extDictName);
|
||||
is = this.getClass().getClassLoader().getResourceAsStream(extDictName);
|
||||
// 如果找不到扩展的字典,则忽略
|
||||
if (is == null) {
|
||||
try {
|
||||
is = new FileInputStream(extDictName);
|
||||
} catch (FileNotFoundException e) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
try {
|
||||
BufferedReader br =
|
||||
new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8), 512);
|
||||
String theWord;
|
||||
do {
|
||||
theWord = br.readLine();
|
||||
if (theWord != null && !"".equals(theWord.trim())) {
|
||||
// 加载扩展词典数据到主内存词典中
|
||||
System.out.println(theWord);
|
||||
_MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
|
||||
}
|
||||
} while (theWord != null);
|
||||
} catch (IOException ioe) {
|
||||
System.err.println("Extension Dictionary loading exception.");
|
||||
ioe.printStackTrace();
|
||||
} finally {
|
||||
try {
|
||||
is.close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 加载用户扩展的停止词词典
|
||||
*/
|
||||
private void loadStopWordDict() {
|
||||
// 建立一个主词典实例
|
||||
_StopWordDict = new DictSegment((char) 0);
|
||||
// 加载扩展停止词典
|
||||
List<String> extStopWordDictFiles = cfg.getExtStopWordDictionary();
|
||||
if (extStopWordDictFiles != null) {
|
||||
InputStream is = null;
|
||||
for (String extStopWordDictName : extStopWordDictFiles) {
|
||||
System.out.println("加载扩展停止词典:" + extStopWordDictName);
|
||||
// 读取扩展词典文件
|
||||
is = this.getClass().getClassLoader().getResourceAsStream(extStopWordDictName);
|
||||
// 如果找不到扩展的字典,则忽略
|
||||
if (is == null) {
|
||||
continue;
|
||||
}
|
||||
try {
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
|
||||
String theWord;
|
||||
do {
|
||||
theWord = br.readLine();
|
||||
if (theWord != null && !"".equals(theWord.trim())) {
|
||||
// 加载扩展停止词典数据到内存中
|
||||
_StopWordDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
|
||||
}
|
||||
} while (theWord != null);
|
||||
} catch (IOException ioe) {
|
||||
System.err.println("Extension Stop word Dictionary loading exception.");
|
||||
ioe.printStackTrace();
|
||||
} finally {
|
||||
try {
|
||||
is.close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 加载量词词典
|
||||
*/
|
||||
private void loadQuantifierDict() {
|
||||
// 建立一个量词典实例
|
||||
_QuantifierDict = new DictSegment((char) 0);
|
||||
// 读取量词词典文件
|
||||
InputStream is =
|
||||
this.getClass().getClassLoader().getResourceAsStream(cfg.getQuantifierDictionary());
|
||||
if (is == null) {
|
||||
try {
|
||||
is = new FileInputStream(extDictName);
|
||||
} catch (FileNotFoundException e) {
|
||||
continue;
|
||||
}
|
||||
throw new RuntimeException("Quantifier Dictionary not found!!!");
|
||||
}
|
||||
try {
|
||||
BufferedReader br =
|
||||
new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8), 512);
|
||||
String theWord;
|
||||
do {
|
||||
theWord = br.readLine();
|
||||
if (theWord != null && !"".equals(theWord.trim())) {
|
||||
// 加载扩展词典数据到主内存词典中
|
||||
System.out.println(theWord);
|
||||
_MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
|
||||
}
|
||||
} while (theWord != null);
|
||||
BufferedReader br =
|
||||
new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8), 512);
|
||||
String theWord;
|
||||
do {
|
||||
theWord = br.readLine();
|
||||
if (theWord != null && !"".equals(theWord.trim())) {
|
||||
_QuantifierDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
|
||||
}
|
||||
} while (theWord != null);
|
||||
} catch (IOException ioe) {
|
||||
System.err.println("Extension Dictionary loading exception.");
|
||||
ioe.printStackTrace();
|
||||
System.err.println("Quantifier Dictionary loading exception.");
|
||||
ioe.printStackTrace();
|
||||
} finally {
|
||||
try {
|
||||
is.close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
try {
|
||||
is.close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** 加载用户扩展的停止词词典 */
|
||||
private void loadStopWordDict() {
|
||||
// 建立一个主词典实例
|
||||
_StopWordDict = new DictSegment((char) 0);
|
||||
// 加载扩展停止词典
|
||||
List<String> extStopWordDictFiles = cfg.getExtStopWordDictionary();
|
||||
if (extStopWordDictFiles != null) {
|
||||
InputStream is = null;
|
||||
for (String extStopWordDictName : extStopWordDictFiles) {
|
||||
System.out.println("加载扩展停止词典:" + extStopWordDictName);
|
||||
/**
|
||||
* 加载用户配置的自定义扩展词典到主词库表
|
||||
*/
|
||||
public void updateUserDict() {
|
||||
// 加载扩展词典配置
|
||||
InputStream is;
|
||||
// 读取扩展词典文件
|
||||
is = this.getClass().getClassLoader().getResourceAsStream(extStopWordDictName);
|
||||
// 如果找不到扩展的字典,则忽略
|
||||
if (is == null) {
|
||||
continue;
|
||||
System.out.println("更新加载扩展词典:" + PATH_USER_DIC);
|
||||
try {
|
||||
is = new FileInputStream(PATH_USER_DIC);
|
||||
} catch (FileNotFoundException e) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
|
||||
String theWord;
|
||||
do {
|
||||
theWord = br.readLine();
|
||||
if (theWord != null && !"".equals(theWord.trim())) {
|
||||
// 加载扩展停止词典数据到内存中
|
||||
_StopWordDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
|
||||
}
|
||||
} while (theWord != null);
|
||||
BufferedReader br =
|
||||
new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8), 512);
|
||||
String theWord;
|
||||
do {
|
||||
theWord = br.readLine();
|
||||
if (theWord != null && !"".equals(theWord.trim())) {
|
||||
// 加载扩展词典数据到主内存词典中
|
||||
System.out.println(theWord);
|
||||
_MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
|
||||
}
|
||||
} while (theWord != null);
|
||||
} catch (IOException ioe) {
|
||||
System.err.println("Extension Stop word Dictionary loading exception.");
|
||||
ioe.printStackTrace();
|
||||
System.err.println("Extension Dictionary loading exception.");
|
||||
ioe.printStackTrace();
|
||||
} finally {
|
||||
try {
|
||||
is.close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
try {
|
||||
is.close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** 加载量词词典 */
|
||||
private void loadQuantifierDict() {
|
||||
// 建立一个量词典实例
|
||||
_QuantifierDict = new DictSegment((char) 0);
|
||||
// 读取量词词典文件
|
||||
InputStream is =
|
||||
this.getClass().getClassLoader().getResourceAsStream(cfg.getQuantifierDictionary());
|
||||
if (is == null) {
|
||||
throw new RuntimeException("Quantifier Dictionary not found!!!");
|
||||
}
|
||||
try {
|
||||
BufferedReader br =
|
||||
new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8), 512);
|
||||
String theWord;
|
||||
do {
|
||||
theWord = br.readLine();
|
||||
if (theWord != null && !"".equals(theWord.trim())) {
|
||||
_QuantifierDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
|
||||
}
|
||||
} while (theWord != null);
|
||||
} catch (IOException ioe) {
|
||||
System.err.println("Quantifier Dictionary loading exception.");
|
||||
ioe.printStackTrace();
|
||||
} finally {
|
||||
try {
|
||||
is.close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** 加载用户配置的自定义扩展词典到主词库表 */
|
||||
public void updateUserDict() {
|
||||
// 加载扩展词典配置
|
||||
InputStream is;
|
||||
// 读取扩展词典文件
|
||||
System.out.println("更新加载扩展词典:" + PATH_USER_DIC);
|
||||
try {
|
||||
is = new FileInputStream(PATH_USER_DIC);
|
||||
} catch (FileNotFoundException e) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
BufferedReader br =
|
||||
new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8), 512);
|
||||
String theWord;
|
||||
do {
|
||||
theWord = br.readLine();
|
||||
if (theWord != null && !"".equals(theWord.trim())) {
|
||||
// 加载扩展词典数据到主内存词典中
|
||||
System.out.println(theWord);
|
||||
_MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
|
||||
}
|
||||
} while (theWord != null);
|
||||
} catch (IOException ioe) {
|
||||
System.err.println("Extension Dictionary loading exception.");
|
||||
ioe.printStackTrace();
|
||||
} finally {
|
||||
try {
|
||||
is.close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,27 +1,25 @@
|
||||
/**
|
||||
*
|
||||
* IK 中文分词 版本 5.0
|
||||
* IK Analyzer release 5.0
|
||||
*
|
||||
* <p>
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* <p>
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* <p>
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* <p>
|
||||
* 源代码由林良益(linliangyi2005@gmail.com)提供
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
*/
|
||||
package com.rymcu.forest.lucene.dic;
|
||||
|
||||
@ -29,91 +27,91 @@ package com.rymcu.forest.lucene.dic;
|
||||
* 表示一次词典匹配的命中
|
||||
*/
|
||||
public class Hit {
|
||||
// Hit不匹配
|
||||
private static final int UNMATCH = 0x00000000;
|
||||
// Hit完全匹配
|
||||
private static final int MATCH = 0x00000001;
|
||||
// Hit前缀匹配
|
||||
private static final int PREFIX = 0x00000010;
|
||||
// Hit不匹配
|
||||
private static final int UNMATCH = 0x00000000;
|
||||
// Hit完全匹配
|
||||
private static final int MATCH = 0x00000001;
|
||||
// Hit前缀匹配
|
||||
private static final int PREFIX = 0x00000010;
|
||||
|
||||
// 该HIT当前状态,默认未匹配
|
||||
private int hitState = UNMATCH;
|
||||
// 该HIT当前状态,默认未匹配
|
||||
private int hitState = UNMATCH;
|
||||
|
||||
// 记录词典匹配过程中,当前匹配到的词典分支节点
|
||||
private DictSegment matchedDictSegment;
|
||||
/*
|
||||
* 词段开始位置
|
||||
*/
|
||||
private int begin;
|
||||
/*
|
||||
* 词段的结束位置
|
||||
*/
|
||||
private int end;
|
||||
// 记录词典匹配过程中,当前匹配到的词典分支节点
|
||||
private DictSegment matchedDictSegment;
|
||||
/*
|
||||
* 词段开始位置
|
||||
*/
|
||||
private int begin;
|
||||
/*
|
||||
* 词段的结束位置
|
||||
*/
|
||||
private int end;
|
||||
|
||||
/**
|
||||
* 判断是否完全匹配
|
||||
*/
|
||||
public boolean isMatch() {
|
||||
return (this.hitState & MATCH) > 0;
|
||||
}
|
||||
/**
|
||||
* 判断是否完全匹配
|
||||
*/
|
||||
public boolean isMatch() {
|
||||
return (this.hitState & MATCH) > 0;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public void setMatch() {
|
||||
this.hitState = this.hitState | MATCH;
|
||||
}
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public void setMatch() {
|
||||
this.hitState = this.hitState | MATCH;
|
||||
}
|
||||
|
||||
/**
|
||||
* 判断是否是词的前缀
|
||||
*/
|
||||
public boolean isPrefix() {
|
||||
return (this.hitState & PREFIX) > 0;
|
||||
}
|
||||
/**
|
||||
* 判断是否是词的前缀
|
||||
*/
|
||||
public boolean isPrefix() {
|
||||
return (this.hitState & PREFIX) > 0;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public void setPrefix() {
|
||||
this.hitState = this.hitState | PREFIX;
|
||||
}
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public void setPrefix() {
|
||||
this.hitState = this.hitState | PREFIX;
|
||||
}
|
||||
|
||||
/**
|
||||
* 判断是否是不匹配
|
||||
*/
|
||||
public boolean isUnmatch() {
|
||||
return this.hitState == UNMATCH;
|
||||
}
|
||||
/**
|
||||
* 判断是否是不匹配
|
||||
*/
|
||||
public boolean isUnmatch() {
|
||||
return this.hitState == UNMATCH;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public void setUnmatch() {
|
||||
this.hitState = UNMATCH;
|
||||
}
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public void setUnmatch() {
|
||||
this.hitState = UNMATCH;
|
||||
}
|
||||
|
||||
public DictSegment getMatchedDictSegment() {
|
||||
return matchedDictSegment;
|
||||
}
|
||||
public DictSegment getMatchedDictSegment() {
|
||||
return matchedDictSegment;
|
||||
}
|
||||
|
||||
public void setMatchedDictSegment(DictSegment matchedDictSegment) {
|
||||
this.matchedDictSegment = matchedDictSegment;
|
||||
}
|
||||
public void setMatchedDictSegment(DictSegment matchedDictSegment) {
|
||||
this.matchedDictSegment = matchedDictSegment;
|
||||
}
|
||||
|
||||
public int getBegin() {
|
||||
return begin;
|
||||
}
|
||||
public int getBegin() {
|
||||
return begin;
|
||||
}
|
||||
|
||||
public void setBegin(int begin) {
|
||||
this.begin = begin;
|
||||
}
|
||||
public void setBegin(int begin) {
|
||||
this.begin = begin;
|
||||
}
|
||||
|
||||
public int getEnd() {
|
||||
return end;
|
||||
}
|
||||
public int getEnd() {
|
||||
return end;
|
||||
}
|
||||
|
||||
public void setEnd(int end) {
|
||||
this.end = end;
|
||||
}
|
||||
public void setEnd(int end) {
|
||||
this.end = end;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -19,29 +19,29 @@ import java.util.concurrent.CountDownLatch;
|
||||
*/
|
||||
public class ArticleBeanIndex extends BaseIndex<ArticleLucene> {
|
||||
|
||||
public ArticleBeanIndex(
|
||||
String parentIndexPath,
|
||||
int subIndex,
|
||||
CountDownLatch countDownLatch1,
|
||||
CountDownLatch countDownLatch2,
|
||||
List<ArticleLucene> list) {
|
||||
super(parentIndexPath, subIndex, countDownLatch1, countDownLatch2, list);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void indexDoc(IndexWriter writer, ArticleLucene t) throws Exception {
|
||||
Document doc = new Document();
|
||||
Field id = new Field("id", t.getIdArticle() + "", TextField.TYPE_STORED);
|
||||
Field title = new Field("title", t.getArticleTitle(), TextField.TYPE_STORED);
|
||||
Field summary = new Field("summary", t.getArticleContent(), TextField.TYPE_STORED);
|
||||
// 添加到Document中
|
||||
doc.add(id);
|
||||
doc.add(title);
|
||||
doc.add(summary);
|
||||
if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {
|
||||
writer.addDocument(doc);
|
||||
} else {
|
||||
writer.updateDocument(new Term("id", t.getIdArticle() + ""), doc);
|
||||
public ArticleBeanIndex(
|
||||
String parentIndexPath,
|
||||
int subIndex,
|
||||
CountDownLatch countDownLatch1,
|
||||
CountDownLatch countDownLatch2,
|
||||
List<ArticleLucene> list) {
|
||||
super(parentIndexPath, subIndex, countDownLatch1, countDownLatch2, list);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void indexDoc(IndexWriter writer, ArticleLucene t) throws Exception {
|
||||
Document doc = new Document();
|
||||
Field id = new Field("id", t.getIdArticle() + "", TextField.TYPE_STORED);
|
||||
Field title = new Field("title", t.getArticleTitle(), TextField.TYPE_STORED);
|
||||
Field summary = new Field("summary", t.getArticleContent(), TextField.TYPE_STORED);
|
||||
// 添加到Document中
|
||||
doc.add(id);
|
||||
doc.add(title);
|
||||
doc.add(summary);
|
||||
if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {
|
||||
writer.addDocument(doc);
|
||||
} else {
|
||||
writer.updateDocument(new Term("id", t.getIdArticle() + ""), doc);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -16,125 +16,136 @@ import java.util.concurrent.CountDownLatch;
|
||||
* @date 2021/2/2 14:14
|
||||
*/
|
||||
public abstract class BaseIndex<T> implements Runnable {
|
||||
/** 父级索引路径 */
|
||||
private String parentIndexPath;
|
||||
/** 索引编写器 */
|
||||
private IndexWriter writer;
|
||||
/**
|
||||
* 父级索引路径
|
||||
*/
|
||||
private String parentIndexPath;
|
||||
/**
|
||||
* 索引编写器
|
||||
*/
|
||||
private IndexWriter writer;
|
||||
|
||||
private int subIndex;
|
||||
/** 主线程 */
|
||||
private final CountDownLatch countDownLatch1;
|
||||
/** 工作线程 */
|
||||
private final CountDownLatch countDownLatch2;
|
||||
/** 对象列表 */
|
||||
private List<T> list;
|
||||
private int subIndex;
|
||||
/**
|
||||
* 主线程
|
||||
*/
|
||||
private final CountDownLatch countDownLatch1;
|
||||
/**
|
||||
* 工作线程
|
||||
*/
|
||||
private final CountDownLatch countDownLatch2;
|
||||
/**
|
||||
* 对象列表
|
||||
*/
|
||||
private List<T> list;
|
||||
|
||||
public BaseIndex(String parentIndexPath, int subIndex) {
|
||||
this.parentIndexPath = parentIndexPath;
|
||||
this.subIndex = subIndex;
|
||||
try {
|
||||
this.writer = IndexUtil.getIndexWriter(parentIndexPath + "/index" + subIndex, true);
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
this.countDownLatch1 = null;
|
||||
this.countDownLatch2 = null;
|
||||
}
|
||||
|
||||
public BaseIndex(
|
||||
IndexWriter writer,
|
||||
CountDownLatch countDownLatch1,
|
||||
CountDownLatch countDownLatch2,
|
||||
List<T> list) {
|
||||
super();
|
||||
this.writer = writer;
|
||||
this.countDownLatch1 = countDownLatch1;
|
||||
this.countDownLatch2 = countDownLatch2;
|
||||
this.list = list;
|
||||
}
|
||||
|
||||
public BaseIndex(
|
||||
String parentIndexPath,
|
||||
int subIndex,
|
||||
CountDownLatch countDownLatch1,
|
||||
CountDownLatch countDownLatch2,
|
||||
List<T> list) {
|
||||
super();
|
||||
this.parentIndexPath = parentIndexPath;
|
||||
this.subIndex = subIndex;
|
||||
try {
|
||||
// 多目录索引创建
|
||||
File file = new File(parentIndexPath + "/index" + subIndex);
|
||||
if (!file.exists()) {
|
||||
file.mkdir();
|
||||
}
|
||||
this.writer = IndexUtil.getIndexWriter(parentIndexPath + "/index" + subIndex, true);
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
public BaseIndex(String parentIndexPath, int subIndex) {
|
||||
this.parentIndexPath = parentIndexPath;
|
||||
this.subIndex = subIndex;
|
||||
try {
|
||||
this.writer = IndexUtil.getIndexWriter(parentIndexPath + "/index" + subIndex, true);
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
this.countDownLatch1 = null;
|
||||
this.countDownLatch2 = null;
|
||||
}
|
||||
|
||||
this.subIndex = subIndex;
|
||||
this.countDownLatch1 = countDownLatch1;
|
||||
this.countDownLatch2 = countDownLatch2;
|
||||
this.list = list;
|
||||
}
|
||||
|
||||
public BaseIndex(
|
||||
String path, CountDownLatch countDownLatch1, CountDownLatch countDownLatch2, List<T> list) {
|
||||
super();
|
||||
try {
|
||||
// 单目录索引创建
|
||||
File file = new File(path);
|
||||
if (!file.exists()) {
|
||||
file.mkdir();
|
||||
}
|
||||
this.writer = IndexUtil.getIndexWriter(path, true);
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
public BaseIndex(
|
||||
IndexWriter writer,
|
||||
CountDownLatch countDownLatch1,
|
||||
CountDownLatch countDownLatch2,
|
||||
List<T> list) {
|
||||
super();
|
||||
this.writer = writer;
|
||||
this.countDownLatch1 = countDownLatch1;
|
||||
this.countDownLatch2 = countDownLatch2;
|
||||
this.list = list;
|
||||
}
|
||||
;
|
||||
this.countDownLatch1 = countDownLatch1;
|
||||
this.countDownLatch2 = countDownLatch2;
|
||||
this.list = list;
|
||||
}
|
||||
|
||||
/**
|
||||
* 创建索引
|
||||
*
|
||||
* @param writer
|
||||
* @throws IOException
|
||||
* @throws ParseException
|
||||
*/
|
||||
public abstract void indexDoc(IndexWriter writer, T t) throws Exception;
|
||||
/**
|
||||
* 批量索引创建
|
||||
*
|
||||
* @param writer
|
||||
* @param t
|
||||
* @throws Exception
|
||||
*/
|
||||
public void indexDocs(IndexWriter writer, List<T> t) throws Exception {
|
||||
for (T t2 : t) {
|
||||
indexDoc(writer, t2);
|
||||
}
|
||||
}
|
||||
public BaseIndex(
|
||||
String parentIndexPath,
|
||||
int subIndex,
|
||||
CountDownLatch countDownLatch1,
|
||||
CountDownLatch countDownLatch2,
|
||||
List<T> list) {
|
||||
super();
|
||||
this.parentIndexPath = parentIndexPath;
|
||||
this.subIndex = subIndex;
|
||||
try {
|
||||
// 多目录索引创建
|
||||
File file = new File(parentIndexPath + "/index" + subIndex);
|
||||
if (!file.exists()) {
|
||||
file.mkdir();
|
||||
}
|
||||
this.writer = IndexUtil.getIndexWriter(parentIndexPath + "/index" + subIndex, true);
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
countDownLatch1.await();
|
||||
System.out.println(writer);
|
||||
indexDocs(writer, list);
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
countDownLatch2.countDown();
|
||||
try {
|
||||
writer.commit();
|
||||
writer.close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
this.subIndex = subIndex;
|
||||
this.countDownLatch1 = countDownLatch1;
|
||||
this.countDownLatch2 = countDownLatch2;
|
||||
this.list = list;
|
||||
}
|
||||
|
||||
public BaseIndex(
|
||||
String path, CountDownLatch countDownLatch1, CountDownLatch countDownLatch2, List<T> list) {
|
||||
super();
|
||||
try {
|
||||
// 单目录索引创建
|
||||
File file = new File(path);
|
||||
if (!file.exists()) {
|
||||
file.mkdir();
|
||||
}
|
||||
this.writer = IndexUtil.getIndexWriter(path, true);
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
;
|
||||
this.countDownLatch1 = countDownLatch1;
|
||||
this.countDownLatch2 = countDownLatch2;
|
||||
this.list = list;
|
||||
}
|
||||
|
||||
/**
|
||||
* 创建索引
|
||||
*
|
||||
* @param writer
|
||||
* @throws IOException
|
||||
* @throws ParseException
|
||||
*/
|
||||
public abstract void indexDoc(IndexWriter writer, T t) throws Exception;
|
||||
|
||||
/**
|
||||
* 批量索引创建
|
||||
*
|
||||
* @param writer
|
||||
* @param t
|
||||
* @throws Exception
|
||||
*/
|
||||
public void indexDocs(IndexWriter writer, List<T> t) throws Exception {
|
||||
for (T t2 : t) {
|
||||
indexDoc(writer, t2);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
countDownLatch1.await();
|
||||
System.out.println(writer);
|
||||
indexDocs(writer, list);
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
countDownLatch2.countDown();
|
||||
try {
|
||||
writer.commit();
|
||||
writer.close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,26 +1,25 @@
|
||||
/**
|
||||
* IK 中文分词 版本 5.0.1
|
||||
* IK Analyzer release 5.0.1
|
||||
*
|
||||
* <p>
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* <p>
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* <p>
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* <p>
|
||||
* 源代码由林良益(linliangyi2005@gmail.com)提供
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
*/
|
||||
package com.rymcu.forest.lucene.lucene;
|
||||
|
||||
@ -32,44 +31,44 @@ import org.apache.lucene.analysis.Tokenizer;
|
||||
*/
|
||||
public final class IKAnalyzer extends Analyzer {
|
||||
|
||||
private boolean useSmart;
|
||||
private boolean useSmart;
|
||||
|
||||
public boolean useSmart() {
|
||||
return useSmart;
|
||||
}
|
||||
public boolean useSmart() {
|
||||
return useSmart;
|
||||
}
|
||||
|
||||
public void setUseSmart(boolean useSmart) {
|
||||
this.useSmart = useSmart;
|
||||
}
|
||||
public void setUseSmart(boolean useSmart) {
|
||||
this.useSmart = useSmart;
|
||||
}
|
||||
|
||||
/**
|
||||
* IK分词器Lucene Analyzer接口实现类
|
||||
*
|
||||
* 默认细粒度切分算法
|
||||
*/
|
||||
public IKAnalyzer() {
|
||||
this(false);
|
||||
}
|
||||
/**
|
||||
* IK分词器Lucene Analyzer接口实现类
|
||||
*
|
||||
* 默认细粒度切分算法
|
||||
*/
|
||||
public IKAnalyzer() {
|
||||
this(false);
|
||||
}
|
||||
|
||||
/**
|
||||
* IK分词器Lucene Analyzer接口实现类
|
||||
*
|
||||
* @param useSmart 当为true时,分词器进行智能切分
|
||||
*/
|
||||
public IKAnalyzer(boolean useSmart) {
|
||||
super();
|
||||
this.useSmart = useSmart;
|
||||
}
|
||||
/**
|
||||
* IK分词器Lucene Analyzer接口实现类
|
||||
*
|
||||
* @param useSmart 当为true时,分词器进行智能切分
|
||||
*/
|
||||
public IKAnalyzer(boolean useSmart) {
|
||||
super();
|
||||
this.useSmart = useSmart;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* lucene 6.0
|
||||
* 重载Analyzer接口,构造分词组件
|
||||
*/
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer IKTokenizer = new IKTokenizer(this.useSmart());
|
||||
return new Analyzer.TokenStreamComponents(IKTokenizer);
|
||||
}
|
||||
/**
|
||||
* lucene 6.0
|
||||
* 重载Analyzer接口,构造分词组件
|
||||
*/
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer IKTokenizer = new IKTokenizer(this.useSmart());
|
||||
return new Analyzer.TokenStreamComponents(IKTokenizer);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -55,6 +55,7 @@ public final class IKTokenizer extends Tokenizer {
|
||||
|
||||
/**
|
||||
* Lucene 6.0 Tokenizer适配器类构造函数
|
||||
*
|
||||
* @param useSmart
|
||||
*/
|
||||
public IKTokenizer(boolean useSmart) {
|
||||
@ -70,6 +71,7 @@ public final class IKTokenizer extends Tokenizer {
|
||||
/**
|
||||
* lucene 6.0 新增
|
||||
* 方便创建 工厂类
|
||||
*
|
||||
* @param factory
|
||||
* @param useSmart
|
||||
*/
|
||||
|
@ -19,29 +19,29 @@ import java.util.concurrent.CountDownLatch;
|
||||
*/
|
||||
public class PortfolioBeanIndex extends BaseIndex<PortfolioLucene> {
|
||||
|
||||
public PortfolioBeanIndex(
|
||||
String parentIndexPath,
|
||||
int subIndex,
|
||||
CountDownLatch countDownLatch1,
|
||||
CountDownLatch countDownLatch2,
|
||||
List<PortfolioLucene> list) {
|
||||
super(parentIndexPath, subIndex, countDownLatch1, countDownLatch2, list);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void indexDoc(IndexWriter writer, PortfolioLucene user) throws Exception {
|
||||
Document doc = new Document();
|
||||
Field id = new Field("id", user.getIdPortfolio() + "", TextField.TYPE_STORED);
|
||||
Field title = new Field("title", user.getPortfolioTitle(), TextField.TYPE_STORED);
|
||||
Field summary = new Field("summary", user.getPortfolioDescription(), TextField.TYPE_STORED);
|
||||
// 添加到Document中
|
||||
doc.add(id);
|
||||
doc.add(title);
|
||||
doc.add(summary);
|
||||
if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {
|
||||
writer.addDocument(doc);
|
||||
} else {
|
||||
writer.updateDocument(new Term("id", user.getIdPortfolio() + ""), doc);
|
||||
public PortfolioBeanIndex(
|
||||
String parentIndexPath,
|
||||
int subIndex,
|
||||
CountDownLatch countDownLatch1,
|
||||
CountDownLatch countDownLatch2,
|
||||
List<PortfolioLucene> list) {
|
||||
super(parentIndexPath, subIndex, countDownLatch1, countDownLatch2, list);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void indexDoc(IndexWriter writer, PortfolioLucene user) throws Exception {
|
||||
Document doc = new Document();
|
||||
Field id = new Field("id", user.getIdPortfolio() + "", TextField.TYPE_STORED);
|
||||
Field title = new Field("title", user.getPortfolioTitle(), TextField.TYPE_STORED);
|
||||
Field summary = new Field("summary", user.getPortfolioDescription(), TextField.TYPE_STORED);
|
||||
// 添加到Document中
|
||||
doc.add(id);
|
||||
doc.add(title);
|
||||
doc.add(summary);
|
||||
if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {
|
||||
writer.addDocument(doc);
|
||||
} else {
|
||||
writer.updateDocument(new Term("id", user.getIdPortfolio() + ""), doc);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -16,28 +16,28 @@ import java.util.List;
|
||||
@Mapper
|
||||
public interface ArticleLuceneMapper {
|
||||
|
||||
/**
|
||||
* 加载所有文章内容
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
List<ArticleLucene> getAllArticleLucene();
|
||||
/**
|
||||
* 加载所有文章内容
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
List<ArticleLucene> getAllArticleLucene();
|
||||
|
||||
/**
|
||||
* 加载所有文章内容
|
||||
*
|
||||
* @param ids 文章id(半角逗号分隔)
|
||||
* @return
|
||||
*/
|
||||
List<ArticleDTO> getArticlesByIds(@Param("ids") Long[] ids);
|
||||
/**
|
||||
* 加载所有文章内容
|
||||
*
|
||||
* @param ids 文章id(半角逗号分隔)
|
||||
* @return
|
||||
*/
|
||||
List<ArticleDTO> getArticlesByIds(@Param("ids") Long[] ids);
|
||||
|
||||
|
||||
/**
|
||||
* 加载文章内容
|
||||
*
|
||||
* @param id 文章id
|
||||
* @return
|
||||
*/
|
||||
ArticleLucene getById(@Param("id") Long id);
|
||||
/**
|
||||
* 加载文章内容
|
||||
*
|
||||
* @param id 文章id
|
||||
* @return
|
||||
*/
|
||||
ArticleLucene getById(@Param("id") Long id);
|
||||
|
||||
}
|
||||
|
@ -16,26 +16,26 @@ import java.util.List;
|
||||
@Mapper
|
||||
public interface PortfolioLuceneMapper {
|
||||
|
||||
/**
|
||||
* 加载所有作品集信息
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
List<PortfolioLucene> getAllPortfolioLucene();
|
||||
/**
|
||||
* 加载所有作品集信息
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
List<PortfolioLucene> getAllPortfolioLucene();
|
||||
|
||||
/**
|
||||
* 加载所有作品集信息
|
||||
*
|
||||
* @param ids 作品集id(半角逗号分隔)
|
||||
* @return
|
||||
*/
|
||||
List<PortfolioDTO> getPortfoliosByIds(@Param("ids") Long[] ids);
|
||||
/**
|
||||
* 加载所有作品集信息
|
||||
*
|
||||
* @param ids 作品集id(半角逗号分隔)
|
||||
* @return
|
||||
*/
|
||||
List<PortfolioDTO> getPortfoliosByIds(@Param("ids") Long[] ids);
|
||||
|
||||
/**
|
||||
* 加载作品集
|
||||
*
|
||||
* @param id 用户id
|
||||
* @return
|
||||
*/
|
||||
PortfolioLucene getById(@Param("id") Long id);
|
||||
/**
|
||||
* 加载作品集
|
||||
*
|
||||
* @param id 用户id
|
||||
* @return
|
||||
*/
|
||||
PortfolioLucene getById(@Param("id") Long id);
|
||||
}
|
||||
|
@ -15,39 +15,39 @@ import java.util.List;
|
||||
@Mapper
|
||||
public interface UserDicMapper {
|
||||
|
||||
/**
|
||||
* 加载所有字典
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
List<String> getAllDic();
|
||||
/**
|
||||
* 加载所有字典
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
List<String> getAllDic();
|
||||
|
||||
/**
|
||||
* 加载所有字典信息
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
List<UserDic> getAll();
|
||||
/**
|
||||
* 加载所有字典信息
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
List<UserDic> getAll();
|
||||
|
||||
/**
|
||||
* 增加字典
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
void addDic(@Param("dic") String userDic);
|
||||
/**
|
||||
* 增加字典
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
void addDic(@Param("dic") String userDic);
|
||||
|
||||
/**
|
||||
* 删除字典
|
||||
*
|
||||
* @param id
|
||||
*/
|
||||
void deleteDic(@Param("id") String id);
|
||||
/**
|
||||
* 删除字典
|
||||
*
|
||||
* @param id
|
||||
*/
|
||||
void deleteDic(@Param("id") String id);
|
||||
|
||||
/**
|
||||
* 更新字典
|
||||
*
|
||||
* @param id
|
||||
* @param userDic
|
||||
*/
|
||||
void updateDic(@Param("id") Integer id, @Param("dic") String userDic);
|
||||
/**
|
||||
* 更新字典
|
||||
*
|
||||
* @param id
|
||||
* @param userDic
|
||||
*/
|
||||
void updateDic(@Param("id") Integer id, @Param("dic") String userDic);
|
||||
}
|
||||
|
@ -16,26 +16,26 @@ import java.util.List;
|
||||
@Mapper
|
||||
public interface UserLuceneMapper {
|
||||
|
||||
/**
|
||||
* 加载所有用户信息
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
List<UserLucene> getAllUserLucene();
|
||||
/**
|
||||
* 加载所有用户信息
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
List<UserLucene> getAllUserLucene();
|
||||
|
||||
/**
|
||||
* 加载所有用户信息
|
||||
*
|
||||
* @param ids 用户id(半角逗号分隔)
|
||||
* @return
|
||||
*/
|
||||
List<UserDTO> getUsersByIds(@Param("ids") Long[] ids);
|
||||
/**
|
||||
* 加载所有用户信息
|
||||
*
|
||||
* @param ids 用户id(半角逗号分隔)
|
||||
* @return
|
||||
*/
|
||||
List<UserDTO> getUsersByIds(@Param("ids") Long[] ids);
|
||||
|
||||
/**
|
||||
* 加载 UserLucene
|
||||
*
|
||||
* @param id 用户id
|
||||
* @return
|
||||
*/
|
||||
UserLucene getById(@Param("id") String id);
|
||||
/**
|
||||
* 加载 UserLucene
|
||||
*
|
||||
* @param id 用户id
|
||||
* @return
|
||||
*/
|
||||
UserLucene getById(@Param("id") String id);
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.lucene.model;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
@ -18,16 +18,23 @@ import lombok.NoArgsConstructor;
|
||||
@AllArgsConstructor
|
||||
public class ArticleLucene {
|
||||
|
||||
/** 文章编号 */
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idArticle;
|
||||
/**
|
||||
* 文章编号
|
||||
*/
|
||||
private Long idArticle;
|
||||
|
||||
/** 文章标题 */
|
||||
private String articleTitle;
|
||||
/**
|
||||
* 文章标题
|
||||
*/
|
||||
private String articleTitle;
|
||||
|
||||
/** 文章内容 */
|
||||
private String articleContent;
|
||||
/**
|
||||
* 文章内容
|
||||
*/
|
||||
private String articleContent;
|
||||
|
||||
/** 相关度评分 */
|
||||
private String score;
|
||||
/**
|
||||
* 相关度评分
|
||||
*/
|
||||
private String score;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.lucene.model;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
@ -18,16 +18,23 @@ import lombok.NoArgsConstructor;
|
||||
@AllArgsConstructor
|
||||
public class PortfolioLucene {
|
||||
|
||||
/** 作品集编号 */
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idPortfolio;
|
||||
/**
|
||||
* 作品集编号
|
||||
*/
|
||||
private Long idPortfolio;
|
||||
|
||||
/** 作品集名称 */
|
||||
private String portfolioTitle;
|
||||
/**
|
||||
* 作品集名称
|
||||
*/
|
||||
private String portfolioTitle;
|
||||
|
||||
/** 作品集介绍 */
|
||||
private String portfolioDescription;
|
||||
/**
|
||||
* 作品集介绍
|
||||
*/
|
||||
private String portfolioDescription;
|
||||
|
||||
/** 相关度评分 */
|
||||
private String score;
|
||||
/**
|
||||
* 相关度评分
|
||||
*/
|
||||
private String score;
|
||||
}
|
||||
|
@ -15,11 +15,15 @@ import javax.persistence.Table;
|
||||
@Data
|
||||
@Table(name = "forest_lucene_user_dic")
|
||||
public class UserDic {
|
||||
/** 主键 */
|
||||
@Id
|
||||
@GeneratedValue(generator = "JDBC")
|
||||
private Integer id;
|
||||
/**
|
||||
* 主键
|
||||
*/
|
||||
@Id
|
||||
@GeneratedValue(generator = "JDBC")
|
||||
private Integer id;
|
||||
|
||||
/** 字典 */
|
||||
private String dic;
|
||||
/**
|
||||
* 字典
|
||||
*/
|
||||
private String dic;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.rymcu.forest.lucene.model;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
@ -18,16 +18,23 @@ import lombok.NoArgsConstructor;
|
||||
@AllArgsConstructor
|
||||
public class UserLucene {
|
||||
|
||||
/** 用户编号 */
|
||||
@JsonFormat(shape = JsonFormat.Shape.STRING)
|
||||
private Long idUser;
|
||||
/**
|
||||
* 用户编号
|
||||
*/
|
||||
private Long idUser;
|
||||
|
||||
/** 昵称 */
|
||||
private String nickname;
|
||||
/**
|
||||
* 昵称
|
||||
*/
|
||||
private String nickname;
|
||||
|
||||
/** 签名 */
|
||||
private String signature;
|
||||
/**
|
||||
* 签名
|
||||
*/
|
||||
private String signature;
|
||||
|
||||
/** 相关度评分 */
|
||||
private String score;
|
||||
/**
|
||||
* 相关度评分
|
||||
*/
|
||||
private String score;
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user