|
|
@@ -3,24 +3,14 @@ import ch.qos.logback.classic.Logger;
|
|
|
import ch.qos.logback.classic.LoggerContext;
|
|
|
import cn.reghao.jutil.tool.id.SnowFlake;
|
|
|
import cn.reghao.tnb.content.api.constant.PostScope;
|
|
|
-import cn.reghao.tnb.search.api.dto.VideoSummary;
|
|
|
import cn.reghao.tnb.search.app.SearchApplication;
|
|
|
import cn.reghao.tnb.search.app.es.*;
|
|
|
import cn.reghao.tnb.search.app.lucene.LuceneDocument;
|
|
|
import cn.reghao.tnb.search.app.lucene.LuceneIndex;
|
|
|
import cn.reghao.tnb.search.app.lucene.LuceneQuery;
|
|
|
import cn.reghao.tnb.search.app.model.po.VideoText;
|
|
|
-import cn.reghao.tnb.search.app.model.po.Wenshu;
|
|
|
-import cn.reghao.tnb.search.app.util.ClassUtil;
|
|
|
-import co.elastic.clients.elasticsearch.ElasticsearchClient;
|
|
|
-import co.elastic.clients.elasticsearch._types.FieldValue;
|
|
|
import co.elastic.clients.elasticsearch._types.mapping.Property;
|
|
|
-import co.elastic.clients.elasticsearch._types.query_dsl.BoolQuery;
|
|
|
-import co.elastic.clients.elasticsearch._types.query_dsl.Query;
|
|
|
-import co.elastic.clients.elasticsearch._types.query_dsl.QueryStringQuery;
|
|
|
import co.elastic.clients.elasticsearch.indices.AnalyzeRequest;
|
|
|
-import co.elastic.clients.elasticsearch.indices.AnalyzeResponse;
|
|
|
-import co.elastic.clients.elasticsearch.indices.analyze.AnalyzeToken;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
import org.junit.jupiter.api.Test;
|
|
|
import org.slf4j.LoggerFactory;
|
|
|
@@ -30,11 +20,7 @@ import org.springframework.data.domain.Page;
|
|
|
import org.springframework.data.domain.PageRequest;
|
|
|
import org.springframework.test.context.ActiveProfiles;
|
|
|
|
|
|
-import java.io.File;
|
|
|
-import java.io.FileInputStream;
|
|
|
import java.io.IOException;
|
|
|
-import java.nio.ByteBuffer;
|
|
|
-import java.nio.channels.FileChannel;
|
|
|
import java.util.*;
|
|
|
|
|
|
/**
|
|
|
@@ -45,6 +31,12 @@ import java.util.*;
|
|
|
@ActiveProfiles("dev")
|
|
|
@SpringBootTest(classes = SearchApplication.class)
|
|
|
public class SearchTest {
|
|
|
+ void setLogLevel() {
|
|
|
+ LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory();
|
|
|
+ Logger rootLogger = loggerContext.getLogger("ROOT");
|
|
|
+ rootLogger.setLevel(Level.INFO);
|
|
|
+ }
|
|
|
+
|
|
|
SnowFlake idGenerator = new SnowFlake(1, 1);
|
|
|
@Autowired
|
|
|
ElasticService elasticService;
|
|
|
@@ -54,58 +46,6 @@ public class SearchTest {
|
|
|
MappingService mappingService;
|
|
|
@Autowired
|
|
|
DocumentService documentService;
|
|
|
- @Autowired
|
|
|
- QueryService<Wenshu> queryService;
|
|
|
- String index = Wenshu.class.getSimpleName().toLowerCase(Locale.ROOT);
|
|
|
-
|
|
|
- void readByFileChannel(String filePath, DocumentService documentService) {
|
|
|
- List<Wenshu> list = new ArrayList<>();
|
|
|
- File file = new File(filePath);
|
|
|
- try {
|
|
|
- FileInputStream fis = new FileInputStream(file);
|
|
|
- FileChannel fileChannel = fis.getChannel();
|
|
|
-
|
|
|
- int total = 0;
|
|
|
- // 10MB
|
|
|
- int capacity = 10*1024*1024;
|
|
|
- ByteBuffer byteBuffer = ByteBuffer.allocate(capacity);
|
|
|
- StringBuffer buffer = new StringBuffer();
|
|
|
- while(fileChannel.read(byteBuffer) != -1) {
|
|
|
- //读取后,将位置置为0,将limit置为容量, 以备下次读入到字节缓冲中,从0开始存储
|
|
|
- byteBuffer.clear();
|
|
|
- byte[] bytes = byteBuffer.array();
|
|
|
-
|
|
|
- String str = new String(bytes);
|
|
|
- buffer.append(str);
|
|
|
- String[] strArray = buffer.toString().split(System.lineSeparator());
|
|
|
- for (int i = 0; i < strArray.length-1; i++) {
|
|
|
- String line = strArray[i];
|
|
|
- Wenshu wenshu = parseLine(line);
|
|
|
- if (wenshu != null) {
|
|
|
- list.add(wenshu);
|
|
|
- } else {
|
|
|
- log.error("error parse line: {}", ++total);
|
|
|
- }
|
|
|
-
|
|
|
- if (list.size() > 10_000) {
|
|
|
- documentService.batchAddDocument1(index, list);
|
|
|
- log.info("add {} documents to es", list.size());
|
|
|
- list.clear();
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- String lastLine = strArray[strArray.length-1];
|
|
|
- if (!lastLine.endsWith("}")) {
|
|
|
- buffer = new StringBuffer();
|
|
|
- buffer.append(strArray[strArray.length-1]);
|
|
|
- }
|
|
|
- }
|
|
|
- } catch (IOException e) {
|
|
|
- e.printStackTrace();
|
|
|
- } finally {
|
|
|
- // TODO close 处理
|
|
|
- }
|
|
|
- }
|
|
|
|
|
|
@Test
|
|
|
public void indexTest() throws IOException {
|
|
|
@@ -123,103 +63,6 @@ public class SearchTest {
|
|
|
videoTextDocument.deleteAllDocument();
|
|
|
}
|
|
|
|
|
|
- void setLogLevel() {
|
|
|
- LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory();
|
|
|
- Logger rootLogger = loggerContext.getLogger("ROOT");
|
|
|
- rootLogger.setLevel(Level.INFO);
|
|
|
- }
|
|
|
-
|
|
|
- public void addDocTest() {
|
|
|
- setLogLevel();
|
|
|
- documentService.deleteAllDocument(index);
|
|
|
- String filePath = "/home/reghao/Downloads/2021年01月裁判文书数据.csv";
|
|
|
- readByFileChannel(filePath, documentService);
|
|
|
- }
|
|
|
-
|
|
|
- private Wenshu parseLine(String line) {
|
|
|
- String[] arr = line.split(",");
|
|
|
- try {
|
|
|
- List<String> fields = new ArrayList<>();
|
|
|
- String id = idGenerator.nextId()+"";
|
|
|
- fields.add(id);
|
|
|
-
|
|
|
- String originalUrl = arr[0];
|
|
|
- String caseId = arr[1];
|
|
|
- String caseName = arr[2];
|
|
|
- String court = arr[3];
|
|
|
- String region = arr[4];
|
|
|
- String caseType = arr[5];
|
|
|
- String caseTypeId = arr[6];
|
|
|
- fields.addAll(Arrays.asList(arr).subList(0, 7));
|
|
|
- String procedure = arr[8];
|
|
|
- fields.add(procedure);
|
|
|
- String judgmentDate = arr[9];
|
|
|
- fields.add(judgmentDate);
|
|
|
- String publicDate = arr[10];
|
|
|
- fields.add(publicDate);
|
|
|
- String parties = arr[11];
|
|
|
- fields.add(parties);
|
|
|
-
|
|
|
- String cause = "";
|
|
|
- String legalBasis = "";
|
|
|
- String fullText = "";
|
|
|
- if (arr.length == 13) {
|
|
|
- cause = arr[12];
|
|
|
- } else if (arr.length == 14) {
|
|
|
- cause = arr[12];
|
|
|
- legalBasis = arr[13];
|
|
|
- } else if (arr.length == 15) {
|
|
|
- cause = arr[12];
|
|
|
- legalBasis = arr[13];
|
|
|
- fullText = arr[14];
|
|
|
- } else if (arr.length > 15) {
|
|
|
- cause = arr[12];
|
|
|
- legalBasis = arr[13];
|
|
|
- String str = line.split(legalBasis)[1];
|
|
|
- fullText = str.substring(1);
|
|
|
- }
|
|
|
-
|
|
|
- fields.add(cause);
|
|
|
- fields.add(legalBasis);
|
|
|
- fields.add(fullText);
|
|
|
-
|
|
|
- Object object = ClassUtil.getObject(Wenshu.class, fields.toArray(new String[0]));
|
|
|
- if (object instanceof Wenshu) {
|
|
|
- return (Wenshu) object;
|
|
|
- }
|
|
|
- } catch (Exception e) {
|
|
|
- e.printStackTrace();
|
|
|
- }
|
|
|
-
|
|
|
- return null;
|
|
|
- }
|
|
|
-
|
|
|
- @Test
|
|
|
- public void queryTest() {
|
|
|
- setLogLevel();
|
|
|
-
|
|
|
- int pn = 1;
|
|
|
- int ps = 10;
|
|
|
- String queryString = "拐卖";
|
|
|
- /*List<Wenshu> page = queryService.queryWithHighlight(index, queryString, pn, ps, Wenshu.class);
|
|
|
- while (!page.isEmpty()) {
|
|
|
- pn++;
|
|
|
- page = queryService.queryWithHighlight(index, queryString, pn, ps, Wenshu.class);
|
|
|
- }*/
|
|
|
- Page<Wenshu> page = queryService.queryWithHighlight(index, queryString, pn, ps, Wenshu.class);
|
|
|
- long total = page.getTotalElements();
|
|
|
- int totalPages = page.getTotalPages();
|
|
|
- while (pn <= totalPages) {
|
|
|
- page = queryService.queryWithHighlight(index, queryString, pn, ps, Wenshu.class);
|
|
|
- List<Wenshu> list = page.getContent();
|
|
|
- Wenshu wenshu = list.get(0);
|
|
|
- String id = wenshu.getId();
|
|
|
- String caseName = wenshu.getCaseName();
|
|
|
- System.out.println(caseName);
|
|
|
- pn++;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
@Autowired
|
|
|
LuceneQuery luceneQuery;
|
|
|
@Autowired
|