Parcourir la source

update search-service

reghao il y a 6 mois
Parent
commit
93088ea20e

+ 17 - 0
search/search-service/src/main/java/cn/reghao/tnb/search/app/log/db/AccessLogMongo.java

@@ -6,6 +6,7 @@ import cn.reghao.tnb.search.app.log.model.po.AccessLog;
 import com.mongodb.MongoBulkWriteException;
 import com.mongodb.client.MongoCursor;
 import com.mongodb.client.model.InsertManyOptions;
+import com.mongodb.client.result.DeleteResult;
 import com.mongodb.client.result.InsertManyResult;
 import org.bson.Document;
 import org.springframework.data.domain.Sort;
@@ -68,6 +69,15 @@ public class AccessLogMongo implements BaseCrud<AccessLog>, BaseQuery<AccessLog>
 
     @Override
     public void delete(AccessLog accessLog) {
+        Query query = new Query();
+        query.addCriteria(Criteria.where("requestId").is(accessLog.getRequestId()));
+        DeleteResult deleteResult = mongoTemplate.remove(query, colName);
+    }
+
+    /**
+     * Removes every document in the {@code colName} collection whose {@code requestTime}
+     * is strictly less than {@code before}.
+     *
+     * @param before exclusive upper bound compared against the stored {@code requestTime}
+     *               (NOTE(review): presumably an epoch timestamp; confirm the unit with callers)
+     */
+    public void deleteByBefore(long before) {
+        Query query = new Query();
+        query.addCriteria(Criteria.where("requestTime").lt(before));
+        // The DeleteResult is not used by this method, so it is not captured.
+        mongoTemplate.remove(query, colName);
     }
 
     public MongoCursor<Document> getCursor(Map<String, Object> map) {
@@ -83,6 +93,13 @@ public class AccessLogMongo implements BaseCrud<AccessLog>, BaseQuery<AccessLog>
         return mongoTemplate.find(query, AccessLog.class, colName);
     }
 
+    /**
+     * Reads access logs from the {@code colName} collection, ordered by {@code requestTime}
+     * descending. Despite the name, the result is capped at {@code pageSize} entries.
+     *
+     * @return at most {@code pageSize} entries, largest {@code requestTime} first
+     */
+    public List<AccessLog> findAll() {
+        Query newestFirst = new Query()
+                .with(Sort.by(Sort.Direction.DESC, "requestTime"))
+                .limit(pageSize);
+        return mongoTemplate.find(newestFirst, AccessLog.class, colName);
+    }
+
     public List<AccessLog> findByTargetServer(String targetServer) {
         Query query = new Query();
         query.addCriteria(Criteria.where("targetServer").is(targetServer));

+ 11 - 10
search/search-service/src/main/java/cn/reghao/tnb/search/app/service/WenshuService.java

@@ -43,24 +43,18 @@ public class WenshuService {
         int bufSize = 1024*1024*10;
         BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(filePath)), bufSize);
 
-        int count = 0;
         List<Wenshu> list = new ArrayList<>();
         // 忽略 CSV 文件的第一行
         String line = in.readLine();
         while((line = in.readLine()) != null) {
             Wenshu wenshu = parseLineByWenshu(line);
             if (wenshu != null) {
-                list.add(wenshu);
+                successCount++;
+                /*list.add(wenshu);
                 if (list.size() > 10_000) {
-                    //documentService.batchAddWenshu(indexName, list);
                     addLuceneIndex(list);
                     list.clear();
-                    count++;
-                }
-            }
-
-            if (count > 10) {
-                break;
+                }*/
             }
         }
         in.close();
@@ -95,6 +89,8 @@ public class WenshuService {
         return null;
     }
 
+    int successCount = 0;
+    int failCount = 0;
     int errorCount = 0;
     String mark = "\"";
     private Wenshu parseLineByWenshu(String line) {
@@ -178,7 +174,7 @@ public class WenshuService {
             Object object = ClassUtil.getObject(Wenshu.class, fields.toArray(new String[0]));
             return (Wenshu) object;
         } catch (Exception e) {
-            log.error("error line -> {}", ++errorCount);
+            log.error("parse line failed -> {}", ++failCount);
             // ignore
         }
         return null;
@@ -204,9 +200,14 @@ public class WenshuService {
             log.info("index {} documents...", wenshuList.size());
         } catch (Exception e) {
             e.printStackTrace();
+            errorCount++;
         }
     }
 
+    /**
+     * Prints the {@code successCount}, {@code failCount} and {@code errorCount} counters
+     * to standard output as a single line.
+     */
+    public void printResult() {
+        // %n terminates the line so the summary does not run into whatever is printed next.
+        System.out.printf("success: %s, fail: %s, error: %s%n", successCount, failCount, errorCount);
+    }
+
     public void deleteAll() throws IOException {
         wenshuDocRepository.deleteAll();
         luceneIndex.deleteAll(indexName);

+ 1 - 23
search/search-service/src/test/java/SearchTest.java → search/search-service/src/test/java/ElasticTest.java

@@ -5,9 +5,6 @@ import cn.reghao.jutil.tool.id.SnowFlake;
 import cn.reghao.tnb.content.api.constant.PostScope;
 import cn.reghao.tnb.search.app.SearchApplication;
 import cn.reghao.tnb.search.app.es.*;
-import cn.reghao.tnb.search.app.lucene.LuceneDocument;
-import cn.reghao.tnb.search.app.lucene.LuceneIndex;
-import cn.reghao.tnb.search.app.lucene.LuceneSearch;
 import cn.reghao.tnb.search.app.model.po.VideoText;
 import co.elastic.clients.elasticsearch._types.mapping.Property;
 import co.elastic.clients.elasticsearch.indices.AnalyzeRequest;
@@ -30,7 +27,7 @@ import java.util.*;
 @Slf4j
 @ActiveProfiles("dev")
 @SpringBootTest(classes = SearchApplication.class)
-public class SearchTest {
+public class ElasticTest {
     void setLogLevel() {
         LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory();
         Logger rootLogger = loggerContext.getLogger("ROOT");
@@ -63,12 +60,6 @@ public class SearchTest {
         videoTextDocument.deleteAllDocument();
     }
 
-    @Autowired
-    LuceneSearch luceneSearch;
-    @Autowired
-    LuceneIndex luceneIndex;
-    @Autowired
-    LuceneDocument luceneDocument;
     @Autowired
     SearchService searchService;
     @Test
@@ -77,19 +68,6 @@ public class SearchTest {
         int ps = 12;
         PageRequest pageRequest = PageRequest.of(pn-1, ps);
         String videoId = "ao1n8ggYOg";
-        /*Document document = luceneIndex.getDocument(videoId);
-        VideoSummary videoSummary = (VideoSummary) luceneDocument.getObject(VideoSummary.class, document);
-
-        String title = "哈哈哈哈哈5哈哈哈哈4哈1";
-        videoSummary.setTitle(title);
-        videoSummary.setId("");
-        videoSummary.setDescription("");
-
-        Document document1 = luceneDocument.getVideoSummaryDoc(videoSummary);
-        luceneIndex.updateIndex(videoId, document1);
-
-        Document document2 = luceneIndex.getDocument(videoId);
-        Object object2 = luceneDocument.getObject(VideoSummary.class, document2);*/
 
         String index = VideoText.class.getSimpleName().toLowerCase(Locale.ROOT);
         String kw = "隔壁";

+ 26 - 0
search/search-service/src/test/java/LogTest.java

@@ -0,0 +1,26 @@
+import cn.reghao.tnb.search.app.SearchApplication;
+import cn.reghao.tnb.search.app.log.db.AccessLogMongo;
+import cn.reghao.tnb.search.app.log.model.po.AccessLog;
+import lombok.extern.slf4j.Slf4j;
+import org.junit.jupiter.api.Test;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.test.context.ActiveProfiles;
+
+import java.util.List;
+
+/**
+ * Smoke test for {@link AccessLogMongo}, run inside the Spring context of
+ * {@link SearchApplication} with the {@code dev} profile active.
+ *
+ * @author reghao
+ * @date 2025-08-27 20:58:22
+ */
+@Slf4j
+@ActiveProfiles("dev")
+@SpringBootTest(classes = SearchApplication.class)
+public class LogTest {
+    @Autowired
+    AccessLogMongo accessLogMongo;
+
+    /** Calls {@code findAll()} and reports how many entries came back. */
+    @Test
+    public void logTest() {
+        List<AccessLog> accessLogList = accessLogMongo.findAll();
+        // Report the outcome so the fetched list is not silently discarded.
+        log.info("fetched {} access logs", accessLogList.size());
+    }
+}

+ 44 - 68
search/search-service/src/test/java/WenshuTest.java

@@ -3,8 +3,8 @@ import ch.qos.logback.classic.Logger;
 import ch.qos.logback.classic.LoggerContext;
 import cn.reghao.jutil.jdk.db.PageList;
 import cn.reghao.jutil.tool.id.SnowFlake;
+import cn.reghao.tnb.search.api.dto.IndexCount;
 import cn.reghao.tnb.search.app.SearchApplication;
-import cn.reghao.tnb.search.app.es.*;
 import cn.reghao.tnb.search.app.lucene.LuceneDocument;
 import cn.reghao.tnb.search.app.lucene.LuceneIndex;
 import cn.reghao.tnb.search.app.lucene.LuceneSearch;
@@ -12,21 +12,18 @@ import cn.reghao.tnb.search.app.model.po.Wenshu;
 import cn.reghao.tnb.search.app.model.po.WenshuLucene;
 import cn.reghao.tnb.search.app.model.vo.ElasticQuery;
 import cn.reghao.tnb.search.app.service.WenshuService;
-import cn.reghao.tnb.search.app.util.ClassUtil;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.lucene.document.Document;
 import org.junit.jupiter.api.Test;
 import org.slf4j.LoggerFactory;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.boot.test.context.SpringBootTest;
-import org.springframework.data.domain.Page;
 import org.springframework.test.context.ActiveProfiles;
 
 import java.io.File;
-import java.io.FileInputStream;
 import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.nio.channels.FileChannel;
+import java.nio.file.*;
+import java.nio.file.attribute.BasicFileAttributes;
 import java.util.*;
 import java.util.stream.Collectors;
 
@@ -45,18 +42,6 @@ public class WenshuTest {
     }
 
     SnowFlake idGenerator = new SnowFlake(1, 1);
-    @Autowired
-    ElasticService elasticService;
-    @Autowired
-    IndexService indexService;
-    @Autowired
-    MappingService mappingService;
-    @Autowired
-    DocumentService documentService;
-    @Autowired
-    QueryService<Wenshu> queryService;
-    String indexName = Wenshu.class.getSimpleName().toLowerCase(Locale.ROOT);
-
     private void addLuceneIndex(List<Wenshu> wenshuList) {
         long id = idGenerator.nextId();
         List<Document> luceneDocumentList = wenshuList.stream()
@@ -68,14 +53,6 @@ public class WenshuTest {
                 .map(wenshu -> luceneDocument.getDocumentByWenshu(wenshu))
                 .collect(Collectors.toList());
 
-        /*luceneDocumentList.forEach(doc -> {
-            try {
-                luceneIndex.createIndex(doc);
-            } catch (IOException e) {
-                e.printStackTrace();
-            }
-        });*/
-
         String indexName = "wenshu";
         try {
             luceneIndex.createIndex(indexName, wenshuDocumentList);
@@ -92,57 +69,56 @@ public class WenshuTest {
     LuceneDocument luceneDocument;
     @Autowired
     WenshuService wenshuService;
-    @Test
-    public void addTest() throws IOException {
-        String indexName = "wenshu";
-        /*indexService.deleteIndex(indexName);
-        Map<String, Property> propertyMap = mappingService.getPropertyMapByWenshu();
-        indexService.createIndex(indexName, propertyMap);*/
-
-        //documentService.deleteAllDocument(indexName);
-        String filePath = "/home/reghao/Downloads/2021年07月裁判文书数据.csv";
-        //readByFileChannel(filePath, documentService);
-        wenshuService.processFile(filePath);
-    }
 
-    @Test
-    public void deleteAllTest() throws IOException {
-        wenshuService.deleteAll();
+    /**
+     * Walks the file tree rooted at {@code path} and passes the absolute path of every
+     * {@code .csv} file to {@code wenshuService.processFile}. Files that cannot be visited
+     * and errors reported after a directory has been iterated do not stop the walk.
+     *
+     * @param path root of the tree to traverse
+     * @throws IOException if raised while a file is being processed or by the walk itself
+     */
+    public void walkDir(Path path) throws IOException {
+        Files.walkFileTree(path, new FileVisitor<Path>() {
+            @Override
+            public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException {
+                return FileVisitResult.CONTINUE;
+            }
+
+            @Override
+            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
+                String filePath = file.toAbsolutePath().toString();
+                // Require the dot so only a real ".csv" extension matches, not any name ending in "csv".
+                if (filePath.endsWith(".csv")) {
+                    wenshuService.processFile(filePath);
+                    log.info("index {} done...", filePath);
+                }
+                return FileVisitResult.CONTINUE;
+            }
+
+            @Override
+            public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException {
+                // Unreadable entries are skipped so one bad file does not abort the whole walk.
+                return FileVisitResult.CONTINUE;
+            }
+
+            @Override
+            public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
+                return FileVisitResult.CONTINUE;
+            }
+        });
+    }
 
     @Test
-    public void queryTest() {
-        setLogLevel();
+    public void indexWenshuLucene() throws IOException {
+        String indexName = "wenshu_lucene";
+        String baseDir = "/home/reghao/disk/2/wenshu/";
+        Path path = Paths.get(baseDir);
+        walkDir(path);
 
-        int pn = 1;
-        int ps = 10;
-        String queryString = "贩毒";
-        ElasticQuery elasticQuery = new ElasticQuery.Builder()
-                .pageSize(10)
-                .pageNumber(1)
-                .indexName(indexName)
-                .queryFiledNames(List.of("caseName", "fullText"))
-                .highlightFiledNames(List.of("caseName", "fullText"))
-                .queryString(queryString)
-                .build();
+        IndexCount indexCount = luceneIndex.countIndex(indexName);
+        System.out.printf("max: %s, num: %s\n", indexCount.getMaxDocs(), indexCount.getNumDocs());
+        wenshuService.printResult();
+    }
 
-        Page<Wenshu> page = queryService.queryWithHighlight(elasticQuery, Wenshu.class);
-        long total = page.getTotalElements();
-        int totalPages = page.getTotalPages();
-        while (pn <= totalPages) {
-            elasticQuery.setPageNumber(pn);
-            page = queryService.queryWithHighlight(elasticQuery, Wenshu.class);
-            List<Wenshu> list = page.getContent();
-            Wenshu wenshu = list.get(0);
-            String id = wenshu.getId();
-            String caseName = wenshu.getCaseName();
-            System.out.printf("%s -> %s\n", id, caseName);
-            pn++;
-        }
+    @Test
+    public void deleteAllWenshuLucene() throws IOException {
+        wenshuService.deleteAll();
     }
 
     @Test
-    public void queryTest1() throws Exception {
+    public void queryTest() {
         setLogLevel();
 
         String indexName = "wenshu_lucene";