Bläddra i källkod

删除 lucene, 移到 bnt 项目

reghao 1 år sedan
förälder
incheckning
8cbebac783

+ 0 - 26
content/content-service/pom.xml

@@ -140,32 +140,6 @@
             </exclusions>
         </dependency>
 
-        <dependency>
-            <groupId>org.apache.lucene</groupId>
-            <artifactId>lucene-core</artifactId>
-            <version>8.9.0</version>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.lucene</groupId>
-            <artifactId>lucene-queryparser</artifactId>
-            <version>8.9.0</version>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.lucene</groupId>
-            <artifactId>lucene-highlighter</artifactId>
-            <version>8.9.0</version>
-        </dependency>
-        <dependency>
-            <groupId>com.jianggujin</groupId>
-            <artifactId>IKAnalyzer-lucene</artifactId>
-            <version>8.0.0</version>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.lucene</groupId>
-            <artifactId>lucene-analyzers-smartcn</artifactId>
-            <version>8.9.0</version>
-        </dependency>
-
         <dependency>
             <groupId>org.springframework.boot</groupId>
             <artifactId>spring-boot-starter-data-redis</artifactId>

+ 0 - 111
content/content-service/src/main/java/cn/reghao/tnb/content/app/vod/service/lucene/LuceneIndex.java

@@ -1,111 +0,0 @@
-package cn.reghao.tnb.content.app.vod.service.lucene;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.document.*;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
-import org.wltea.analyzer.lucene.IKAnalyzer;
-
-import java.io.File;
-import java.io.IOException;
-import java.nio.file.Paths;
-import java.util.List;
-
-/**
- * Static helpers that add, replace and delete documents in the Lucene index
- * stored under {@code /opt/tmp/jsearch}.
- *
- * <p>Each helper opens its own {@link IndexWriter}, applies a single change,
- * commits it and closes the writer again. The writer is closed even when the
- * change fails, so a failed call no longer keeps the writer open.
- *
- * <p>NOTE(review): {@link #getIndexWriter()} (used by the {@code createIndex}
- * methods) analyzes with {@link StandardAnalyzer}, while the update and delete
- * helpers configure {@link IKAnalyzer}; confirm that using both analyzers on
- * the same index is intended.
- *
- * @author reghao
- * @date 2023-03-02 09:28:58
- */
-public class LuceneIndex {
-    private final static String indexDirPath = "/opt/tmp/jsearch";
-
-    /**
-     * Opens a writer on the index directory, configured with {@link StandardAnalyzer}.
-     *
-     * @return a new writer; the caller is responsible for closing it
-     * @throws IOException if the directory or the writer cannot be opened
-     */
-    public static IndexWriter getIndexWriter() throws IOException {
-        File indexDir = new File(indexDirPath);
-        Directory indexDirectory = FSDirectory.open(indexDir.toPath());
-
-        Analyzer luceneAnalyzer = new StandardAnalyzer();
-        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(luceneAnalyzer);
-        return new IndexWriter(indexDirectory, indexWriterConfig);
-    }
-
-    /**
-     * Opens a writer on the index directory, configured with {@link IKAnalyzer}.
-     * Shared by the update and delete helpers.
-     */
-    private static IndexWriter openIkWriter() throws IOException {
-        Directory directory = FSDirectory.open(Paths.get(indexDirPath));
-        Analyzer analyzer = new IKAnalyzer();
-        IndexWriterConfig config = new IndexWriterConfig(analyzer);
-        return new IndexWriter(directory, config);
-    }
-
-    /**
-     * Adds one document to the index and commits.
-     *
-     * @param document the document to add
-     * @throws IOException if the index cannot be opened or written
-     */
-    public static void createIndex(Document document) throws IOException {
-        try (IndexWriter indexWriter = getIndexWriter()) {
-            indexWriter.addDocument(document);
-            indexWriter.commit();
-        }
-    }
-
-    /**
-     * Adds every document of {@code list} to the index and commits once at the end.
-     *
-     * <p>A failure while adding a document now aborts the batch and is propagated
-     * to the caller; previously it was printed and the remaining documents were
-     * committed as if nothing had happened.
-     *
-     * <p>NOTE(review): documents added before the failure may still be committed
-     * when the writer is closed (Lucene's commit-on-close default) - confirm
-     * whether an explicit rollback is wanted instead.
-     *
-     * @param list the documents to add
-     * @throws IOException if the index cannot be opened or any document cannot be written
-     */
-    public static void createIndex(List<Document> list) throws IOException {
-        try (IndexWriter indexWriter = getIndexWriter()) {
-            for (Document doc : list) {
-                indexWriter.addDocument(doc);
-            }
-            indexWriter.commit();
-        }
-    }
-
-    /**
-     * Replaces the documents whose {@code name} term equals {@code "李白"} with
-     * {@code document} and commits.
-     *
-     * <p>NOTE(review): the term to match is hard-coded rather than derived from
-     * {@code document}; this looks like sample code - confirm before reuse.
-     *
-     * @param document the replacement document
-     * @throws IOException if the index cannot be opened or written
-     */
-    public static void updateIndex(Document document) throws IOException {
-        try (IndexWriter indexWriter = openIkWriter()) {
-            indexWriter.updateDocument(new Term("name", "李白"), document);
-            indexWriter.commit();
-        }
-    }
-
-    /**
-     * Builds a fixed sample document and stores it through
-     * {@link #updateIndex(Document)}.
-     *
-     * @throws IOException if the index cannot be opened or written
-     */
-    public static void updateIndex() throws IOException {
-        Document document = new Document();
-        // "id" is added twice: as a point for numeric queries and as a stored value.
-        document.add(new LongPoint("id", 123456));
-        document.add(new StoredField("id", 123456));
-        document.add(new IntPoint("age", 20));
-        document.add(new StringField("name", "李白", Field.Store.YES));
-        document.add(new TextField("poems", "望天门山", Field.Store.YES));
-        document.add(new TextField("about", "号青莲居士", Field.Store.NO));
-
-        updateIndex(document);
-    }
-
-    /**
-     * Deletes the documents whose {@code name} term equals {@code "李白"} and commits.
-     *
-     * @throws IOException if the index cannot be opened or written
-     */
-    public static void deleteIndex() throws IOException {
-        try (IndexWriter indexWriter = openIkWriter()) {
-            indexWriter.deleteDocuments(new Term("name", "李白"));
-            indexWriter.commit();
-        }
-    }
-
-    /**
-     * Deletes every document in the index and commits.
-     *
-     * @throws IOException if the index cannot be opened or written
-     */
-    public static void deleteAllIndex() throws IOException {
-        try (IndexWriter indexWriter = openIkWriter()) {
-            indexWriter.deleteAll();
-            indexWriter.commit();
-        }
-    }
-
-    /**
-     * Opens the index writer and closes it again without writing any document.
-     */
-    public static void main(String[] args) throws IOException {
-        try (IndexWriter indexWriter = getIndexWriter()) {
-            // Nothing to write; opening and closing the writer is the whole purpose.
-        }
-    }
-}

+ 0 - 120
content/content-service/src/main/java/cn/reghao/tnb/content/app/vod/service/lucene/LuceneQuery.java

@@ -1,120 +0,0 @@
-package cn.reghao.tnb.content.app.vod.service.lucene;
-
-import lombok.extern.slf4j.Slf4j;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.LongPoint;
-import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
-import org.apache.lucene.queryparser.classic.ParseException;
-import org.apache.lucene.queryparser.classic.QueryParser;
-import org.apache.lucene.search.*;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
-import org.wltea.analyzer.lucene.IKAnalyzer;
-
-import java.io.IOException;
-import java.nio.file.Paths;
-
-/**
- * Sample Lucene queries run against the index stored under
- * {@code /opt/tmp/jsearch}; every hit is written to the log.
- *
- * @author reghao
- * @date 2023-03-02 09:29:04
- */
-@Slf4j
-public class LuceneQuery {
-    private final static String indexDirPath = "/opt/tmp/jsearch";
-
-    /**
-     * Runs {@code query}, logs the hit count and logs each returned document.
-     *
-     * <p>The directory and reader are opened per call and closed when the call
-     * ends. An {@link IOException} is logged and not rethrown.
-     *
-     * @param query the query to execute
-     */
-    static void query(Query query) {
-        try (Directory directory = FSDirectory.open(Paths.get(indexDirPath));
-             IndexReader indexReader = DirectoryReader.open(directory)) {
-            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
-
-            // Fetch at most the first 100 hits.
-            TopDocs topDocs = indexSearcher.search(query, 100);
-            log.info("本次搜索共找到{}条数据", topDocs.totalHits.value);
-            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
-                Document document = indexReader.document(scoreDoc.doc);
-                log.info(document.toString());
-            }
-        } catch (IOException e) {
-            log.error("Lucene query failed: {}", query, e);
-        }
-    }
-
-    /** Runs an exact term query for {@code name = "李白"}. */
-    public static void termQuery() {
-        Query query = new TermQuery(new Term("name", "李白"));
-        query(query);
-    }
-
-    /**
-     * Builds a prefix query without running it.
-     *
-     * @param field     the field to match
-     * @param startWith the prefix the field's terms must start with
-     * @return the prefix query
-     */
-    public static PrefixQuery prefixQuery(String field, String startWith) {
-        return new PrefixQuery(new Term(field, startWith));
-    }
-
-    /**
-     * Runs a prefix query on {@code field} that additionally requires
-     * {@code tableName = "RujiaHotel"}.
-     *
-     * @param field     the field to match
-     * @param startWith the prefix the field's terms must start with
-     */
-    public static void prefixQuery1(String field, String startWith) {
-        Query query1 = new TermQuery(new Term("tableName", "RujiaHotel"));
-        Query query2 = new PrefixQuery(new Term(field, startWith));
-
-        // Both clauses are MUST, so a hit has to satisfy the term and the prefix.
-        BooleanQuery.Builder builder = new BooleanQuery.Builder();
-        builder.add(query1, BooleanClause.Occur.MUST);
-        builder.add(query2, BooleanClause.Occur.MUST);
-        BooleanQuery booleanQuery = builder.build();
-        query(booleanQuery);
-    }
-
-    /** Runs a wildcard query for {@code name} matching {@code "李?"}. */
-    public static void wildcardQuery() {
-        Query query = new WildcardQuery(new Term("name", "李?"));
-        query(query);
-    }
-
-    /** Runs a fuzzy query on {@code name} with a maximum of one edit. */
-    public static void fuzzyQuery() {
-        // Original author's note: "李百" is one edit away from "李白" and would
-        // match; "里百" (used here) would not be found.
-        Query query = new FuzzyQuery(new Term("name", "里百"), 1);
-        query(query);
-    }
-
-    /** Runs an exact and then a range query on the numeric {@code id} point. */
-    public static void numberQuery() {
-        // Exact match.
-        Query query = LongPoint.newExactQuery("id", 123456);
-        query(query);
-
-        // Range match.
-        query = LongPoint.newRangeQuery("id", 123L, 12345678L);
-        query(query);
-    }
-
-    /** Runs a boolean query matching {@code name = "李白"} or {@code name = "杜甫"}. */
-    public static void booleanQuery() {
-        Query query1 = new TermQuery(new Term("name", "李白"));
-        Query query2 = new TermQuery(new Term("name", "杜甫"));
-        BooleanQuery.Builder builder = new BooleanQuery.Builder();
-        builder.add(query1, BooleanClause.Occur.SHOULD);
-        builder.add(query2, BooleanClause.Occur.SHOULD);
-        BooleanQuery booleanQuery = builder.build();
-        query(booleanQuery);
-    }
-
-    /**
-     * Runs the same two-term boolean query as {@link #booleanQuery()}, with
-     * boosts of 1.5 and 1.6 applied to the two terms.
-     */
-    public static void boostQuery() {
-        Query query1 = new BoostQuery(new TermQuery(new Term("name", "李白")), 1.5f);
-        Query query2 = new BoostQuery(new TermQuery(new Term("name", "杜甫")), 1.6f);
-        BooleanQuery.Builder builder = new BooleanQuery.Builder();
-        builder.add(query1, BooleanClause.Occur.SHOULD);
-        builder.add(query2, BooleanClause.Occur.SHOULD);
-        BooleanQuery booleanQuery = builder.build();
-
-        query(booleanQuery);
-    }
-
-    /**
-     * Parses free text with {@link IKAnalyzer} and runs it, first against
-     * {@code name} only and then against {@code name} and {@code about}.
-     *
-     * @throws ParseException if the query text cannot be parsed
-     */
-    public static void queryParserQuery() throws ParseException {
-        QueryParser queryParser = new QueryParser("name", new IKAnalyzer());
-        Query query = queryParser.parse("李白和杜甫");
-        query(query);
-
-        // Multi-field query.
-        MultiFieldQueryParser multiFieldQueryParser = new MultiFieldQueryParser(new String[]{"name", "about"}, new IKAnalyzer());
-        query = multiFieldQueryParser.parse("李白和子美");
-        query(query);
-    }
-}

+ 0 - 97
content/content-service/src/main/java/cn/reghao/tnb/content/app/vod/service/lucene/LuceneSearch.java

@@ -1,97 +0,0 @@
-package cn.reghao.tnb.content.app.vod.service.lucene;
-
-import cn.reghao.jutil.jdk.db.PageList;
-import cn.reghao.tnb.content.api.dto.VideoCard;
-import cn.reghao.tnb.content.app.vod.model.vo.*;
-import cn.reghao.tnb.content.app.util.redis.ds.RedisHash;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.queryparser.classic.ParseException;
-import org.apache.lucene.queryparser.classic.QueryParser;
-import org.apache.lucene.search.*;
-import org.apache.lucene.search.highlight.Formatter;
-import org.apache.lucene.search.highlight.*;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
-
-import java.io.IOException;
-import java.nio.file.Paths;
-import java.util.*;
-
-/**
- * Title search over the Lucene index stored under {@code /opt/data/lucene},
- * with the matching video cards loaded from Redis.
- *
- * @author reghao
- * @date 2023-11-10 20:27:07
- */
-public class LuceneSearch {
-    private final String indexDirPath = "/opt/data/lucene";
-    private final RedisHash<VideoCard> redisHash;
-
-    public LuceneSearch(RedisHash<VideoCard> redisHash) {
-        this.redisHash = redisHash;
-    }
-
-    /**
-     * Searches video titles and returns the matching cards as one page.
-     *
-     * @param keyword    the query text, parsed with Lucene's classic query syntax
-     * @param pageSize   number of hits per page
-     * @param pageNumber 1-based page number
-     * @return the page of cards, or an empty page when nothing matches or the
-     *         search fails
-     */
-    public PageList<VideoCard> searchByTitle(String keyword, int pageSize, int pageNumber) {
-        try {
-            SearchResult searchResult = search(keyword, pageSize, pageNumber);
-            long total = searchResult.getTotal();
-            Map<String, String> result = searchResult.getResult();
-            Set<String> videoIds = result.keySet();
-            if (!videoIds.isEmpty()) {
-                List<VideoCard> list = redisHash.multiGet("video:card:hash", videoIds);
-                return PageList.pageList(pageNumber, pageSize, (int) total, list);
-            }
-        } catch (Exception e) {
-            // This file imports no logger, so the failure is still printed here
-            // and the caller receives an empty page.
-            e.printStackTrace();
-        }
-
-        return PageList.empty();
-    }
-
-    /**
-     * Runs the title query and returns one page of hits with highlighted titles.
-     *
-     * <p>The result map is keyed by {@code videoId} and iterates in hit order.
-     * Each value is the title with matched terms wrapped in {@code <em>}, or the
-     * plain stored title when the highlighter produces no fragment.
-     *
-     * <p>NOTE(review): the total is taken from {@code TopDocs.totalHits.value},
-     * which Lucene may report as a lower bound - confirm if an exact count is
-     * required.
-     *
-     * @param keyword    the query text, parsed with Lucene's classic query syntax
-     * @param pageSize   number of hits per page
-     * @param pageNumber 1-based page number; values below 1 are treated as page 1
-     * @return the total hit count and the hits of the requested page
-     * @throws IOException                  if the index cannot be read
-     * @throws InvalidTokenOffsetsException if highlighting a title fails
-     * @throws ParseException               if {@code keyword} is not a valid query
-     */
-    public SearchResult search(String keyword, int pageSize, int pageNumber)
-            throws IOException, InvalidTokenOffsetsException, ParseException {
-        String field = "title";
-        Analyzer analyzer = new StandardAnalyzer();
-        QueryParser queryParser = new QueryParser(field, analyzer);
-        Query query = queryParser.parse(keyword);
-
-        // Directory and reader are opened per search and closed when it ends.
-        try (Directory directory = FSDirectory.open(Paths.get(indexDirPath));
-             IndexReader indexReader = DirectoryReader.open(directory)) {
-            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
-
-            long total;
-            ScoreDoc[] scoreDocs;
-            if (pageNumber <= 1) {
-                TopDocs topDocs = indexSearcher.searchAfter(null, query, pageSize);
-                total = topDocs.totalHits.value;
-                scoreDocs = topDocs.scoreDocs;
-            } else {
-                // Collect all hits of the preceding pages to find the hit after
-                // which the requested page starts.
-                int count = (pageNumber - 1) * pageSize;
-                TopDocs prevTopDocs = indexSearcher.searchAfter(null, query, count);
-                total = prevTopDocs.totalHits.value;
-
-                ScoreDoc[] prevScoreDocs = prevTopDocs.scoreDocs;
-                if (prevScoreDocs.length < count) {
-                    // The preceding pages are not even full (possibly empty), so
-                    // the requested page has no hits; this also avoids indexing
-                    // into an empty array.
-                    scoreDocs = new ScoreDoc[0];
-                } else {
-                    ScoreDoc after = prevScoreDocs[prevScoreDocs.length - 1];
-                    scoreDocs = indexSearcher.searchAfter(after, query, pageSize).scoreDocs;
-                }
-            }
-
-            // LinkedHashMap keeps the hits in the order Lucene returned them.
-            Map<String, String> map = new LinkedHashMap<>();
-            Formatter formatter = new SimpleHTMLFormatter("<em>", "</em>");
-            QueryScorer queryScorer = new QueryScorer(query);
-            Highlighter highlighter = new Highlighter(formatter, queryScorer);
-            for (ScoreDoc scoreDoc : scoreDocs) {
-                Document document = indexReader.document(scoreDoc.doc);
-                String videoId = document.get("videoId");
-                String title = document.get("title");
-                String htmlTitle = highlighter.getBestFragment(analyzer, field, title);
-                // getBestFragment returns null when no query term is found in the text.
-                map.put(videoId, htmlTitle != null ? htmlTitle : title);
-            }
-            return new SearchResult(total, map);
-        }
-    }
-}

+ 0 - 95
content/content-service/src/test/java/cn/reghao/tnb/content/app/vod/service/LuceneDemo.java

@@ -1,95 +0,0 @@
-package cn.reghao.tnb.content.app.vod.service;
-
-import cn.reghao.jutil.jdk.serializer.JsonConverter;
-import cn.reghao.tnb.content.app.vod.model.vo.SearchResult;
-import com.google.gson.JsonObject;
-import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
-import org.apache.lucene.document.*;
-import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.queryparser.classic.ParseException;
-import org.apache.lucene.queryparser.classic.QueryParser;
-import org.apache.lucene.search.*;
-import org.apache.lucene.search.highlight.*;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.File;
-import java.io.IOException;
-import java.nio.file.Paths;
-import java.util.*;
-
-/**
- * Scratch code for experimenting with Lucene search and highlighting against
- * the index stored under {@code /opt/data/lucene}.
- *
- * @author reghao
- * @date 2023-03-02 09:12:40
- */
-public class LuceneDemo {
-    private final static Logger log = LoggerFactory.getLogger(LuceneDemo.class);
-    static String indexDirPath = "/opt/data/lucene";
-
-    /**
-     * Searches the {@code title} field and returns one page of hits with the
-     * matched terms wrapped in a red {@code <span>}.
-     *
-     * <p>The result map is keyed by {@code videoId} and iterates in hit order;
-     * a title for which the highlighter produces no fragment is returned
-     * unhighlighted.
-     *
-     * <p>NOTE(review): page 1 runs an exact {@link TermQuery} on the raw keyword
-     * while later pages and the highlighter use the parsed query, so page 1 can
-     * report different hits and a different total - confirm which one is meant.
-     *
-     * @param keyword    the query text
-     * @param pageSize   number of hits per page
-     * @param pageNumber 1-based page number
-     * @return the total hit count and the hits of the requested page
-     * @throws IOException                  if the index cannot be read
-     * @throws InvalidTokenOffsetsException if highlighting a title fails
-     * @throws ParseException               if {@code keyword} is not a valid query
-     */
-    static SearchResult highlighter(String keyword, int pageSize, int pageNumber)
-            throws IOException, InvalidTokenOffsetsException, ParseException {
-        String field = "title";
-        SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
-
-        QueryParser queryParser = new QueryParser(field, analyzer);
-        Query query = queryParser.parse(keyword);
-        TermQuery termQuery = new TermQuery(new Term(field, keyword));
-
-        // Directory and reader are opened per search and closed when it ends.
-        try (Directory directory = FSDirectory.open(Paths.get(indexDirPath));
-             IndexReader indexReader = DirectoryReader.open(directory)) {
-            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
-
-            long total;
-            ScoreDoc[] scoreDocs;
-            if (pageNumber == 1) {
-                TopDocs topDocs = indexSearcher.search(termQuery, pageSize);
-                total = topDocs.totalHits.value;
-                scoreDocs = topDocs.scoreDocs;
-            } else {
-                // Collect all hits of the preceding pages to find the hit after
-                // which the requested page starts.
-                int count = (pageNumber - 1) * pageSize;
-                TopDocs prevTopDocs = indexSearcher.searchAfter(null, query, count);
-                total = prevTopDocs.totalHits.value;
-
-                ScoreDoc[] prevScoreDocs = prevTopDocs.scoreDocs;
-                if (prevScoreDocs.length == 0) {
-                    // No hits at all: nothing to page after, and indexing the
-                    // last element would fail.
-                    scoreDocs = prevScoreDocs;
-                } else {
-                    ScoreDoc after = prevScoreDocs[prevScoreDocs.length - 1];
-                    scoreDocs = indexSearcher.searchAfter(after, query, pageSize).scoreDocs;
-                }
-            }
-
-            // LinkedHashMap keeps the hits in the order Lucene returned them.
-            Map<String, String> map = new LinkedHashMap<>();
-            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span style='color:red;'>", "</span>");
-            QueryScorer queryScorer = new QueryScorer(query);
-            Highlighter highlighter = new Highlighter(formatter, queryScorer);
-            for (ScoreDoc scoreDoc : scoreDocs) {
-                Document document = indexReader.document(scoreDoc.doc);
-                String videoId = document.get("videoId");
-                String title = document.get("title");
-                String htmlTitle = highlighter.getBestFragment(analyzer, field, title);
-                // getBestFragment returns null when no query term is found in the text.
-                map.put(videoId, htmlTitle != null ? htmlTitle : title);
-                log.info("{}: {} - {}", scoreDoc.score, videoId, title);
-            }
-            return new SearchResult(total, map);
-        }
-    }
-
-    /**
-     * Reads a JSON object from a fixed local file and, for each entry, takes the
-     * value as the answer and the part of the key before the first {@code |} as
-     * the title. Neither value is used afterwards.
-     */
-    static void parseQuestion() {
-        String filePath = "/home/reghao/Downloads/1/xxqg.json";
-        JsonObject jsonObject = JsonConverter.jsonToJsonElement(new File(filePath)).getAsJsonObject();
-
-        jsonObject.entrySet().forEach(entry -> {
-            String answer = entry.getValue().getAsString();
-            String str = entry.getKey();
-            // Non-breaking spaces in the key are replaced with "_" before splitting.
-            String[] strs = str.replace("\u00A0", "_").split("\\|");
-            String title = strs[0];
-        });
-    }
-}

+ 0 - 94
content/content-service/src/test/java/cn/reghao/tnb/content/app/vod/service/LuceneTest.java

@@ -1,94 +0,0 @@
-package cn.reghao.tnb.content.app.vod.service;
-
-import cn.reghao.tnb.content.app.ContentApplication;
-import cn.reghao.tnb.content.app.vod.db.mapper.VideoPostMapper;
-import cn.reghao.tnb.content.app.vod.model.po.VideoPost;
-import lombok.extern.slf4j.Slf4j;
-import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
-import org.apache.lucene.document.*;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
-import org.junit.jupiter.api.Test;
-import org.junit.runner.RunWith;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.boot.test.context.SpringBootTest;
-import org.springframework.test.context.ActiveProfiles;
-import org.springframework.test.context.junit4.SpringRunner;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.List;
-import java.util.stream.Collectors;
-
-/**
- * Builds the Lucene index under {@code /opt/data/lucene} from the video posts
- * returned by {@link VideoPostMapper}, reading them in pages of 10000 rows.
- *
- * <p>The JUnit 4 {@code @RunWith(SpringRunner.class)} annotation was dropped:
- * the test method uses the JUnit Jupiter {@code @Test}, which a JUnit 4 runner
- * does not discover.
- * NOTE(review): this assumes {@code @SpringBootTest} registers the Spring
- * extension for Jupiter in the Spring Boot version in use - confirm.
- */
-@Slf4j
-@ActiveProfiles("dev")
-@SpringBootTest(classes = ContentApplication.class)
-class LuceneTest {
-    static String indexDirPath = "/opt/data/lucene";
-
-    @Autowired
-    private VideoPostMapper videoPostMapper;
-
-    /**
-     * Pages through the video posts by id and indexes each page.
-     *
-     * @throws IOException if a page cannot be written to the index
-     */
-    @Test
-    void getPageByTag() throws IOException {
-        int pageSize = 10000;
-        int nextId = 0;
-
-        List<VideoPost> list = videoPostMapper.findAllById(pageSize, nextId);
-        while (!list.isEmpty()) {
-            process(list);
-
-            // The id of the last row of this page is the cursor for the next page.
-            nextId = list.get(list.size()-1).getId();
-            list = videoPostMapper.findAllById(pageSize, nextId);
-            log.info("get {}", nextId);
-        }
-    }
-
-    /**
-     * Converts the posts that have a description into documents and indexes them.
-     */
-    private void process(List<VideoPost> list) throws IOException {
-        List<Document> documents = list.stream()
-                .filter(videoPost -> videoPost.getDescription() != null)
-                .map(LuceneTest::getDocument)
-                .collect(Collectors.toList());
-        createIndex(documents);
-    }
-
-    /**
-     * Opens a writer on the index directory, configured with
-     * {@link SmartChineseAnalyzer}.
-     *
-     * @return a new writer; the caller is responsible for closing it
-     * @throws IOException if the directory or the writer cannot be opened
-     */
-    static IndexWriter getIndexWriter() throws IOException {
-        File indexDir = new File(indexDirPath);
-        Directory indexDirectory = FSDirectory.open(indexDir.toPath());
-
-        SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
-
-        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
-        return new IndexWriter(indexDirectory, indexWriterConfig);
-    }
-
-    /**
-     * Adds every document of {@code list} to the index and commits once.
-     *
-     * <p>A failure while adding a document is propagated so the test fails,
-     * instead of being printed and ignored; the writer is closed either way.
-     *
-     * @param list the documents to add
-     * @throws IOException if the index cannot be opened or any document cannot be written
-     */
-    static void createIndex(List<Document> list) throws IOException {
-        try (IndexWriter indexWriter = getIndexWriter()) {
-            for (Document doc : list) {
-                indexWriter.addDocument(doc);
-            }
-            indexWriter.commit();
-        }
-    }
-
-    /**
-     * Builds the index document for one post from its video id and title, both
-     * stored.
-     *
-     * <p>NOTE(review): {@code videoId} is indexed as an analyzed
-     * {@link TextField}; confirm whether an untokenized field was intended for
-     * an identifier. The description is not indexed, although
-     * {@code process} only keeps posts that have one.
-     *
-     * @param videoPost the post to convert
-     * @return the document to index
-     */
-    static Document getDocument(VideoPost videoPost) {
-        String videoId = videoPost.getVideoId();
-        String title = videoPost.getTitle();
-
-        Document document = new Document();
-        document.add(new TextField("videoId", videoId, Field.Store.YES));
-        document.add(new TextField("title", title, Field.Store.YES));
-        return document;
-    }
-}