|
|
@@ -1,12 +1,6 @@
|
|
|
package cn.reghao.tnb.search.app.lucene;
|
|
|
|
|
|
-import cn.reghao.tnb.search.app.model.po.VideoText;
|
|
|
-import cn.reghao.tnb.search.app.model.vo.SearchResult;
|
|
|
-import cn.reghao.jutil.jdk.db.PageList;
|
|
|
-import cn.reghao.tnb.search.app.model.vo.VideoCard;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
-import org.apache.lucene.analysis.Analyzer;
|
|
|
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
|
|
import org.apache.lucene.document.Document;
|
|
|
import org.apache.lucene.document.LongPoint;
|
|
|
import org.apache.lucene.index.DirectoryReader;
|
|
|
@@ -16,238 +10,19 @@ import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
|
|
|
import org.apache.lucene.queryparser.classic.ParseException;
|
|
|
import org.apache.lucene.queryparser.classic.QueryParser;
|
|
|
import org.apache.lucene.search.*;
|
|
|
-import org.apache.lucene.search.highlight.Formatter;
|
|
|
-import org.apache.lucene.search.highlight.*;
|
|
|
import org.apache.lucene.store.Directory;
|
|
|
import org.apache.lucene.store.FSDirectory;
|
|
|
-import org.springframework.data.domain.Page;
|
|
|
-import org.springframework.data.domain.PageImpl;
|
|
|
-import org.springframework.data.domain.PageRequest;
|
|
|
-import org.springframework.stereotype.Service;
|
|
|
import org.wltea.analyzer.lucene.IKAnalyzer;
|
|
|
|
|
|
import java.io.IOException;
|
|
|
import java.nio.file.Paths;
|
|
|
-import java.util.*;
|
|
|
|
|
|
/**
|
|
|
* @author reghao
|
|
|
- * @date 2023-03-02 09:29:04
|
|
|
+ * @date 2025-08-20 17:32:59
|
|
|
*/
|
|
|
@Slf4j
|
|
|
-@Service
|
|
|
public class LuceneQuery {
|
|
|
- private final static String indexDirPath = "/opt/data/bntdata/jsearch";
|
|
|
- private final Analyzer luceneAnalyzer;
|
|
|
- private final SimpleHTMLFormatter formatter;
|
|
|
- private final Directory directory;
|
|
|
-
|
|
|
- public LuceneQuery() throws IOException {
|
|
|
- this.luceneAnalyzer = new IKAnalyzer();
|
|
|
- this.formatter = new SimpleHTMLFormatter("<span style='color:red;'>", "</span>");
|
|
|
- this.directory = FSDirectory.open(Paths.get(indexDirPath));
|
|
|
- }
|
|
|
-
|
|
|
- private IndexReader getIndexReader() throws IOException {
|
|
|
- Directory directory = FSDirectory.open(Paths.get(indexDirPath));
|
|
|
- IndexReader indexReader = DirectoryReader.open(directory);
|
|
|
- return indexReader;
|
|
|
- }
|
|
|
-
|
|
|
- public Page<VideoText> queryWithHighlight(String index, String queryString, Integer pn, Integer ps) {
|
|
|
- try {
|
|
|
- IndexReader indexReader = DirectoryReader.open(directory);
|
|
|
- IndexSearcher indexSearcher = new IndexSearcher(indexReader);
|
|
|
- String field = "title";
|
|
|
- QueryParser queryParser = new QueryParser(field, luceneAnalyzer);
|
|
|
- Query query = queryParser.parse(queryString);
|
|
|
-
|
|
|
- long total;
|
|
|
- TopDocs topDocs;
|
|
|
- if (pn == 1) {
|
|
|
- topDocs = indexSearcher.search(query, ps);
|
|
|
- //topDocs = indexSearcher.searchAfter(null, query, pageSize);
|
|
|
- total = topDocs.totalHits.value;
|
|
|
- } else {
|
|
|
- int count = (pn-1)*ps;
|
|
|
- TopDocs prevTopDocs = indexSearcher.searchAfter(null, query, count);
|
|
|
- total = prevTopDocs.totalHits.value;
|
|
|
-
|
|
|
- ScoreDoc[] prevScoreDocs = prevTopDocs.scoreDocs;
|
|
|
- ScoreDoc after = prevScoreDocs[prevScoreDocs.length-1];
|
|
|
- topDocs = indexSearcher.searchAfter(after, query, ps);
|
|
|
- }
|
|
|
-
|
|
|
- List<VideoText> list = new ArrayList<>();
|
|
|
- ScoreDoc[] scoreDocs = topDocs.scoreDocs;
|
|
|
- QueryScorer queryScorer = new QueryScorer(query);
|
|
|
- Highlighter highlighter = new Highlighter(formatter, queryScorer);
|
|
|
- for (ScoreDoc scoreDoc : scoreDocs) {
|
|
|
- Document document = indexReader.document(scoreDoc.doc);
|
|
|
- String videoId = document.get("videoId");
|
|
|
- String title = document.get("title");
|
|
|
- String htmlTitle = highlighter.getBestFragment(luceneAnalyzer, field, title);
|
|
|
- int scope = Integer.parseInt(document.get("scope"));
|
|
|
- long publishTime = Long.parseLong(document.get("publishTime"));
|
|
|
- list.add(new VideoText(videoId, htmlTitle, scope, publishTime));
|
|
|
- }
|
|
|
-
|
|
|
- PageRequest pageRequest = PageRequest.of(pn-1, ps);
|
|
|
- return new PageImpl<>(list, pageRequest, total);
|
|
|
- } catch (IOException | ParseException | InvalidTokenOffsetsException e) {
|
|
|
- e.printStackTrace();
|
|
|
- }
|
|
|
-
|
|
|
- return Page.empty();
|
|
|
- }
|
|
|
-
|
|
|
- public Document getDocument(String videoId) {
|
|
|
- try {
|
|
|
- IndexReader indexReader = DirectoryReader.open(directory);
|
|
|
- IndexSearcher indexSearcher = new IndexSearcher(indexReader);
|
|
|
-
|
|
|
- Query query = new TermQuery(new Term("videoId", videoId));
|
|
|
- TopDocs topDocs = indexSearcher.search(query, 1);
|
|
|
- ScoreDoc[] scoreDocs = topDocs.scoreDocs;
|
|
|
- if (scoreDocs.length == 1) {
|
|
|
- Document document = indexReader.document(scoreDocs[0].doc);
|
|
|
- return document;
|
|
|
- }
|
|
|
- } catch (Exception e) {
|
|
|
- e.printStackTrace();
|
|
|
- }
|
|
|
-
|
|
|
- return null;
|
|
|
- }
|
|
|
-
|
|
|
- public PageList<VideoCard> searchByTitle(String keyword, int pageSize, int pageNumber) {
|
|
|
- try {
|
|
|
- SearchResult searchResult = search(keyword, pageSize, pageNumber);
|
|
|
- long total = searchResult.getTotal();
|
|
|
- Map<String, String> result = searchResult.getResult();
|
|
|
- Set<String> videoIds = result.keySet();
|
|
|
- if (!videoIds.isEmpty()) {
|
|
|
- /*List<VideoCard> list = redisHash.multiGet("video:card:hash", videoIds);
|
|
|
- return PageList.pageList(pageNumber, pageSize, (int) total, list);*/
|
|
|
- }
|
|
|
- } catch (Exception e) {
|
|
|
- e.printStackTrace();
|
|
|
- }
|
|
|
-
|
|
|
- return PageList.empty();
|
|
|
- }
|
|
|
-
|
|
|
- public SearchResult search(String keyword, int pageSize, int pageNumber)
|
|
|
- throws IOException, InvalidTokenOffsetsException, ParseException {
|
|
|
- Directory directory = FSDirectory.open(Paths.get(indexDirPath));
|
|
|
- IndexReader indexReader = DirectoryReader.open(directory);
|
|
|
- IndexSearcher indexSearcher = new IndexSearcher(indexReader);
|
|
|
-
|
|
|
- String field = "title";
|
|
|
- Analyzer analyzer = new StandardAnalyzer();
|
|
|
- QueryParser queryParser = new QueryParser(field, analyzer);
|
|
|
- Query query = queryParser.parse(keyword);
|
|
|
- Query query1 = new TermQuery(new Term(field, keyword));
|
|
|
-
|
|
|
- long total;
|
|
|
- TopDocs topDocs;
|
|
|
- if (pageNumber == 1) {
|
|
|
- // topDocs = indexSearcher.search(query, pageSize);
|
|
|
- topDocs = indexSearcher.searchAfter(null, query, pageSize);
|
|
|
- total = topDocs.totalHits.value;
|
|
|
- } else {
|
|
|
- int count = (pageNumber-1)*pageSize;
|
|
|
- TopDocs prevTopDocs = indexSearcher.searchAfter(null, query, count);
|
|
|
- total = prevTopDocs.totalHits.value;
|
|
|
-
|
|
|
- ScoreDoc[] prevScoreDocs = prevTopDocs.scoreDocs;
|
|
|
- ScoreDoc after = prevScoreDocs[prevScoreDocs.length-1];
|
|
|
- topDocs = indexSearcher.searchAfter(after, query, pageSize);
|
|
|
- }
|
|
|
-
|
|
|
- ScoreDoc[] scoreDocs = topDocs.scoreDocs;
|
|
|
- Map<String, String> map = new HashMap<>();
|
|
|
- Formatter formatter = new SimpleHTMLFormatter("<em>", "</em>");
|
|
|
- QueryScorer queryScorer = new QueryScorer(query);
|
|
|
- Highlighter highlighter = new Highlighter(formatter, queryScorer);
|
|
|
- for (ScoreDoc scoreDoc : scoreDocs) {
|
|
|
- Document document = indexReader.document(scoreDoc.doc);
|
|
|
- String videoId = document.get("videoId");
|
|
|
- String title = document.get("title");
|
|
|
- String htmlTitle = highlighter.getBestFragment(analyzer, field, title);
|
|
|
- map.put(videoId, htmlTitle);
|
|
|
- }
|
|
|
- return new SearchResult(total, map);
|
|
|
- }
|
|
|
-
|
|
|
- public SearchResult highlighter(String keyword, int pageSize, int pageNumber)
|
|
|
- throws IOException, InvalidTokenOffsetsException, ParseException {
|
|
|
- Directory directory = FSDirectory.open(Paths.get(indexDirPath));
|
|
|
- IndexReader indexReader = DirectoryReader.open(directory);
|
|
|
- IndexSearcher indexSearcher = new IndexSearcher(indexReader);
|
|
|
-
|
|
|
- String field = "title";
|
|
|
- Analyzer analyzer = new IKAnalyzer();
|
|
|
-
|
|
|
- QueryParser queryParser = new QueryParser(field, analyzer);
|
|
|
- Query query = queryParser.parse(keyword);
|
|
|
- TermQuery termQuery = new TermQuery(new Term(field, keyword));
|
|
|
- FuzzyQuery fuzzyQuery = new FuzzyQuery(new Term(field, keyword), 1);
|
|
|
- PhraseQuery.Builder builder = new PhraseQuery.Builder();
|
|
|
- builder.add(new Term(field, keyword), 1);
|
|
|
- PhraseQuery phraseQuery = builder.build();
|
|
|
-
|
|
|
- long total;
|
|
|
- TopDocs topDocs;
|
|
|
- if (pageNumber == 1) {
|
|
|
- topDocs = indexSearcher.search(termQuery, pageSize);
|
|
|
- //topDocs = indexSearcher.searchAfter(null, query, pageSize);
|
|
|
- total = topDocs.totalHits.value;
|
|
|
- } else {
|
|
|
- int count = (pageNumber-1)*pageSize;
|
|
|
- TopDocs prevTopDocs = indexSearcher.searchAfter(null, query, count);
|
|
|
- total = prevTopDocs.totalHits.value;
|
|
|
-
|
|
|
- ScoreDoc[] prevScoreDocs = prevTopDocs.scoreDocs;
|
|
|
- ScoreDoc after = prevScoreDocs[prevScoreDocs.length-1];
|
|
|
- topDocs = indexSearcher.searchAfter(after, query, pageSize);
|
|
|
- }
|
|
|
-
|
|
|
- ScoreDoc[] scoreDocs = topDocs.scoreDocs;
|
|
|
- Map<String, String> map = new HashMap<>();
|
|
|
- SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span style='color:red;'>", "</span>");
|
|
|
- QueryScorer queryScorer = new QueryScorer(query);
|
|
|
- Highlighter highlighter = new Highlighter(formatter, queryScorer);
|
|
|
- for (ScoreDoc scoreDoc : scoreDocs) {
|
|
|
- Document document = indexReader.document(scoreDoc.doc);
|
|
|
- String videoId = document.get("videoId");
|
|
|
- String title = document.get("title");
|
|
|
- String htmlTitle = highlighter.getBestFragment(analyzer, field, title);
|
|
|
- map.put(videoId, htmlTitle);
|
|
|
- }
|
|
|
- return new SearchResult(total, map);
|
|
|
- }
|
|
|
-
|
|
|
- static void query(Query query) {
|
|
|
- try {
|
|
|
- Directory directory = FSDirectory.open(Paths.get(indexDirPath));
|
|
|
- IndexReader indexReader = DirectoryReader.open(directory);
|
|
|
- IndexSearcher indexSearcher = new IndexSearcher(indexReader);
|
|
|
-
|
|
|
- // 查询前 100 条数据
|
|
|
- TopDocs topDocs = indexSearcher.search(query, 100);
|
|
|
- log.info("本次搜索共找到" + topDocs.totalHits.value + "条数据");
|
|
|
- ScoreDoc[] scoreDocs = topDocs.scoreDocs;
|
|
|
- for (ScoreDoc scoreDoc : scoreDocs) {
|
|
|
- Document document = indexReader.document(scoreDoc.doc);
|
|
|
- log.info(document.toString());
|
|
|
- //log.info("id={},name={},poems={},success={},score={}", document.get("id"), document.get("name"), document.get("poems"), document.get("success"), scoreDoc.score);
|
|
|
- }
|
|
|
- } catch (IOException e) {
|
|
|
- e.printStackTrace();
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
static void termQuery() {
|
|
|
Query query = new TermQuery(new Term("name", "李白"));
|
|
|
query(query);
|
|
|
@@ -321,4 +96,27 @@ public class LuceneQuery {
|
|
|
query = multiFieldQueryParser.parse("李白和子美");
|
|
|
query(query);
|
|
|
}
|
|
|
+
|
|
|
+    /**
+     * Runs the given query against the {@code wenshu_lucene} index and logs the hits.
+     *
+     * <p>Opens the index directory {@code /opt/data/search_data/native_lucene/wenshu_lucene},
+     * fetches at most the top 100 matching documents, logs the total hit count and then each
+     * returned document. The directory and the reader are closed before the method returns.
+     * An {@link IOException} raised while opening, searching or closing the index is logged
+     * and not rethrown.
+     *
+     * @param query the Lucene query to execute
+     */
+    public static void query(Query query) {
+        String nativeLuceneDir = "/opt/data/search_data/native_lucene";
+        String indexName = "wenshu_lucene";
+        String indexDir = String.format("%s/%s", nativeLuceneDir, indexName);
+        // try-with-resources: both the Directory and the IndexReader hold file handles and
+        // must be released even when the search throws.
+        try (Directory directory = FSDirectory.open(Paths.get(indexDir));
+             IndexReader indexReader = DirectoryReader.open(directory)) {
+            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
+
+            // Fetch the top 100 hits
+            TopDocs topDocs = indexSearcher.search(query, 100);
+            log.info("本次搜索共找到" + topDocs.totalHits.value + "条数据");
+            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
+                Document document = indexReader.document(scoreDoc.doc);
+                log.info(document.toString());
+                //log.info("id={},name={},poems={},success={},score={}", document.get("id"), document.get("name"), document.get("poems"), document.get("success"), scoreDoc.score);
+            }
+        } catch (IOException e) {
+            // Route the failure through the class logger instead of stderr.
+            log.error("Lucene query against index {} failed", indexDir, e);
+        }
+    }
|
|
|
}
|