|
@@ -0,0 +1,323 @@
|
|
|
|
|
+package cn.reghao.tnb.search.app.lucene;
|
|
|
|
|
+
|
|
|
|
|
+import cn.reghao.tnb.search.app.model.po.VideoText;
|
|
|
|
|
+import cn.reghao.tnb.search.app.model.vo.SearchResult;
|
|
|
|
|
+import cn.reghao.jutil.jdk.db.PageList;
|
|
|
|
|
+import cn.reghao.tnb.search.app.model.vo.VideoCard;
|
|
|
|
|
+import lombok.extern.slf4j.Slf4j;
|
|
|
|
|
+import org.apache.lucene.analysis.Analyzer;
|
|
|
|
|
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
|
|
|
|
+import org.apache.lucene.document.Document;
|
|
|
|
|
+import org.apache.lucene.document.LongPoint;
|
|
|
|
|
+import org.apache.lucene.index.DirectoryReader;
|
|
|
|
|
+import org.apache.lucene.index.IndexReader;
|
|
|
|
|
+import org.apache.lucene.index.Term;
|
|
|
|
|
+import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
|
|
|
|
|
+import org.apache.lucene.queryparser.classic.ParseException;
|
|
|
|
|
+import org.apache.lucene.queryparser.classic.QueryParser;
|
|
|
|
|
+import org.apache.lucene.search.*;
|
|
|
|
|
+import org.apache.lucene.search.highlight.Formatter;
|
|
|
|
|
+import org.apache.lucene.search.highlight.*;
|
|
|
|
|
+import org.apache.lucene.store.Directory;
|
|
|
|
|
+import org.apache.lucene.store.FSDirectory;
|
|
|
|
|
+import org.springframework.data.domain.Page;
|
|
|
|
|
+import org.springframework.data.domain.PageImpl;
|
|
|
|
|
+import org.springframework.data.domain.PageRequest;
|
|
|
|
|
+import org.springframework.stereotype.Service;
|
|
|
|
|
+import org.wltea.analyzer.lucene.IKAnalyzer;
|
|
|
|
|
+
|
|
|
|
|
+import java.io.IOException;
|
|
|
|
|
+import java.nio.file.Paths;
|
|
|
|
|
+import java.util.*;
|
|
|
|
|
+
|
|
|
|
|
+/**
|
|
|
|
|
+ * @author reghao
|
|
|
|
|
+ * @date 2023-03-02 09:29:04
|
|
|
|
|
+ */
|
|
|
|
|
+@Slf4j
|
|
|
|
|
+@Service
|
|
|
|
|
+public class LuceneQuery {
|
|
|
|
|
+ private final static String indexDirPath = "/opt/data/bntdata/jsearch";
|
|
|
|
|
+ private final Analyzer luceneAnalyzer;
|
|
|
|
|
+ private final SimpleHTMLFormatter formatter;
|
|
|
|
|
+ private final Directory directory;
|
|
|
|
|
+
|
|
|
|
|
+ public LuceneQuery() throws IOException {
|
|
|
|
|
+ this.luceneAnalyzer = new IKAnalyzer();
|
|
|
|
|
+ this.formatter = new SimpleHTMLFormatter("<span style='color:red;'>", "</span>");
|
|
|
|
|
+ this.directory = FSDirectory.open(Paths.get(indexDirPath));
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private IndexReader getIndexReader() throws IOException {
|
|
|
|
|
+ Directory directory = FSDirectory.open(Paths.get(indexDirPath));
|
|
|
|
|
+ IndexReader indexReader = DirectoryReader.open(directory);
|
|
|
|
|
+ return indexReader;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public Page<VideoText> queryWithHighlight(String index, String queryString, Integer pn, Integer ps) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ IndexReader indexReader = DirectoryReader.open(directory);
|
|
|
|
|
+ IndexSearcher indexSearcher = new IndexSearcher(indexReader);
|
|
|
|
|
+ String field = "title";
|
|
|
|
|
+ QueryParser queryParser = new QueryParser(field, luceneAnalyzer);
|
|
|
|
|
+ Query query = queryParser.parse(queryString);
|
|
|
|
|
+
|
|
|
|
|
+ long total;
|
|
|
|
|
+ TopDocs topDocs;
|
|
|
|
|
+ if (pn == 1) {
|
|
|
|
|
+ topDocs = indexSearcher.search(query, ps);
|
|
|
|
|
+ //topDocs = indexSearcher.searchAfter(null, query, pageSize);
|
|
|
|
|
+ total = topDocs.totalHits.value;
|
|
|
|
|
+ } else {
|
|
|
|
|
+ int count = (pn-1)*ps;
|
|
|
|
|
+ TopDocs prevTopDocs = indexSearcher.searchAfter(null, query, count);
|
|
|
|
|
+ total = prevTopDocs.totalHits.value;
|
|
|
|
|
+
|
|
|
|
|
+ ScoreDoc[] prevScoreDocs = prevTopDocs.scoreDocs;
|
|
|
|
|
+ ScoreDoc after = prevScoreDocs[prevScoreDocs.length-1];
|
|
|
|
|
+ topDocs = indexSearcher.searchAfter(after, query, ps);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ List<VideoText> list = new ArrayList<>();
|
|
|
|
|
+ ScoreDoc[] scoreDocs = topDocs.scoreDocs;
|
|
|
|
|
+ QueryScorer queryScorer = new QueryScorer(query);
|
|
|
|
|
+ Highlighter highlighter = new Highlighter(formatter, queryScorer);
|
|
|
|
|
+ for (ScoreDoc scoreDoc : scoreDocs) {
|
|
|
|
|
+ Document document = indexReader.document(scoreDoc.doc);
|
|
|
|
|
+ String videoId = document.get("videoId");
|
|
|
|
|
+ String title = document.get("title");
|
|
|
|
|
+ String htmlTitle = highlighter.getBestFragment(luceneAnalyzer, field, title);
|
|
|
|
|
+ boolean vip = Boolean.parseBoolean(document.get("vip"));
|
|
|
|
|
+ list.add(new VideoText(videoId, htmlTitle, vip));
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ PageRequest pageRequest = PageRequest.of(pn-1, ps);
|
|
|
|
|
+ return new PageImpl<>(list, pageRequest, total);
|
|
|
|
|
+ } catch (IOException | ParseException | InvalidTokenOffsetsException e) {
|
|
|
|
|
+ e.printStackTrace();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ return Page.empty();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public Document getDocument(String videoId) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ IndexReader indexReader = DirectoryReader.open(directory);
|
|
|
|
|
+ IndexSearcher indexSearcher = new IndexSearcher(indexReader);
|
|
|
|
|
+
|
|
|
|
|
+ Query query = new TermQuery(new Term("videoId", videoId));
|
|
|
|
|
+ TopDocs topDocs = indexSearcher.search(query, 1);
|
|
|
|
|
+ ScoreDoc[] scoreDocs = topDocs.scoreDocs;
|
|
|
|
|
+ if (scoreDocs.length == 1) {
|
|
|
|
|
+ Document document = indexReader.document(scoreDocs[0].doc);
|
|
|
|
|
+ return document;
|
|
|
|
|
+ }
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ e.printStackTrace();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ return null;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public PageList<VideoCard> searchByTitle(String keyword, int pageSize, int pageNumber) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ SearchResult searchResult = search(keyword, pageSize, pageNumber);
|
|
|
|
|
+ long total = searchResult.getTotal();
|
|
|
|
|
+ Map<String, String> result = searchResult.getResult();
|
|
|
|
|
+ Set<String> videoIds = result.keySet();
|
|
|
|
|
+ if (!videoIds.isEmpty()) {
|
|
|
|
|
+ /*List<VideoCard> list = redisHash.multiGet("video:card:hash", videoIds);
|
|
|
|
|
+ return PageList.pageList(pageNumber, pageSize, (int) total, list);*/
|
|
|
|
|
+ }
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ e.printStackTrace();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ return PageList.empty();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public SearchResult search(String keyword, int pageSize, int pageNumber)
|
|
|
|
|
+ throws IOException, InvalidTokenOffsetsException, ParseException {
|
|
|
|
|
+ Directory directory = FSDirectory.open(Paths.get(indexDirPath));
|
|
|
|
|
+ IndexReader indexReader = DirectoryReader.open(directory);
|
|
|
|
|
+ IndexSearcher indexSearcher = new IndexSearcher(indexReader);
|
|
|
|
|
+
|
|
|
|
|
+ String field = "title";
|
|
|
|
|
+ Analyzer analyzer = new StandardAnalyzer();
|
|
|
|
|
+ QueryParser queryParser = new QueryParser(field, analyzer);
|
|
|
|
|
+ Query query = queryParser.parse(keyword);
|
|
|
|
|
+ Query query1 = new TermQuery(new Term(field, keyword));
|
|
|
|
|
+
|
|
|
|
|
+ long total;
|
|
|
|
|
+ TopDocs topDocs;
|
|
|
|
|
+ if (pageNumber == 1) {
|
|
|
|
|
+ // topDocs = indexSearcher.search(query, pageSize);
|
|
|
|
|
+ topDocs = indexSearcher.searchAfter(null, query, pageSize);
|
|
|
|
|
+ total = topDocs.totalHits.value;
|
|
|
|
|
+ } else {
|
|
|
|
|
+ int count = (pageNumber-1)*pageSize;
|
|
|
|
|
+ TopDocs prevTopDocs = indexSearcher.searchAfter(null, query, count);
|
|
|
|
|
+ total = prevTopDocs.totalHits.value;
|
|
|
|
|
+
|
|
|
|
|
+ ScoreDoc[] prevScoreDocs = prevTopDocs.scoreDocs;
|
|
|
|
|
+ ScoreDoc after = prevScoreDocs[prevScoreDocs.length-1];
|
|
|
|
|
+ topDocs = indexSearcher.searchAfter(after, query, pageSize);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ ScoreDoc[] scoreDocs = topDocs.scoreDocs;
|
|
|
|
|
+ Map<String, String> map = new HashMap<>();
|
|
|
|
|
+ Formatter formatter = new SimpleHTMLFormatter("<em>", "</em>");
|
|
|
|
|
+ QueryScorer queryScorer = new QueryScorer(query);
|
|
|
|
|
+ Highlighter highlighter = new Highlighter(formatter, queryScorer);
|
|
|
|
|
+ for (ScoreDoc scoreDoc : scoreDocs) {
|
|
|
|
|
+ Document document = indexReader.document(scoreDoc.doc);
|
|
|
|
|
+ String videoId = document.get("videoId");
|
|
|
|
|
+ String title = document.get("title");
|
|
|
|
|
+ String htmlTitle = highlighter.getBestFragment(analyzer, field, title);
|
|
|
|
|
+ map.put(videoId, htmlTitle);
|
|
|
|
|
+ }
|
|
|
|
|
+ return new SearchResult(total, map);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public SearchResult highlighter(String keyword, int pageSize, int pageNumber)
|
|
|
|
|
+ throws IOException, InvalidTokenOffsetsException, ParseException {
|
|
|
|
|
+ Directory directory = FSDirectory.open(Paths.get(indexDirPath));
|
|
|
|
|
+ IndexReader indexReader = DirectoryReader.open(directory);
|
|
|
|
|
+ IndexSearcher indexSearcher = new IndexSearcher(indexReader);
|
|
|
|
|
+
|
|
|
|
|
+ String field = "title";
|
|
|
|
|
+ Analyzer analyzer = new IKAnalyzer();
|
|
|
|
|
+
|
|
|
|
|
+ QueryParser queryParser = new QueryParser(field, analyzer);
|
|
|
|
|
+ Query query = queryParser.parse(keyword);
|
|
|
|
|
+ TermQuery termQuery = new TermQuery(new Term(field, keyword));
|
|
|
|
|
+ FuzzyQuery fuzzyQuery = new FuzzyQuery(new Term(field, keyword), 1);
|
|
|
|
|
+ PhraseQuery.Builder builder = new PhraseQuery.Builder();
|
|
|
|
|
+ builder.add(new Term(field, keyword), 1);
|
|
|
|
|
+ PhraseQuery phraseQuery = builder.build();
|
|
|
|
|
+
|
|
|
|
|
+ long total;
|
|
|
|
|
+ TopDocs topDocs;
|
|
|
|
|
+ if (pageNumber == 1) {
|
|
|
|
|
+ topDocs = indexSearcher.search(termQuery, pageSize);
|
|
|
|
|
+ //topDocs = indexSearcher.searchAfter(null, query, pageSize);
|
|
|
|
|
+ total = topDocs.totalHits.value;
|
|
|
|
|
+ } else {
|
|
|
|
|
+ int count = (pageNumber-1)*pageSize;
|
|
|
|
|
+ TopDocs prevTopDocs = indexSearcher.searchAfter(null, query, count);
|
|
|
|
|
+ total = prevTopDocs.totalHits.value;
|
|
|
|
|
+
|
|
|
|
|
+ ScoreDoc[] prevScoreDocs = prevTopDocs.scoreDocs;
|
|
|
|
|
+ ScoreDoc after = prevScoreDocs[prevScoreDocs.length-1];
|
|
|
|
|
+ topDocs = indexSearcher.searchAfter(after, query, pageSize);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ ScoreDoc[] scoreDocs = topDocs.scoreDocs;
|
|
|
|
|
+ Map<String, String> map = new HashMap<>();
|
|
|
|
|
+ SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span style='color:red;'>", "</span>");
|
|
|
|
|
+ QueryScorer queryScorer = new QueryScorer(query);
|
|
|
|
|
+ Highlighter highlighter = new Highlighter(formatter, queryScorer);
|
|
|
|
|
+ for (ScoreDoc scoreDoc : scoreDocs) {
|
|
|
|
|
+ Document document = indexReader.document(scoreDoc.doc);
|
|
|
|
|
+ String videoId = document.get("videoId");
|
|
|
|
|
+ String title = document.get("title");
|
|
|
|
|
+ String htmlTitle = highlighter.getBestFragment(analyzer, field, title);
|
|
|
|
|
+ map.put(videoId, htmlTitle);
|
|
|
|
|
+ }
|
|
|
|
|
+ return new SearchResult(total, map);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ static void query(Query query) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ Directory directory = FSDirectory.open(Paths.get(indexDirPath));
|
|
|
|
|
+ IndexReader indexReader = DirectoryReader.open(directory);
|
|
|
|
|
+ IndexSearcher indexSearcher = new IndexSearcher(indexReader);
|
|
|
|
|
+
|
|
|
|
|
+ // 查询前 100 条数据
|
|
|
|
|
+ TopDocs topDocs = indexSearcher.search(query, 100);
|
|
|
|
|
+ log.info("本次搜索共找到" + topDocs.totalHits.value + "条数据");
|
|
|
|
|
+ ScoreDoc[] scoreDocs = topDocs.scoreDocs;
|
|
|
|
|
+ for (ScoreDoc scoreDoc : scoreDocs) {
|
|
|
|
|
+ Document document = indexReader.document(scoreDoc.doc);
|
|
|
|
|
+ log.info(document.toString());
|
|
|
|
|
+ //log.info("id={},name={},poems={},success={},score={}", document.get("id"), document.get("name"), document.get("poems"), document.get("success"), scoreDoc.score);
|
|
|
|
|
+ }
|
|
|
|
|
+ } catch (IOException e) {
|
|
|
|
|
+ e.printStackTrace();
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ static void termQuery() {
|
|
|
|
|
+ Query query = new TermQuery(new Term("name", "李白"));
|
|
|
|
|
+ query(query);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ static PrefixQuery prefixQuery(String field, String startWith) {
|
|
|
|
|
+ return new PrefixQuery(new Term(field, startWith));
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ static void prefixQuery1(String field, String startWith) {
|
|
|
|
|
+ Query query1 = new TermQuery(new Term("tableName", "RujiaHotel"));
|
|
|
|
|
+ Query query2 = new PrefixQuery(new Term(field, startWith));
|
|
|
|
|
+
|
|
|
|
|
+ BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
|
|
|
|
+ builder.add(query1, BooleanClause.Occur.MUST);
|
|
|
|
|
+ builder.add(query2, BooleanClause.Occur.MUST);
|
|
|
|
|
+ BooleanQuery booleanQuery = builder.build();
|
|
|
|
|
+ query(booleanQuery);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ static void wildcardQuery() {
|
|
|
|
|
+ Query query = new WildcardQuery(new Term("name", "李?"));
|
|
|
|
|
+ query(query);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ static void fuzzyQuery() {
|
|
|
|
|
+ //"李百"->"李白",只需修改一次,故可以搜索到数据;"里百"则搜索不到数据
|
|
|
|
|
+ Query query = new FuzzyQuery(new Term("name", "里百"), 1);
|
|
|
|
|
+ query(query);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ static void numberQuery() {
|
|
|
|
|
+ //精确查询
|
|
|
|
|
+ Query query = LongPoint.newExactQuery("id", 123456);
|
|
|
|
|
+ query(query);
|
|
|
|
|
+
|
|
|
|
|
+ //范围查询
|
|
|
|
|
+ query = LongPoint.newRangeQuery("id", 123L, 12345678L);
|
|
|
|
|
+ query(query);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ static void booleanQuery() {
|
|
|
|
|
+ Query query1 = new TermQuery(new Term("name", "李白"));
|
|
|
|
|
+ Query query2 = new TermQuery(new Term("name", "杜甫"));
|
|
|
|
|
+ BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
|
|
|
|
+ builder.add(query1, BooleanClause.Occur.SHOULD);
|
|
|
|
|
+ builder.add(query2, BooleanClause.Occur.SHOULD);
|
|
|
|
|
+ BooleanQuery booleanQuery = builder.build();
|
|
|
|
|
+ query(booleanQuery);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ static void boostQuery() {
|
|
|
|
|
+ Query query1 = new BoostQuery(new TermQuery(new Term("name", "李白")), 1.5f);
|
|
|
|
|
+ Query query2 = new BoostQuery(new TermQuery(new Term("name", "杜甫")), 1.6f);
|
|
|
|
|
+ BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
|
|
|
|
+ builder.add(query1, BooleanClause.Occur.SHOULD);
|
|
|
|
|
+ builder.add(query2, BooleanClause.Occur.SHOULD);
|
|
|
|
|
+ BooleanQuery booleanQuery = builder.build();
|
|
|
|
|
+
|
|
|
|
|
+ query(booleanQuery);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ static void queryParserQuery() throws ParseException {
|
|
|
|
|
+ IKAnalyzer ikAnalyzer = new IKAnalyzer();
|
|
|
|
|
+ QueryParser queryParser = new QueryParser("name", ikAnalyzer);
|
|
|
|
|
+ Query query = queryParser.parse("李白和杜甫");
|
|
|
|
|
+ query(query);
|
|
|
|
|
+
|
|
|
|
|
+ //多字段查询
|
|
|
|
|
+ MultiFieldQueryParser multiFieldQueryParser = new MultiFieldQueryParser(new String[]{"name", "about"}, ikAnalyzer);
|
|
|
|
|
+ query = multiFieldQueryParser.parse("李白和子美");
|
|
|
|
|
+ query(query);
|
|
|
|
|
+ }
|
|
|
|
|
+}
|