|
|
@@ -1,11 +1,16 @@
|
|
|
package cn.reghao.tnb.search.app.lucene;
|
|
|
|
|
|
+import cn.reghao.jutil.jdk.db.Page;
|
|
|
import cn.reghao.jutil.jdk.db.PageList;
|
|
|
+import cn.reghao.tnb.search.app.config.ElasticProperties;
|
|
|
import cn.reghao.tnb.search.app.model.vo.ElasticQuery;
|
|
|
+import cn.reghao.tnb.search.app.model.vo.LuceneQuery;
|
|
|
import cn.reghao.tnb.search.app.model.vo.SearchResult;
|
|
|
+import cn.reghao.tnb.search.app.model.vo.VideoQuery;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
import org.apache.lucene.analysis.Analyzer;
|
|
|
import org.apache.lucene.document.Document;
|
|
|
+import org.apache.lucene.document.IntPoint;
|
|
|
import org.apache.lucene.index.DirectoryReader;
|
|
|
import org.apache.lucene.index.IndexReader;
|
|
|
import org.apache.lucene.index.Term;
|
|
|
@@ -33,120 +38,41 @@ import java.util.stream.Collectors;
|
|
|
@Slf4j
|
|
|
@Service
|
|
|
public class LuceneSearch {
|
|
|
- private String nativeLuceneDir = "/opt/data/search_data/native_lucene";
|
|
|
+ private String nativeLuceneDir;
|
|
|
private Analyzer luceneAnalyzer;
|
|
|
private SimpleHTMLFormatter formatter;
|
|
|
+ private Map<String, IndexReader> indexReaderMap = new HashMap<>();
|
|
|
|
|
|
- public LuceneSearch() {
|
|
|
+ public LuceneSearch(ElasticProperties elasticProperties) {
|
|
|
+ this.nativeLuceneDir = elasticProperties.getNativeLuceneDir();
|
|
|
this.luceneAnalyzer = new IKAnalyzer();
|
|
|
this.formatter = new SimpleHTMLFormatter("<span style='color:red;'>", "</span>");
|
|
|
}
|
|
|
|
|
|
- public IndexSearcher getIndexSearcher(String indexName) throws IOException {
|
|
|
- String indexDir = String.format("%s/%s", nativeLuceneDir, indexName);
|
|
|
- Directory directory = FSDirectory.open(Paths.get(indexDir));
|
|
|
- IndexReader indexReader = DirectoryReader.open(directory);
|
|
|
- IndexSearcher indexSearcher = new IndexSearcher(indexReader);
|
|
|
- return indexSearcher;
|
|
|
+    /**
+     * Returns the cached {@link IndexReader} for the given index, opening it on first use.
+     *
+     * <p>The lookup, the open and the cache insert all run under the instance lock.
+     * {@code indexReaderMap} is a plain {@link HashMap}, so reading it outside the lock is
+     * not safe, and without a re-check inside the lock two threads that both missed would
+     * each open the same index, the second {@code put} replacing a reader that is then
+     * never closed.
+     *
+     * <p>NOTE(review): a cached reader is never reopened or closed by this method, so it
+     * presumably keeps serving the index as it was when first opened. Confirm whether
+     * index updates must become visible (see {@code DirectoryReader.openIfChanged}).
+     *
+     * @param indexName name of the index to read
+     * @return the shared reader for {@code indexName}
+     * @throws IOException if the index has to be opened and opening it fails
+     */
+    public IndexReader getIndexReader(String indexName) throws IOException {
+        synchronized (this) {
+            IndexReader indexReader = indexReaderMap.get(indexName);
+            if (indexReader == null) {
+                // First request for this index: open once and remember it.
+                indexReader = getIndexReader0(indexName);
+                indexReaderMap.put(indexName, indexReader);
+            }
+            return indexReader;
+        }
     }
|
|
|
|
|
|
- public IndexReader getIndexReader(String indexName) throws IOException {
|
|
|
+ public IndexReader getIndexReader0(String indexName) throws IOException {
|
|
|
String indexDir = String.format("%s/%s", nativeLuceneDir, indexName);
|
|
|
Directory directory = FSDirectory.open(Paths.get(indexDir));
|
|
|
- return DirectoryReader.open(directory);
|
|
|
+ IndexReader indexReader = DirectoryReader.open(directory);
|
|
|
+ return indexReader;
|
|
|
}
|
|
|
|
|
|
public PageList queryWithHighlight(ElasticQuery elasticQuery, Class clazz) {
|
|
|
- int pageSize = elasticQuery.getPageSize();
|
|
|
- int pageNumber = elasticQuery.getPageNumber();
|
|
|
- String indexName = elasticQuery.getIndexName();
|
|
|
- String highlightFieldName = elasticQuery.getHighlightFieldName();
|
|
|
- List<String> otherFiledNames = elasticQuery.getOtherFiledNames();
|
|
|
- String queryString = elasticQuery.getQueryString();
|
|
|
try {
|
|
|
- Set<String> queryFields = new HashSet<>(otherFiledNames);
|
|
|
- queryFields.add(highlightFieldName);
|
|
|
-
|
|
|
- Query query;
|
|
|
- if (otherFiledNames.isEmpty()) {
|
|
|
- // 单字段查询
|
|
|
- QueryParser queryParser = new QueryParser(highlightFieldName, luceneAnalyzer);
|
|
|
- query = queryParser.parse(queryString);
|
|
|
- } else {
|
|
|
- // 多字段查询
|
|
|
- List<String> fieldList = new ArrayList<>(otherFiledNames);
|
|
|
- fieldList.add(highlightFieldName);
|
|
|
- String[] fields = fieldList.toArray(new String[0]);
|
|
|
- MultiFieldQueryParser multiFieldQuery = new MultiFieldQueryParser(fields, luceneAnalyzer);
|
|
|
- query = multiFieldQuery.parse(queryString);
|
|
|
- }
|
|
|
-
|
|
|
- IndexReader indexReader = getIndexReader(indexName);
|
|
|
- IndexSearcher indexSearcher = new IndexSearcher(indexReader);
|
|
|
- long total;
|
|
|
- TopDocs topDocs;
|
|
|
- if (pageNumber == 1) {
|
|
|
- int count = pageSize;
|
|
|
- topDocs = indexSearcher.search(query, count);
|
|
|
- total = topDocs.totalHits.value;
|
|
|
- } else {
|
|
|
- int count = (pageNumber-1)*pageSize;
|
|
|
- TopDocs prevTopDocs = indexSearcher.searchAfter(null, query, count);
|
|
|
- total = prevTopDocs.totalHits.value;
|
|
|
-
|
|
|
- ScoreDoc[] prevScoreDocs = prevTopDocs.scoreDocs;
|
|
|
- ScoreDoc after = prevScoreDocs[prevScoreDocs.length-1];
|
|
|
- topDocs = indexSearcher.searchAfter(after, query, pageSize);
|
|
|
- }
|
|
|
-
|
|
|
- QueryScorer queryScorer = new QueryScorer(query);
|
|
|
- Highlighter highlighter = new Highlighter(formatter, queryScorer);
|
|
|
-
|
|
|
- Map<String, Field> map = Arrays.stream(clazz.getDeclaredFields())
|
|
|
- .collect(Collectors.toMap(Field::getName, k -> k));
|
|
|
- //List<T> list = new ArrayList<>();
|
|
|
- List<Object> list = new ArrayList<>();
|
|
|
- ScoreDoc[] scoreDocs = topDocs.scoreDocs;
|
|
|
- for (ScoreDoc scoreDoc : scoreDocs) {
|
|
|
- Document document = indexReader.document(scoreDoc.doc);
|
|
|
-
|
|
|
- //T tObject = null;
|
|
|
- Object object = null;
|
|
|
- for (Constructor<?> constructor : clazz.getDeclaredConstructors()) {
|
|
|
- int len = constructor.getParameterTypes().length;
|
|
|
- if (len == 0) {
|
|
|
- object = constructor.newInstance();
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- for (Map.Entry<String, Field> entry : map.entrySet()) {
|
|
|
- String fieldName = entry.getKey();
|
|
|
- Field field0 = clazz.getDeclaredField(fieldName);
|
|
|
-
|
|
|
- String fieldValue = document.get(fieldName);
|
|
|
- /*if (fieldName.equals(highlightFieldName)) {
|
|
|
- fieldValue = highlighter.getBestFragment(luceneAnalyzer, highlightFieldName, fieldValue);
|
|
|
- }*/
|
|
|
- if (queryFields.contains(fieldName)) {
|
|
|
- fieldValue = highlighter.getBestFragment(luceneAnalyzer, fieldName, fieldValue);
|
|
|
- }
|
|
|
-
|
|
|
- field0.setAccessible(true);
|
|
|
- Class<?> fieldType = field0.getType();
|
|
|
- if (fieldType.equals(String.class)) {
|
|
|
- field0.set(object, fieldValue);
|
|
|
- } else if (field0.getType().equals(int.class) || field0.getType().equals(Integer.class)) {
|
|
|
- field0.set(object, Integer.parseInt(fieldValue));
|
|
|
- } else if (field0.getType().equals(long.class) || field0.getType().equals(Long.class)) {
|
|
|
- field0.set(object, Long.parseLong(fieldValue));
|
|
|
- } else if (field0.getType().equals(double.class) || field0.getType().equals(Double.class)) {
|
|
|
- field0.set(object, Double.parseDouble(fieldValue));
|
|
|
- }
|
|
|
- }
|
|
|
- list.add(object);
|
|
|
- }
|
|
|
-
|
|
|
- return PageList.pageList(pageNumber, pageSize, (int) total, list);
|
|
|
+ LuceneQuery luceneQuery = getLuceneQueryByElasticQuery(elasticQuery);
|
|
|
+ return getResults(luceneQuery, clazz);
|
|
|
} catch (Exception e) {
|
|
|
e.printStackTrace();
|
|
|
}
|
|
|
@@ -154,19 +80,60 @@ public class LuceneSearch {
|
|
|
return PageList.empty();
|
|
|
}
|
|
|
|
|
|
- public SearchResult search(String indexName, String keyword, int pageNumber, int pageSize) throws Exception {
|
|
|
- String field = "fullText";
|
|
|
- QueryParser queryParser = new QueryParser(field, luceneAnalyzer);
|
|
|
- Query query = queryParser.parse(keyword);
|
|
|
- Query query1 = new TermQuery(new Term(field, keyword));
|
|
|
+    /**
+     * Builds a {@link LuceneQuery} from an {@link ElasticQuery}.
+     *
+     * <p>The query string is parsed against the query field names via
+     * {@code getQueryByQueryParser}; paging, index name and the highlight field names
+     * (copied into a set) are carried over unchanged.
+     *
+     * @param elasticQuery the incoming search request
+     * @return the equivalent Lucene-level query description
+     * @throws Exception if the query string cannot be turned into a Lucene query
+     */
+    private LuceneQuery getLuceneQueryByElasticQuery(ElasticQuery elasticQuery) throws Exception {
+        Set<String> highlightFields = new HashSet<>(elasticQuery.getHighlightFiledNames());
+        Query parsedQuery = getQueryByQueryParser(
+                elasticQuery.getQueryFiledNames(), elasticQuery.getQueryString());
+
+        return new LuceneQuery.Builder()
+                .pageSize(elasticQuery.getPageSize())
+                .pageNumber(elasticQuery.getPageNumber())
+                .indexName(elasticQuery.getIndexName())
+                .highlightFiledNames(highlightFields)
+                .query(parsedQuery)
+                .build();
+    }
|
|
|
+
|
|
|
+    /**
+     * Builds a {@link LuceneQuery} from a {@link VideoQuery}.
+     *
+     * <p>The resulting query requires both the parsed text query and a set query on the
+     * {@code "scope"} int-point field built from {@code videoQuery.getScopes()}.
+     *
+     * @param videoQuery the incoming video search request
+     * @return the equivalent Lucene-level query description
+     * @throws Exception if the query string cannot be turned into a Lucene query
+     */
+    private LuceneQuery getLuceneQueryByVideoQuery(VideoQuery videoQuery) throws Exception {
+        Set<String> highlightFields = new HashSet<>(videoQuery.getHighlightFiledNames());
+
+        Query textQuery = getQueryByQueryParser(
+                videoQuery.getQueryFiledNames(), videoQuery.getQueryString());
+        Query scopeFilter = IntPoint.newSetQuery("scope", videoQuery.getScopes());
+
+        // Both clauses are MUST: a hit has to match the text and one of the scopes.
+        BooleanQuery.Builder conjunction = new BooleanQuery.Builder();
+        conjunction.add(textQuery, BooleanClause.Occur.MUST);
+        conjunction.add(scopeFilter, BooleanClause.Occur.MUST);
+        Query scopedQuery = conjunction.build();
+
+        return new LuceneQuery.Builder()
+                .pageSize(videoQuery.getPageSize())
+                .pageNumber(videoQuery.getPageNumber())
+                .indexName(videoQuery.getIndexName())
+                .highlightFiledNames(highlightFields)
+                .query(scopedQuery)
+                .build();
+    }
|
|
|
+
|
|
|
+ private PageList getResults(LuceneQuery luceneQuery, Class clazz) throws Exception {
|
|
|
+ String indexName = luceneQuery.getIndexName();
|
|
|
+ int pageNumber = luceneQuery.getPageNumber();
|
|
|
+ int pageSize = luceneQuery.getPageSize();
|
|
|
+ Query query = luceneQuery.getQuery();
|
|
|
+ Set<String> highlightFiledNames = luceneQuery.getHighlightFiledNames();
|
|
|
|
|
|
IndexReader indexReader = getIndexReader(indexName);
|
|
|
IndexSearcher indexSearcher = new IndexSearcher(indexReader);
|
|
|
long total;
|
|
|
TopDocs topDocs;
|
|
|
if (pageNumber == 1) {
|
|
|
- // topDocs = indexSearcher.search(query, pageSize);
|
|
|
- topDocs = indexSearcher.searchAfter(null, query, pageSize);
|
|
|
+ int count = pageSize;
|
|
|
+ topDocs = indexSearcher.search(query, count);
|
|
|
total = topDocs.totalHits.value;
|
|
|
} else {
|
|
|
int count = (pageNumber-1)*pageSize;
|
|
|
@@ -181,59 +148,86 @@ public class LuceneSearch {
|
|
|
QueryScorer queryScorer = new QueryScorer(query);
|
|
|
Highlighter highlighter = new Highlighter(formatter, queryScorer);
|
|
|
|
|
|
+ Map<String, Field> map = Arrays.stream(clazz.getDeclaredFields())
|
|
|
+ .collect(Collectors.toMap(Field::getName, k -> k));
|
|
|
+ //List<T> list = new ArrayList<>();
|
|
|
+ List<Object> list = new ArrayList<>();
|
|
|
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
|
|
|
- Map<String, String> map = new HashMap<>();
|
|
|
for (ScoreDoc scoreDoc : scoreDocs) {
|
|
|
Document document = indexReader.document(scoreDoc.doc);
|
|
|
- String id = document.get("id");
|
|
|
- String caseName = document.get("caseName");
|
|
|
- String fullText = document.get("fullText");
|
|
|
- String caseNameHighlight = highlighter.getBestFragment(luceneAnalyzer, field, fullText);
|
|
|
+
|
|
|
+ //T tObject = null;
|
|
|
+ Object object = null;
|
|
|
+ for (Constructor<?> constructor : clazz.getDeclaredConstructors()) {
|
|
|
+ int len = constructor.getParameterTypes().length;
|
|
|
+ if (len == 0) {
|
|
|
+ object = constructor.newInstance();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ for (Map.Entry<String, Field> entry : map.entrySet()) {
|
|
|
+ String fieldName = entry.getKey();
|
|
|
+ Field field0 = clazz.getDeclaredField(fieldName);
|
|
|
+
|
|
|
+ String fieldValue = document.get(fieldName);
|
|
|
+ if (highlightFiledNames.contains(fieldName)) {
|
|
|
+ fieldValue = highlighter.getBestFragment(luceneAnalyzer, fieldName, fieldValue);
|
|
|
+ }
|
|
|
+
|
|
|
+ field0.setAccessible(true);
|
|
|
+ Class<?> fieldType = field0.getType();
|
|
|
+ if (fieldType.equals(String.class)) {
|
|
|
+ field0.set(object, fieldValue);
|
|
|
+ } else if (field0.getType().equals(int.class) || field0.getType().equals(Integer.class)) {
|
|
|
+ field0.set(object, Integer.parseInt(fieldValue));
|
|
|
+ } else if (field0.getType().equals(long.class) || field0.getType().equals(Long.class)) {
|
|
|
+ field0.set(object, Long.parseLong(fieldValue));
|
|
|
+ } else if (field0.getType().equals(double.class) || field0.getType().equals(Double.class)) {
|
|
|
+ field0.set(object, Double.parseDouble(fieldValue));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ list.add(object);
|
|
|
}
|
|
|
|
|
|
- return new SearchResult(total, map);
|
|
|
+ return PageList.pageList(pageNumber, pageSize, (int) total, list);
|
|
|
}
|
|
|
|
|
|
- public SearchResult highlighter(String indexName, String keyword, int pageSize, int pageNumber)
|
|
|
- throws IOException, InvalidTokenOffsetsException, ParseException {
|
|
|
- String field = "title";
|
|
|
- QueryParser queryParser = new QueryParser(field, luceneAnalyzer);
|
|
|
- Query query = queryParser.parse(keyword);
|
|
|
- TermQuery termQuery = new TermQuery(new Term(field, keyword));
|
|
|
- FuzzyQuery fuzzyQuery = new FuzzyQuery(new Term(field, keyword), 1);
|
|
|
- PhraseQuery.Builder builder = new PhraseQuery.Builder();
|
|
|
- builder.add(new Term(field, keyword), 1);
|
|
|
- PhraseQuery phraseQuery = builder.build();
|
|
|
+ public PageList queryWithHighlight(VideoQuery videoQuery, Class clazz) {
|
|
|
+ try {
|
|
|
+ LuceneQuery luceneQuery = getLuceneQueryByVideoQuery(videoQuery);
|
|
|
+ return getResults(luceneQuery, clazz);
|
|
|
+ } catch (Exception e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
|
|
|
- IndexReader indexReader = getIndexReader(indexName);
|
|
|
- IndexSearcher indexSearcher = new IndexSearcher(indexReader);
|
|
|
- long total;
|
|
|
- TopDocs topDocs;
|
|
|
- if (pageNumber == 1) {
|
|
|
- topDocs = indexSearcher.search(termQuery, pageSize);
|
|
|
- total = topDocs.totalHits.value;
|
|
|
- } else {
|
|
|
- int count = (pageNumber-1)*pageSize;
|
|
|
- TopDocs prevTopDocs = indexSearcher.searchAfter(null, query, count);
|
|
|
- total = prevTopDocs.totalHits.value;
|
|
|
+ return PageList.empty();
|
|
|
+ }
|
|
|
|
|
|
- ScoreDoc[] prevScoreDocs = prevTopDocs.scoreDocs;
|
|
|
- ScoreDoc after = prevScoreDocs[prevScoreDocs.length-1];
|
|
|
- topDocs = indexSearcher.searchAfter(after, query, pageSize);
|
|
|
+ private Query getQueryByQueryParser(List<String> queryFiledNames, String queryString) throws Exception {
|
|
|
+ Query query;
|
|
|
+ if (queryFiledNames.size() == 1) {
|
|
|
+ // Single-field query
|
|
|
+ QueryParser queryParser = new QueryParser(queryFiledNames.get(0), luceneAnalyzer);
|
|
|
+ query = queryParser.parse(queryString);
|
|
|
+ } else if (queryFiledNames.size() > 1) {
|
|
|
+ // Multi-field query
|
|
|
+ String[] fields = queryFiledNames.toArray(new String[0]);
|
|
|
+ MultiFieldQueryParser multiFieldQuery = new MultiFieldQueryParser(fields, luceneAnalyzer);
|
|
|
+ query = multiFieldQuery.parse(queryString);
|
|
|
+ } else {
|
|
|
+ throw new Exception("query field is empty");
|
|
|
}
|
|
|
|
|
|
- ScoreDoc[] scoreDocs = topDocs.scoreDocs;
|
|
|
- Map<String, String> map = new HashMap<>();
|
|
|
+ return query;
|
|
|
+ }
|
|
|
+
|
|
|
+ public String getHighlightText(String fieldName, String fieldValue, String queryString) throws Exception {
|
|
|
+ List<String> queryFiledNames = List.of(fieldName);
|
|
|
+ Query query = getQueryByQueryParser(queryFiledNames, queryString);
|
|
|
+
|
|
|
QueryScorer queryScorer = new QueryScorer(query);
|
|
|
Highlighter highlighter = new Highlighter(formatter, queryScorer);
|
|
|
- for (ScoreDoc scoreDoc : scoreDocs) {
|
|
|
- Document document = indexReader.document(scoreDoc.doc);
|
|
|
- String videoId = document.get("videoId");
|
|
|
- String title = document.get("title");
|
|
|
- String htmlTitle = highlighter.getBestFragment(luceneAnalyzer, field, title);
|
|
|
- map.put(videoId, htmlTitle);
|
|
|
- }
|
|
|
- return new SearchResult(total, map);
|
|
|
+ return highlighter.getBestFragment(luceneAnalyzer, fieldName, fieldValue);
|
|
|
}
|
|
|
|
|
|
public Document findDocumentById(String indexName, String id) {
|
|
|
@@ -246,6 +240,8 @@ public class LuceneSearch {
|
|
|
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
|
|
|
if (scoreDocs.length == 1) {
|
|
|
Document document = indexReader.document(scoreDocs[0].doc);
|
|
|
+ //highlighter.getBestFragment(luceneAnalyzer, fieldName, fieldValue);
|
|
|
+
|
|
|
return document;
|
|
|
}
|
|
|
} catch (Exception e) {
|