<properties>
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>
<lucene.version>9.12.1</lucene.version>
<hutool.all.version>5.8.26</hutool.all.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<!-- Lucene search engine: required dependencies -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<!--9.x renamed analyzers to analysis-->
<artifactId>lucene-analysis-common</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queries</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>${lucene.version}</version>
</dependency>
<!-- Chinese word segmentation (analyzer) dependencies -->
<dependency>
<groupId>org.apache.lucene</groupId>
<!--9.x renamed analyzers to analysis-->
<artifactId>lucene-analysis-smartcn</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>com.github.magese</groupId>
<artifactId>ik-analyzer</artifactId>
<version>8.5.0</version>
</dependency>
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<version>${hutool.all.version}</version>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.38</version>
</dependency>
</dependencies>
jdk 17
抽离公用部分源码方便展示
/**
 * Prints the hits of a search to standard output, one line per document.
 *
 * <p>Each line lists the document's stored fields as {@code name=value} pairs
 * followed by the hit's score. Field values are passed to {@code printf} as
 * arguments rather than spliced into the format string, so a value containing
 * {@code %} is printed verbatim instead of being interpreted as a format
 * specifier.
 *
 * @param indexSearcher searcher used to load the stored fields of each hit
 * @param hits hits to print, in the order given
 * @throws IOException if loading a document's stored fields fails
 */
public static void showSearchResults(IndexSearcher indexSearcher, ScoreDoc[] hits) throws IOException {
    System.out.println("------------------------结果呈现 START------------------------------");
    System.out.printf("找到 %s 个结果%n", hits.length);
    // IndexSearcher.doc(int) is deprecated in Lucene 9.x; obtain the stored-fields
    // accessor once and reuse it for every hit.
    var storedFields = indexSearcher.storedFields();
    for (ScoreDoc scoreDoc : hits) {
        Document document = storedFields.document(scoreDoc.doc);
        // Single-threaded local buffer, so StringBuilder is sufficient.
        StringBuilder line = new StringBuilder();
        for (var field : document.getFields()) {
            line.append(field.name()).append("=").append(field.stringValue()).append(" , ");
        }
        System.out.printf("%sscore: %s%n", line, scoreDoc.score);
    }
    System.out.println("------------------------结果呈现 END------------------------------");
}
/**
 * Builds one product document and adds it to the index.
 *
 * <p>The document carries a stored {@code TextField} "name", a {@code LongPoint}
 * "price" together with a {@code StoredField} "price", and a stored
 * {@code StringField} "sn".
 *
 * @param indexWriter writer that receives the document
 * @param name product name
 * @param price product price
 * @param sn product serial number
 * @throws IOException if the writer fails to add the document
 */
public static void addDocument(IndexWriter indexWriter, String name, Long price, String sn) throws IOException {
    Document product = new Document();
    // Fields are created and added in the same order as before.
    Field[] productFields = {
        new TextField("name", name, Field.Store.YES),
        new LongPoint("price", price),
        new StoredField("price", price),
        new StringField("sn", sn, Field.Store.YES),
    };
    for (Field productField : productFields) {
        product.add(productField);
    }
    indexWriter.addDocument(product);
}
// Location of the Lucene index directory that main opens via FSDirectory.open.
static String path="src/main/resources/index/app3";
StringField TextField
/**
 * Demo entry point: indexes five sample products, runs a parsed query against
 * the {@code name} field, prints the hits, and finally deletes all documents.
 *
 * @param args unused
 * @throws IOException if the index directory cannot be opened, written or read
 * @throws org.apache.lucene.queryparser.classic.ParseException if the query text cannot be parsed
 */
public static void main(String[] args) throws IOException, org.apache.lucene.queryparser.classic.ParseException {
    try (FSDirectory directory = FSDirectory.open(Paths.get(path)); // index storage directory
         Analyzer analyzer = new SmartChineseAnalyzer(); // tokenizer used for indexing and query parsing
         IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
        // Build the index.
        addDocument(indexWriter, "风冷变频节能空调小米1PRO", 1999L, "XIAOMI-1PRO");
        addDocument(indexWriter, "变频节能好空调美的X1", 2999L, "Media-X1");
        addDocument(indexWriter, "变频节能风管机空调小米G1Pro", 3999L, "XIAOMI-G1PRO");
        addDocument(indexWriter, "变频节能风扇小米F001", 299L, "XIAOMI-F001");
        addDocument(indexWriter, "变频节能风扇美的FX01", 399L, "Media-FX01");
        // Commit the index changes to disk.
        indexWriter.commit();
        // The reader is a closeable resource; previously it was opened and never closed.
        try (DirectoryReader reader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            // Query
            Query query = new QueryParser("name", analyzer).parse("风冷空调");
            TopDocs topDocs = indexSearcher.search(query, 10);
            showSearchResults(indexSearcher, topDocs.scoreDocs);
        }
        // Test index deletion: remove every document from the index.
        indexWriter.deleteAll();
    }
}
查询结果:
修改查询词
// Parse a different search text against the "name" field with the same analyzer.
QueryParser nameParser = new QueryParser("name", analyzer);
Query query = nameParser.parse("变频风扇");
查询结果
StringField TextField
// Prefix query on the "name" field.
Term namePrefix = new Term("name", "变频");
Query query = new PrefixQuery(namePrefix);
查询结果
是不是感觉和平时的前缀查询结果不一致?这是因为 PrefixQuery 查询的字段是参与分词的,所以前缀匹配的是分词后的词,而不是分词前的原始数据。
sn试试
// Prefix query on the "sn" field.
Term snPrefix = new Term("sn", "Media");
Query query = new PrefixQuery(snPrefix);
查询结果:
由于没分词,所以结果和普通的前缀查询一样。
StringField TextField
// Wildcard query on the "sn" field; '*' is the wildcard in the pattern.
Term snPattern = new Term("sn", "XIAOMI*");
Query query = new WildcardQuery(snPattern);
查询结果
提示:查询分词字段,则匹配的是分词后的词语,与上方前缀查询逻辑一致
StringField TextField
// Exact term query on the "name" field.
Term exactName = new Term("name", "风扇");
Query query = new TermQuery(exactName);
执行结果:
提示:这里的精确查询是指与索引中存储的词精确匹配。上面两条数据经分词器写入索引时分出了“风扇”这个词,所以能精确匹配。另外可与 StringField 配合实现 ID 搜索功能。
// Range query on the "price" LongPoint field; bounds written as explicit long literals.
long minPrice = 2000L;
long maxPrice = 5000L;
Query query = LongPoint.newRangeQuery("price", minPrice, maxPrice);
执行结果示例:
提示:前面字段用啥类型存放的这里用啥类型创建查询器,注意范围查询包含两边边界值。
示例
// Parse one search text against both the "name" and "sn" fields.
String[] searchFields = {"name", "sn"};
MultiFieldQueryParser multiFieldParser = new MultiFieldQueryParser(searchFields, analyzer);
Query query = multiFieldParser.parse("Media-FX01");
应用场景:一个文档中含有“标题”,“正文”等字段,搜索一个关键词,不管它在标题中出现还是在正文中出现都算符合条件。这时,我们就用到了多域查询。
BooleanQuery 查询器支持的字段类型与子查询器有关,BooleanQuery本身并不创建查询具体条件只是拼接各种查询条件
一个条件示例1:价格查询
BooleanQuery.Builder builder = new BooleanQuery.Builder();
Query price = LongPoint.newRangeQuery("price", 2000, 5000);
builder.add(price, BooleanClause.Occur.SHOULD);
BooleanQuery query = builder.build();
查询结果:
一个条件示例2:名称查询
BooleanQuery.Builder builder = new BooleanQuery.Builder();
Query name = new QueryParser("name",analyzer).parse("风扇");
builder.add(name, BooleanClause.Occur.SHOULD);
BooleanQuery query = builder.build();
查询结果:
两个条件查询:或
BooleanQuery.Builder builder = new BooleanQuery.Builder();
Query price = LongPoint.newRangeQuery("price", 2000, 5000);
Query name = new QueryParser("name",analyzer).parse("风扇");
builder.add(price, BooleanClause.Occur.SHOULD);
builder.add(name, BooleanClause.Occur.SHOULD);
BooleanQuery query = builder.build();
查询结果:
两个条件查询:且
BooleanQuery.Builder builder = new BooleanQuery.Builder();
Query price = LongPoint.newRangeQuery("price", 2000, 5000);
Query name = new QueryParser("name",analyzer).parse("风扇");
builder.add(price, BooleanClause.Occur.MUST);
builder.add(name, BooleanClause.Occur.MUST);
BooleanQuery query = builder.build();
查询结果:
提示:没有结果是正常的,查看上面的两个单一查询,发现结果并没有交集,所以这里没找到结果;
两个条件:Filter
Filter:必须匹配(关系同 MUST,为 且),但是不参与评分
BooleanQuery.Builder builder = new BooleanQuery.Builder();
Query price = LongPoint.newRangeQuery("price", 2000, 5000);
Query name = new QueryParser("name",analyzer).parse("风扇");
builder.add(price, BooleanClause.Occur.SHOULD);
builder.add(name, BooleanClause.Occur.FILTER);
BooleanQuery query = builder.build();
查询结果:
查询说明:一个查询,多个索引目录
try (DirectoryReader reader1 = DirectoryReader.open(directory);
DirectoryReader reader2 = DirectoryReader.open(directory);
MultiReader multiReader = new MultiReader(reader1,reader2);
){
IndexSearcher indexSearcher = new IndexSearcher(multiReader);
Query query = new QueryParser("sn", analyzer).parse("Media-FX01");
System.out.println(query);
TopDocs topDocs_10 = indexSearcher.search(query, 10);
}
https://blog.xqlee.com/article/250429151218455.html