在 Apache Lucene 中,Field 类是文档中存储数据的基础。不同类型的 Field 用于存储不同类型的数据(如文本、数字、二进制数据等)。
org.apache.lucene.document.TextField
// Directory (relative to the working directory) in which this demo keeps its index files.
static String path="src/resources/index/app2";

/**
 * TextField demo: indexes two documents whose "fieldName" text is analyzed,
 * searches them for the term "sample", prints the hits as a JSON array and
 * finally deletes every document again.
 *
 * @param args unused
 * @throws IOException    if the index directory cannot be opened, written or read
 * @throws ParseException if the query string cannot be parsed
 */
public static void main(String[] args) throws IOException, ParseException {
    Document doc = new Document();
    doc.add(new TextField("fieldName", "This is a sample text.", Field.Store.YES));
    Document doc1 = new Document();
    doc1.add(new TextField("fieldName", "Sample text.", Field.Store.YES));

    try (
        // Index storage directory
        FSDirectory directory = FSDirectory.open(Paths.get(path));
        // Analyzer (tokenizer) shared by indexing and query parsing
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer))
    ) {
        // Build the index
        indexWriter.addDocument(doc);
        indexWriter.addDocument(doc1);
        // Commit so the documents are written to the index files
        indexWriter.commit();

        // Query the index
        QueryParser queryParser = new QueryParser("fieldName", analyzer);
        Query query = queryParser.parse("sample");

        // The reader is opened after commit() and closed as soon as the search
        // is done; previously it was never closed and leaked its file handles.
        try (IndexReader indexReader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            TopDocs topDocs = indexSearcher.search(query, 10);

            List<JSONObject> list = new ArrayList<>();
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                JSONObject jsonDoc = new JSONObject();
                jsonDoc.set("fieldName", document.get("fieldName"));
                jsonDoc.set("score", scoreDoc.score);
                jsonDoc.set("shardIndex", scoreDoc.shardIndex);
                list.add(jsonDoc);
            }
            System.out.println(JSONUtil.toJsonStr(list));
        }

        // Exercise index deletion: remove every document again
        indexWriter.deleteAll();
    }
}
查询词:sample
查询词:a sample text
org.apache.lucene.document.StringField
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.Field.Store;
// Build the two StringField instances first, then attach them to a fresh document.
StringField idField = new StringField("fieldName", "unique_identifier", Store.YES);
StringField coverImageField = new StringField("cover_image", "abc.jpg", Store.YES);
Document doc = new Document();
doc.add(idField);
doc.add(coverImageField);
数字类型包含:IntField 、LongField、FloatField、DoubleField
org.apache.lucene.document.IntField
org.apache.lucene.document.LongField
org.apache.lucene.document.FloatField
org.apache.lucene.document.DoubleField
// Directory (relative to the working directory) in which this demo keeps its index files.
static String path="src/resources/index/app1";

/**
 * LongField demo: indexes three name/age documents, runs a numeric range
 * query on "age" with bounds 8 and 30, prints every hit and finally deletes
 * all documents again.
 *
 * @param args unused
 * @throws IOException    if the index directory cannot be opened, written or read
 * @throws ParseException declared by the original signature; kept for compatibility
 */
public static void main(String[] args) throws IOException, ParseException {
    try (
        // Index storage directory
        FSDirectory directory = FSDirectory.open(Paths.get(path));
        // Analyzer (tokenizer)
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer))
    ) {
        // Build the index
        addDocument(indexWriter, "Cb Fc Zhang3", 8);
        addDocument(indexWriter, "Cb Fb Lie", 18);
        addDocument(indexWriter, "Cb Fb Wang", 6);
        // Commit so the documents are written to the index files
        indexWriter.commit();

        // Numeric type: range query
        Query query = LongField.newRangeQuery("age", 8, 30);

        // The reader is opened after commit() and closed as soon as the search
        // is done; previously it was never closed and leaked its file handles.
        try (IndexReader indexReader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            TopDocs topDocs = indexSearcher.search(query, 10);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                System.out.printf("score: %s , name: %s ,age: %s%n",
                        scoreDoc.score, document.get("name"), document.get("age"));
            }
        }

        // Exercise index deletion: remove every document again
        indexWriter.deleteAll();
    }
}
/**
 * Indexes one document made of an analyzed, stored "name" text field and a
 * stored "age" LongField.
 *
 * @param indexWriter writer that receives the document
 * @param name        value of the "name" field
 * @param age         value of the "age" field (unboxed and widened to long)
 * @throws IOException if the writer fails to add the document
 */
public static void addDocument(IndexWriter indexWriter, String name, Integer age) throws IOException {
    Field nameField = new TextField("name", name, Field.Store.YES);
    Field ageField = new LongField("age", age, Field.Store.YES);

    Document person = new Document();
    person.add(nameField);
    person.add(ageField);
    indexWriter.addDocument(person);
}
组合排序使用
特别提醒:如果同一个字段既有范围查询需求又有排序需求,建议在索引文档中为该字段同时添加两种类型的 Field:数字/数点类型用于范围查询,SortedDocValuesField 或 NumericDocValuesField 用于排序。
数点类型包含:IntPoint、LongPoint、FloatPoint、DoublePoint、BigIntegerPoint
org.apache.lucene.document.IntPoint
org.apache.lucene.document.LongPoint
org.apache.lucene.document.FloatPoint
org.apache.lucene.document.DoublePoint
StoredField
进行存储)。
static String path="src/resources/index/app1";
/**
 * IntPoint demo: indexes three name/age documents, runs a point range query
 * on "age" with bounds 8 and 30, prints every hit and finally deletes all
 * documents again.
 *
 * @param args unused
 * @throws IOException    if the index directory cannot be opened, written or read
 * @throws ParseException declared by the original signature; kept for compatibility
 */
public static void main(String[] args) throws IOException, ParseException {
    try (
        // Index storage directory
        FSDirectory directory = FSDirectory.open(Paths.get(path));
        // Analyzer (tokenizer)
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer))
    ) {
        // Build the index
        addDocument(indexWriter, "Cb Fc Zhang3", 8);
        addDocument(indexWriter, "Cb Fb Lie", 18);
        addDocument(indexWriter, "Cb Fb Wang", 6);
        // Commit so the documents are written to the index files
        indexWriter.commit();

        // Point range query
        Query query = IntPoint.newRangeQuery("age", 8, 30);

        // The reader is opened after commit() and closed as soon as the search
        // is done; previously it was never closed and leaked its file handles.
        try (IndexReader indexReader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            TopDocs topDocs = indexSearcher.search(query, 10);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                System.out.printf("score: %s , name: %s ,age: %s%n",
                        scoreDoc.score, document.get("name"), document.get("age"));
            }
        }

        // Exercise index deletion: remove every document again
        indexWriter.deleteAll();
    }
}
/**
 * Indexes one document made of an analyzed, stored "name" text field plus two
 * "age" fields: an IntPoint and a StoredField carrying the same value.
 *
 * @param indexWriter writer that receives the document
 * @param name        value of the "name" field
 * @param age         value used for both "age" fields
 * @throws IOException if the writer fails to add the document
 */
public static void addDocument(IndexWriter indexWriter, String name, Integer age) throws IOException {
    Field nameField = new TextField("name", name, Field.Store.YES);
    Field agePoint = new IntPoint("age", age);
    Field ageStored = new StoredField("age", age);

    Document person = new Document();
    person.add(nameField);
    person.add(agePoint);
    person.add(ageStored);
    indexWriter.addDocument(person);
}
可以看到只查询出了 age 在 8~30 范围内的文档,并且边界值(8 和 30)包含在内。
org.apache.lucene.document.StoredField
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
// Create the stored-only field first, then attach it to a fresh document.
StoredField storedContent = new StoredField("fieldName", "This is the stored content.");
Document doc = new Document();
doc.add(storedContent);
org.apache.lucene.document.BinaryField
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.util.BytesRef;
// Raw bytes to store; they are wrapped in a BytesRef and kept in a StoredField.
byte[] byteArray = {1, 2, 3, 4, 5};
BytesRef binaryValue = new BytesRef(byteArray);
StoredField binaryField = new StoredField("fieldName", binaryValue);
Document doc = new Document();
doc.add(binaryField);
包含:SortedDocValuesField 和 NumericDocValuesField
org.apache.lucene.document.SortedDocValuesField
org.apache.lucene.document.NumericDocValuesField
StoredField
存储原始值)。
static String path="src/resources/index/app1";
/**
 * Sorting demo: indexes three name/age documents, searches "name" for "Cb"
 * and orders the hits by relevance score first and by "age" (reversed)
 * second, prints every hit and finally deletes all documents again.
 *
 * @param args unused
 * @throws IOException    if the index directory cannot be opened, written or read
 * @throws ParseException if the query string cannot be parsed
 */
public static void main(String[] args) throws IOException, ParseException {
    try (
        // Index storage directory
        FSDirectory directory = FSDirectory.open(Paths.get(path));
        // Analyzer (tokenizer) shared by indexing and query parsing
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer))
    ) {
        // Build the index
        addDocument(indexWriter, "Cb Fc Zhang3", 8);
        addDocument(indexWriter, "Cb Fb Lie", 18);
        addDocument(indexWriter, "Cb Fb Wang", 6);
        // Commit so the documents are written to the index files
        indexWriter.commit();

        // Query the index
        Query query = new QueryParser("name", analyzer).parse("Cb");

        // Sort criteria, in priority order: score, then "age" with reverse = true.
        Sort sort = new Sort(SortField.FIELD_SCORE, new SortField("age", SortField.Type.LONG, true));

        // The reader is opened after commit() and closed as soon as the search
        // is done; previously it was never closed and leaked its file handles.
        try (IndexReader indexReader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            // Sorted variant of the plain indexSearcher.search(query, 10) call.
            // NOTE: with a custom Sort this overload does not populate
            // ScoreDoc.score, so the score printed below shows up as NaN.
            TopDocs topDocs = indexSearcher.search(query, 10, sort);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                System.out.printf("score: %s , name: %s ,age: %s%n",
                        scoreDoc.score, document.get("name"), document.get("age"));
            }
        }

        // Exercise index deletion: remove every document again
        indexWriter.deleteAll();
    }
}
/**
 * Indexes one document made of an analyzed, stored "name" text field plus two
 * "age" fields: a NumericDocValuesField and a StoredField carrying the same value.
 *
 * @param indexWriter writer that receives the document
 * @param name        value of the "name" field
 * @param age         value used for both "age" fields
 * @throws IOException if the writer fails to add the document
 */
public static void addDocument(IndexWriter indexWriter, String name, Integer age) throws IOException {
    Field nameField = new TextField("name", name, Field.Store.YES);
    Field ageDocValues = new NumericDocValuesField("age", age);
    Field ageStored = new StoredField("age", age);

    Document person = new Document();
    person.add(nameField);
    person.add(ageDocValues);
    person.add(ageStored);
    indexWriter.addDocument(person);
}
reverse
排序字段剖析 public SortField(String field, Type type, boolean reverse) {
// Quoted SortField constructor: field and type are handed to initFieldType
// (its body is not shown in this excerpt).
this.initFieldType(field, type);
// Remember the caller-supplied flag; per the Lucene SortField Javadoc,
// true means the natural order of this sort should be reversed.
this.reverse = reverse;
}
构造函数中有个参数为:reverse
默认值 false,参考下图:
reverse
值说明:
眼尖的朋友可能已经发现,排序后 score 变成了 NaN。原因是使用自定义排序后,评分不再写入 ScoreDoc.score,而是被放到了 FieldDoc 的 fields 数组里;fields 数组中各个值的顺序与创建 Sort 时传入的 SortField 顺序一致。
重要提醒:
以上不同类型的 Field,除 StoredField 之外,不要使用相同的字段名,否则会出现问题。
参考:
https://blog.xqlee.com/article/250428134000069.html