Fix query for enum fields (#1800)

The enum fields were not searchable because they were stored without analysis or transformation, but when an enum field was searched for within a query, the StandardAnalyzer was used. This means that the enum was stored in the index as an uppercase string, while the query searched for the lowercase form (the StandardAnalyzer applies a lowercase filter).
To fix this problem, we now use the KeywordAnalyzer for every non-tokenized field. The StandardAnalyzer is only used for tokenized fields that do not specify another analyzer, such as code, path or id.
For enum fields we have introduced a new analyzer which uses an uppercase filter by default; this makes it possible to ignore case when searching enum fields.
This commit is contained in:
Sebastian Sdorra
2021-09-08 10:56:57 +02:00
committed by GitHub
parent 49dfd66a4f
commit 5b4d032611
6 changed files with 105 additions and 10 deletions

View File

@@ -55,6 +55,7 @@ import java.io.IOException;
import java.time.Instant;
import java.time.temporal.ChronoUnit;
import java.util.List;
import java.util.Locale;
import java.util.function.Consumer;
import java.util.stream.Collectors;
@@ -545,6 +546,26 @@ class LuceneQueryBuilderTest {
assertThat(hit.getRepositoryId()).contains("4211");
}
/**
 * Indexes a single person whose {@code animal} field holds {@link Animal#PENGUIN} and verifies
 * that the lower-case query {@code animal:penguin} yields exactly one hit.
 */
@Test
void shouldQueryByEnumField() throws IOException {
  try (IndexWriter indexWriter = writer()) {
    Document trillian = animalPerson("Trillian", Animal.PENGUIN);
    indexWriter.addDocument(trillian);
  }

  QueryResult penguinResult = query(PersonWithAnimal.class, "animal:penguin");

  assertThat(penguinResult.getTotalHits()).isOne();
}
/**
 * Indexes a single person whose {@code animal} field holds {@link Animal#ALPACA} and verifies
 * that the mixed-case query {@code animal:AlPaCa} still yields exactly one hit.
 */
@Test
void shouldQueryByEnumFieldAndIgnoreCase() throws IOException {
  try (IndexWriter indexWriter = writer()) {
    Document arthur = animalPerson("Arthur", Animal.ALPACA);
    indexWriter.addDocument(arthur);
  }

  QueryResult alpacaResult = query(PersonWithAnimal.class, "animal:AlPaCa");

  assertThat(alpacaResult.getTotalHits()).isOne();
}
/**
 * Convenience overload for tests that only need a type and a query string.
 *
 * <p>Delegates to the {@code query} overload that is called with four arguments, passing
 * {@code null} for the last two.
 * NOTE(review): the meaning of the two {@code null} arguments is not visible here — confirm
 * against the overload's signature.
 *
 * @param type the indexed type to query
 * @param queryString the query expression to run
 * @return the result produced by the delegated overload
 * @throws IOException declared because the delegated overload is invoked from here
 */
private QueryResult query(Class<?> type, String queryString) throws IOException {
return query(type, queryString, null, null);
}
@@ -555,7 +576,7 @@ class LuceneQueryBuilderTest {
LuceneSearchableType searchableType = resolver.resolve(type);
lenient().when(opener.openForRead(searchableType, "default")).thenReturn(reader);
LuceneQueryBuilder<T> builder = new LuceneQueryBuilder<T>(
opener, "default", searchableType, new StandardAnalyzer()
opener, "default", searchableType, new AnalyzerFactory().create(searchableType)
);
return builder.count(queryString).getTotalHits();
}
@@ -579,7 +600,7 @@ class LuceneQueryBuilderTest {
lenient().when(opener.openForRead(searchableType, "default")).thenReturn(reader);
LuceneQueryBuilder<T> builder = new LuceneQueryBuilder<>(
opener, "default", searchableType, new StandardAnalyzer()
opener, "default", searchableType, new AnalyzerFactory().create(searchableType)
);
consumer.accept(builder);
return builder.execute(queryString);
@@ -648,6 +669,13 @@ class LuceneQueryBuilderTest {
return document;
}
/**
 * Builds the index document for a person together with their animal.
 *
 * <p>The name is added as a {@code TextField} and the enum constant's {@code name()} as a
 * {@code StringField}; both are added with {@code Field.Store.YES}.
 *
 * @param name value of the {@code name} field
 * @param animal enum constant whose {@code name()} becomes the {@code animal} field value
 * @return a new document containing the {@code name} and {@code animal} fields, in that order
 */
private Document animalPerson(String name, Animal animal) {
  TextField nameField = new TextField("name", name, Field.Store.YES);
  StringField animalField = new StringField("animal", animal.name(), Field.Store.YES);

  Document person = new Document();
  person.add(nameField);
  person.add(animalField);
  return person;
}
@Getter
@IndexedType
static class Types {
@@ -693,4 +721,18 @@ class LuceneQueryBuilderTest {
private String content;
}
/** Enum used as the type of the indexed {@code animal} field in the enum query tests. */
enum Animal {
  /** Indexed by {@code shouldQueryByEnumField}. */
  PENGUIN,
  /** Indexed by {@code shouldQueryByEnumFieldAndIgnoreCase}. */
  ALPACA
}
/**
 * Test fixture type with a string {@code name} and an {@link Animal} enum field. The enum query
 * tests pass this class to {@code query(PersonWithAnimal.class, ...)}.
 */
@Getter
@IndexedType
static class PersonWithAnimal {
// Flagged with defaultQuery = true — presumably the field searched when a query names no
// field explicitly; confirm against the @Indexed documentation.
@Indexed(defaultQuery = true)
private String name;
// Enum-typed field addressed by the "animal:..." queries in the enum tests.
@Indexed
private Animal animal;
}
}

View File

@@ -43,6 +43,17 @@ class ValueExtractorsTest {
assertThat(value).isEqualTo(Animal.PENGUIN);
}
/**
 * Stores the lower-case string {@code "alpaca"} in the {@code animal} field and verifies that
 * an extractor created for {@link Animal} returns {@link Animal#ALPACA} for it.
 */
@Test
void shouldExtractEnumLowerCaseValue() {
  Document lowerCaseDocument = new Document();
  lowerCaseDocument.add(new StoredField("animal", "alpaca"));

  Object extracted = ValueExtractors.create("animal", Animal.class).extract(lowerCaseDocument);

  assertThat(extracted).isEqualTo(Animal.ALPACA);
}
/** Enum used as the target type of the value-extractor tests. */
enum Animal {
  /** Expected result of the extraction test that precedes the lower-case one. */
  PENGUIN,
  /** Expected result when the stored value is the lower-case string {@code "alpaca"}. */
  ALPACA
}