Highlight only queried fields (#1887)

Expert queries highlight only the fields which are used in the query.
This commit is contained in:
Sebastian Sdorra
2021-12-07 08:08:21 +01:00
committed by GitHub
parent ad5bbfeef3
commit 9375d2694c
6 changed files with 79 additions and 9 deletions

View File

@@ -0,0 +1,2 @@
- type: fixed
description: Highlight only queried fields ([#1887](https://github.com/scm-manager/scm-manager/pull/1887))

View File

@@ -26,10 +26,17 @@ package sonia.scm.search;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
public final class LuceneHighlighter {
@@ -42,11 +49,25 @@ public final class LuceneHighlighter {
private final Analyzer analyzer;
private final Highlighter highlighter;
private final Set<String> queriedFields = new HashSet<>();
public LuceneHighlighter(Analyzer analyzer, Query query) {
this.analyzer = analyzer;
QueryScorer scorer = new QueryScorer(query);
this.highlighter = new Highlighter(new SimpleHTMLFormatter(PRE_TAG, POST_TAG), scorer);
this.highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, FRAGMENT_SIZE));
query.visit(new QueryVisitor() {
@Override
public boolean acceptField(String field) {
queriedFields.add(field);
return super.acceptField(field);
}
});
}
public boolean isHighlightable(LuceneSearchableField field) {
return field.isHighlighted() && queriedFields.contains(field.getName());
}
public String[] highlight(String fieldName, Indexed.Analyzer fieldAnalyzer, String value) throws InvalidTokenOffsetsException, IOException {

View File

@@ -132,8 +132,8 @@ public class LuceneQueryBuilder<T> extends QueryBuilder<T> {
}
public Query createBestGuessQuery(LuceneSearchableType searchableType, QueryBuilder.QueryParams queryParams) throws QueryNodeException, IOException {
String[] fieldNames = searchableType.getFieldNames();
if (fieldNames == null || fieldNames.length == 0) {
String[] defaultFieldNames = searchableType.getDefaultFieldNames();
if (defaultFieldNames == null || defaultFieldNames.length == 0) {
throw new NoDefaultQueryFieldsFoundException(searchableType.getType());
}
@@ -141,7 +141,7 @@ public class LuceneQueryBuilder<T> extends QueryBuilder<T> {
boolean hasWildcard = containsWildcard(queryString);
BooleanQuery.Builder builder = new BooleanQuery.Builder();
for (String fieldName : fieldNames) {
for (String fieldName : defaultFieldNames) {
Query query;
if (!hasWildcard) {
query = createWildcardQuery(fieldName, queryString);

View File

@@ -29,7 +29,6 @@ import lombok.Value;
import org.apache.lucene.queryparser.flexible.standard.config.PointsConfig;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
@@ -47,7 +46,7 @@ public class LuceneSearchableType implements SearchableType {
String name;
String permission;
List<LuceneSearchableField> fields;
String[] fieldNames;
String[] defaultFieldNames;
Map<String, Float> boosts;
Map<String, PointsConfig> pointsConfig;
TypeConverter typeConverter;
@@ -57,7 +56,7 @@ public class LuceneSearchableType implements SearchableType {
this.name = Names.create(type, annotation);
this.permission = Strings.emptyToNull(annotation.permission());
this.fields = fields;
this.fieldNames = fieldNames(fields);
this.defaultFieldNames = defaultFieldNames(fields);
this.boosts = boosts(fields);
this.pointsConfig = pointsConfig(fields);
this.typeConverter = TypeConverters.create(type);
@@ -67,7 +66,7 @@ public class LuceneSearchableType implements SearchableType {
return Optional.ofNullable(permission);
}
private String[] fieldNames(List<LuceneSearchableField> fields) {
private String[] defaultFieldNames(List<LuceneSearchableField> fields) {
return fields.stream()
.filter(LuceneSearchableField::isDefaultQuery)
.map(LuceneSearchableField::getName)

View File

@@ -74,7 +74,7 @@ public class QueryResultFactory {
private Optional<Hit.Field> field(Document document, LuceneSearchableField field) throws IOException, InvalidTokenOffsetsException {
Object value = field.value(document);
if (value != null) {
if (field.isHighlighted()) {
if (highlighter.isHighlightable(field)) {
String[] fragments = createFragments(field, value.toString());
if (fragments.length > 0) {
return of(new Hit.HighlightedField(fragments));

View File

@@ -30,16 +30,25 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import java.io.IOException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.when;
@ExtendWith(MockitoExtension.class)
class LuceneHighlighterTest {
@Test
void shouldHighlightText() throws InvalidTokenOffsetsException, IOException {
StandardAnalyzer analyzer = new StandardAnalyzer();
@@ -99,6 +108,45 @@ class LuceneHighlighterTest {
assertThat(snippets).hasSize(1);
}
@Nested
class IsHighlightableTests {
@Mock
private LuceneSearchableField field;
private LuceneHighlighter highlighter;
@BeforeEach
void setUpHighlighter() {
Query query = new TermQuery(new Term("content", "ka"));
highlighter = new LuceneHighlighter(new StandardAnalyzer(), query);
}
@Test
void shouldReturnFalseForNonHighlightedField() {
when(field.isHighlighted()).thenReturn(false);
assertThat(highlighter.isHighlightable(field)).isFalse();
}
@Test
void shouldReturnFalseIfNotInQuery() {
when(field.isHighlighted()).thenReturn(true);
when(field.getName()).thenReturn("name");
assertThat(highlighter.isHighlightable(field)).isFalse();
}
@Test
void shouldReturnTrue() {
when(field.isHighlighted()).thenReturn(true);
when(field.getName()).thenReturn("content");
assertThat(highlighter.isHighlightable(field)).isTrue();
}
}
private String[] highlightCode(String resource, String search) throws IOException, InvalidTokenOffsetsException {
NonNaturalLanguageAnalyzer analyzer = new NonNaturalLanguageAnalyzer();
Query query = new TermQuery(new Term("content", search));