From 9375d2694c9fa403d6c029594c314e9146d272b2 Mon Sep 17 00:00:00 2001 From: Sebastian Sdorra Date: Tue, 7 Dec 2021 08:08:21 +0100 Subject: [PATCH] Highlight only queried fields (#1887) Expert queries highlight only the fields which are used in the query. --- .../changelog/highlight_queried_fields.yaml | 2 + .../sonia/scm/search/LuceneHighlighter.java | 23 ++++++++- .../sonia/scm/search/LuceneQueryBuilder.java | 6 +-- .../scm/search/LuceneSearchableType.java | 7 ++- .../sonia/scm/search/QueryResultFactory.java | 2 +- .../scm/search/LuceneHighlighterTest.java | 48 +++++++++++++++++++ 6 files changed, 79 insertions(+), 9 deletions(-) create mode 100644 gradle/changelog/highlight_queried_fields.yaml diff --git a/gradle/changelog/highlight_queried_fields.yaml b/gradle/changelog/highlight_queried_fields.yaml new file mode 100644 index 0000000000..fb09562360 --- /dev/null +++ b/gradle/changelog/highlight_queried_fields.yaml @@ -0,0 +1,2 @@ +- type: fixed + description: Highlight only queried fields ([#1887](https://github.com/scm-manager/scm-manager/pull/1887)) diff --git a/scm-webapp/src/main/java/sonia/scm/search/LuceneHighlighter.java b/scm-webapp/src/main/java/sonia/scm/search/LuceneHighlighter.java index a1cbd97a9e..b4445cb211 100644 --- a/scm-webapp/src/main/java/sonia/scm/search/LuceneHighlighter.java +++ b/scm-webapp/src/main/java/sonia/scm/search/LuceneHighlighter.java @@ -26,10 +26,17 @@ package sonia.scm.search; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.search.Query; -import org.apache.lucene.search.highlight.*; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.highlight.Highlighter; +import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; +import org.apache.lucene.search.highlight.QueryScorer; +import org.apache.lucene.search.highlight.SimpleHTMLFormatter; +import org.apache.lucene.search.highlight.SimpleSpanFragmenter; import java.io.IOException; import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; public final class LuceneHighlighter { @@ -42,11 +49,25 @@ public final class LuceneHighlighter { private final Analyzer analyzer; private final Highlighter highlighter; + private final Set queriedFields = new HashSet<>(); + public LuceneHighlighter(Analyzer analyzer, Query query) { this.analyzer = analyzer; QueryScorer scorer = new QueryScorer(query); this.highlighter = new Highlighter(new SimpleHTMLFormatter(PRE_TAG, POST_TAG), scorer); this.highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, FRAGMENT_SIZE)); + + query.visit(new QueryVisitor() { + @Override + public boolean acceptField(String field) { + queriedFields.add(field); + return super.acceptField(field); + } + }); + } + + public boolean isHighlightable(LuceneSearchableField field) { + return field.isHighlighted() && queriedFields.contains(field.getName()); } public String[] highlight(String fieldName, Indexed.Analyzer fieldAnalyzer, String value) throws InvalidTokenOffsetsException, IOException { diff --git a/scm-webapp/src/main/java/sonia/scm/search/LuceneQueryBuilder.java b/scm-webapp/src/main/java/sonia/scm/search/LuceneQueryBuilder.java index dc7963a2e7..60de55a55b 100644 --- a/scm-webapp/src/main/java/sonia/scm/search/LuceneQueryBuilder.java +++ b/scm-webapp/src/main/java/sonia/scm/search/LuceneQueryBuilder.java @@ -132,8 +132,8 @@ public class LuceneQueryBuilder extends QueryBuilder { } public Query createBestGuessQuery(LuceneSearchableType searchableType, QueryBuilder.QueryParams queryParams) throws QueryNodeException, IOException { - String[] fieldNames = searchableType.getFieldNames(); - if (fieldNames == null || fieldNames.length == 0) { + String[] defaultFieldNames = searchableType.getDefaultFieldNames(); + if (defaultFieldNames == null || defaultFieldNames.length == 0) { throw new NoDefaultQueryFieldsFoundException(searchableType.getType()); } @@ -141,7 +141,7 @@ public class LuceneQueryBuilder extends QueryBuilder { boolean hasWildcard = containsWildcard(queryString); BooleanQuery.Builder builder = new BooleanQuery.Builder(); - for (String fieldName : fieldNames) { + for (String fieldName : defaultFieldNames) { Query query; if (!hasWildcard) { query = createWildcardQuery(fieldName, queryString); diff --git a/scm-webapp/src/main/java/sonia/scm/search/LuceneSearchableType.java b/scm-webapp/src/main/java/sonia/scm/search/LuceneSearchableType.java index 0a7c4711bd..638cd2a0cb 100644 --- a/scm-webapp/src/main/java/sonia/scm/search/LuceneSearchableType.java +++ b/scm-webapp/src/main/java/sonia/scm/search/LuceneSearchableType.java @@ -29,7 +29,6 @@ import lombok.Value; import org.apache.lucene.queryparser.flexible.standard.config.PointsConfig; import javax.annotation.Nonnull; -import javax.annotation.Nullable; import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -47,7 +46,7 @@ public class LuceneSearchableType implements SearchableType { String name; String permission; List fields; - String[] fieldNames; + String[] defaultFieldNames; Map boosts; Map pointsConfig; TypeConverter typeConverter; @@ -57,7 +56,7 @@ public class LuceneSearchableType implements SearchableType { this.name = Names.create(type, annotation); this.permission = Strings.emptyToNull(annotation.permission()); this.fields = fields; - this.fieldNames = fieldNames(fields); + this.defaultFieldNames = defaultFieldNames(fields); this.boosts = boosts(fields); this.pointsConfig = pointsConfig(fields); this.typeConverter = TypeConverters.create(type); @@ -67,7 +66,7 @@ public class LuceneSearchableType implements SearchableType { return Optional.ofNullable(permission); } - private String[] fieldNames(List fields) { + private String[] defaultFieldNames(List fields) { return fields.stream() .filter(LuceneSearchableField::isDefaultQuery) .map(LuceneSearchableField::getName) diff --git a/scm-webapp/src/main/java/sonia/scm/search/QueryResultFactory.java b/scm-webapp/src/main/java/sonia/scm/search/QueryResultFactory.java index 68ee7b2717..429b91c196 100644 --- a/scm-webapp/src/main/java/sonia/scm/search/QueryResultFactory.java +++ b/scm-webapp/src/main/java/sonia/scm/search/QueryResultFactory.java @@ -74,7 +74,7 @@ public class QueryResultFactory { private Optional field(Document document, LuceneSearchableField field) throws IOException, InvalidTokenOffsetsException { Object value = field.value(document); if (value != null) { - if (field.isHighlighted()) { + if (highlighter.isHighlightable(field)) { String[] fragments = createFragments(field, value.toString()); if (fragments.length > 0) { return of(new Hit.HighlightedField(fragments)); diff --git a/scm-webapp/src/test/java/sonia/scm/search/LuceneHighlighterTest.java b/scm-webapp/src/test/java/sonia/scm/search/LuceneHighlighterTest.java index 2e4d149247..ade813f995 100644 --- a/scm-webapp/src/test/java/sonia/scm/search/LuceneHighlighterTest.java +++ b/scm-webapp/src/test/java/sonia/scm/search/LuceneHighlighterTest.java @@ -30,16 +30,25 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; import java.io.IOException; import java.net.URL; import java.nio.charset.StandardCharsets; import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.when; +@ExtendWith(MockitoExtension.class) class LuceneHighlighterTest { + + @Test void shouldHighlightText() throws InvalidTokenOffsetsException, IOException { StandardAnalyzer analyzer = new StandardAnalyzer(); @@ -99,6 +108,45 @@ class LuceneHighlighterTest { assertThat(snippets).hasSize(1); } + @Nested + class IsHighlightableTests { + + @Mock + private LuceneSearchableField field; + + private LuceneHighlighter highlighter; + + @BeforeEach + void setUpHighlighter() { + Query query = new TermQuery(new Term("content", "ka")); + highlighter = new LuceneHighlighter(new StandardAnalyzer(), query); + } + + @Test + void shouldReturnFalseForNonHighlightedField() { + when(field.isHighlighted()).thenReturn(false); + + assertThat(highlighter.isHighlightable(field)).isFalse(); + } + + @Test + void shouldReturnFalseIfNotInQuery() { + when(field.isHighlighted()).thenReturn(true); + when(field.getName()).thenReturn("name"); + + assertThat(highlighter.isHighlightable(field)).isFalse(); + } + + @Test + void shouldReturnTrue() { + when(field.isHighlighted()).thenReturn(true); + when(field.getName()).thenReturn("content"); + + assertThat(highlighter.isHighlightable(field)).isTrue(); + } + + } + private String[] highlightCode(String resource, String search) throws IOException, InvalidTokenOffsetsException { NonNaturalLanguageAnalyzer analyzer = new NonNaturalLanguageAnalyzer(); Query query = new TermQuery(new Term("content", search));