Remove unsafe index options api (#1787)

The IndexOptions api has several problems:
- It is possible to open the same index with different options, which could lead to scoring problems
- If the index is already opened from another task, the options are ignored and the one from the opening task are used
- The analyzer which is derived from the options is used for every field which has not configured a specific analyzer
- This change removes the options api completely.

Co-authored-by: Konstantin Schaper <konstantin.schaper@cloudogu.com>
This commit is contained in:
Sebastian Sdorra
2021-08-31 14:03:16 +02:00
committed by GitHub
parent 61c2ebe80e
commit 765a39e4ce
15 changed files with 23 additions and 229 deletions

View File

@@ -1,104 +0,0 @@
/*
* MIT License
*
* Copyright (c) 2020-present Cloudogu GmbH and Contributors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package sonia.scm.search;
import com.google.common.annotations.Beta;
import lombok.EqualsAndHashCode;
import java.io.Serializable;
import java.util.Locale;
/**
* Options to configure how things are indexed and searched.
*
* @since 2.21.0
*/
@Beta
@EqualsAndHashCode
public class IndexOptions implements Serializable {
private final Type type;
private final Locale locale;
private IndexOptions(Type type, Locale locale) {
this.type = type;
this.locale = locale;
}
/**
* Returns the type of the index.
* @return type of index
*/
public Type getType() {
return type;
}
/**
* Returns the locale of the index content.
*
* @return locale of index content
*/
public Locale getLocale() {
return locale;
}
/**
* Returns the default index options which should match most of the use cases.
*
* @return default index options
*/
public static IndexOptions defaults() {
return new IndexOptions(Type.GENERIC, Locale.ENGLISH);
}
/**
* Returns index options for a specific language.
* This options should be used if the content is written in a specific natural language.
*
* @param locale natural language of content
*
* @return options for content in natural language
*/
public static IndexOptions naturalLanguage(Locale locale) {
return new IndexOptions(Type.NATURAL_LANGUAGE, locale);
}
/**
* Type of indexing.
*/
public enum Type {
/**
* Not specified content.
*/
GENERIC,
/**
* Content in natural language.
*/
NATURAL_LANGUAGE;
}
}

View File

@@ -105,15 +105,6 @@ public interface SearchEngine {
return forResource(resource.getId());
}
/**
* Specify options for the index.
* If not used the default options will be used.
* @param options index options
* @return {@code this}
* @see IndexOptions#defaults()
*/
ForIndices withOptions(IndexOptions options);
/**
* Submits the task and execute it for every index
* which are matching the predicate ({@link #matching(Predicate)}.
@@ -145,15 +136,6 @@ public interface SearchEngine {
*/
interface ForType<T> {
/**
* Specify options for the index.
* If not used the default options will be used.
* @param options index options
* @return {@code this}
* @see IndexOptions#defaults()
*/
ForType<T> withOptions(IndexOptions options);
/**
* Name of the index which should be used.
* If not specified the default index will be used.

View File

@@ -25,45 +25,16 @@
package sonia.scm.search;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.es.SpanishAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import javax.annotation.Nonnull;
import java.util.HashMap;
import java.util.Map;
public class AnalyzerFactory {
@Nonnull
public Analyzer create(IndexOptions options) {
if (options.getType() == IndexOptions.Type.NATURAL_LANGUAGE) {
return createNaturalLanguageAnalyzer(options.getLocale().getLanguage());
}
return createDefaultAnalyzer();
}
private Analyzer createDefaultAnalyzer() {
return new StandardAnalyzer();
}
private Analyzer createNaturalLanguageAnalyzer(String lang) {
switch (lang) {
case "en":
return new EnglishAnalyzer();
case "de":
return new GermanAnalyzer();
case "es":
return new SpanishAnalyzer();
default:
return createDefaultAnalyzer();
}
}
public Analyzer create(LuceneSearchableType type, IndexOptions options) {
Analyzer defaultAnalyzer = create(options);
public Analyzer create(LuceneSearchableType type) {
Analyzer defaultAnalyzer = createDefaultAnalyzer();
Map<String, Analyzer> analyzerMap = new HashMap<>();
for (LuceneSearchableField field : type.getAllFields()) {
@@ -73,6 +44,10 @@ public class AnalyzerFactory {
return new PerFieldAnalyzerWrapper(defaultAnalyzer, analyzerMap);
}
private Analyzer createDefaultAnalyzer() {
return new StandardAnalyzer();
}
private void addFieldAnalyzer(Map<String, Analyzer> analyzerMap, LuceneSearchableField field) {
if (field.getAnalyzer() != Indexed.Analyzer.DEFAULT) {
analyzerMap.put(field.getName(), new NonNaturalLanguageAnalyzer());

View File

@@ -87,7 +87,7 @@ public class IndexManager {
}
public IndexWriter openForWrite(IndexParams indexParams) {
IndexWriterConfig config = new IndexWriterConfig(analyzerFactory.create(indexParams.getSearchableType(), indexParams.getOptions()));
IndexWriterConfig config = new IndexWriterConfig(analyzerFactory.create(indexParams.getSearchableType()));
config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
Path path = resolveIndexDirectory(indexParams);

View File

@@ -31,7 +31,6 @@ public class IndexParams implements IndexDetails {
String index;
LuceneSearchableType searchableType;
IndexOptions options;
@Override
public Class<?> getType() {
@@ -47,4 +46,5 @@ public class IndexParams implements IndexDetails {
public String toString() {
return searchableType.getName() + "/" + index;
}
}

View File

@@ -33,7 +33,6 @@ public abstract class LuceneIndexTask implements Runnable, Serializable {
private final Class<?> type;
private final String indexName;
private final IndexOptions options;
private transient LuceneIndexFactory indexFactory;
private transient SearchableTypeResolver searchableTypeResolver;
@@ -42,7 +41,6 @@ public abstract class LuceneIndexTask implements Runnable, Serializable {
protected LuceneIndexTask(IndexParams params) {
this.type = params.getSearchableType().getType();
this.indexName = params.getIndex();
this.options = params.getOptions();
}
@Inject
@@ -66,7 +64,7 @@ public abstract class LuceneIndexTask implements Runnable, Serializable {
public void run() {
LuceneSearchableType searchableType = searchableTypeResolver.resolve(type);
IndexTask<?> task = task(injector);
try (LuceneIndex index = indexFactory.create(new IndexParams(indexName, searchableType, options))) {
try (LuceneIndex index = indexFactory.create(new IndexParams(indexName, searchableType))) {
task.update(index);
}
task.afterUpdate();

View File

@@ -42,7 +42,7 @@ public class LuceneQueryBuilderFactory {
indexManager,
indexParams.getIndex(),
indexParams.getSearchableType(),
analyzerFactory.create(indexParams.getSearchableType(), indexParams.getOptions())
analyzerFactory.create(indexParams.getSearchableType())
);
}

View File

@@ -101,7 +101,6 @@ public class LuceneSearchEngine implements SearchEngine {
private final List<String> resources = new ArrayList<>();
private Predicate<IndexDetails> predicate = details -> true;
private IndexOptions options = IndexOptions.defaults();
@Override
public ForIndices matching(Predicate<IndexDetails> predicate) {
@@ -109,12 +108,6 @@ public class LuceneSearchEngine implements SearchEngine {
return this;
}
@Override
public ForIndices withOptions(IndexOptions options) {
this.options = options;
return this;
}
@Override
public ForIndices forResource(String resource) {
this.resources.add(resource);
@@ -135,7 +128,7 @@ public class LuceneSearchEngine implements SearchEngine {
indexManager.all()
.stream()
.filter(predicate)
.map(details -> new IndexParams(details.getName(), resolver.resolve(details.getType()), options))
.map(details -> new IndexParams(details.getName(), resolver.resolve(details.getType())))
.forEach(consumer);
}
@@ -147,7 +140,6 @@ public class LuceneSearchEngine implements SearchEngine {
class LuceneForType<T> implements ForType<T> {
private final LuceneSearchableType searchableType;
private IndexOptions options = IndexOptions.defaults();
private String index = "default";
private final List<String> resources = new ArrayList<>();
@@ -155,12 +147,6 @@ public class LuceneSearchEngine implements SearchEngine {
this.searchableType = searchableType;
}
@Override
public ForType<T> withOptions(IndexOptions options) {
this.options = options;
return this;
}
@Override
public ForType<T> withIndex(String index) {
this.index = index;
@@ -168,7 +154,7 @@ public class LuceneSearchEngine implements SearchEngine {
}
private IndexParams params() {
return new IndexParams(index, searchableType, options);
return new IndexParams(index, searchableType);
}
@Override

View File

@@ -43,46 +43,6 @@ class AnalyzerFactoryTest {
private final AnalyzerFactory analyzerFactory = new AnalyzerFactory();
@Nested
class FromIndexOptionsTests {
@Test
void shouldReturnStandardAnalyzer() {
Analyzer analyzer = analyzerFactory.create(IndexOptions.defaults());
assertThat(analyzer).isInstanceOf(StandardAnalyzer.class);
}
@Test
void shouldReturnStandardAnalyzerForUnknownLocale() {
Analyzer analyzer = analyzerFactory.create(IndexOptions.naturalLanguage(Locale.CHINESE));
assertThat(analyzer).isInstanceOf(StandardAnalyzer.class);
}
@Test
void shouldReturnEnglishAnalyzer() {
Analyzer analyzer = analyzerFactory.create(IndexOptions.naturalLanguage(Locale.ENGLISH));
assertThat(analyzer).isInstanceOf(EnglishAnalyzer.class);
}
@Test
void shouldReturnGermanAnalyzer() {
Analyzer analyzer = analyzerFactory.create(IndexOptions.naturalLanguage(Locale.GERMAN));
assertThat(analyzer).isInstanceOf(GermanAnalyzer.class);
}
@Test
void shouldReturnGermanAnalyzerForLocaleGermany() {
Analyzer analyzer = analyzerFactory.create(IndexOptions.naturalLanguage(Locale.GERMANY));
assertThat(analyzer).isInstanceOf(GermanAnalyzer.class);
}
@Test
void shouldReturnSpanishAnalyzer() {
Analyzer analyzer = analyzerFactory.create(IndexOptions.naturalLanguage(new Locale("es", "ES")));
assertThat(analyzer).isInstanceOf(SpanishAnalyzer.class);
}
}
@Nested
class FromSearchableTypeTests {
@@ -98,8 +58,7 @@ class AnalyzerFactoryTest {
private void analyze(Class<?> type, String text, String field, String... expectedTokens) throws IOException {
LuceneSearchableType searchableType = SearchableTypes.create(type);
IndexOptions defaults = IndexOptions.defaults();
Analyzer analyzer = analyzerFactory.create(searchableType, defaults);
Analyzer analyzer = analyzerFactory.create(searchableType);
List<String> tokens = Analyzers.tokenize(analyzer, field, text);
assertThat(tokens).containsOnly(expectedTokens);
}

View File

@@ -73,7 +73,7 @@ class IndexManagerTest {
void createIndexWriterFactory(@TempDir Path tempDirectory) {
this.directory = tempDirectory;
when(context.resolve(Paths.get("index"))).thenReturn(tempDirectory.resolve("index"));
when(analyzerFactory.create(any(LuceneSearchableType.class), any(IndexOptions.class))).thenReturn(new SimpleAnalyzer());
when(analyzerFactory.create(any(LuceneSearchableType.class))).thenReturn(new SimpleAnalyzer());
when(pluginLoader.getUberClassLoader()).thenReturn(IndexManagerTest.class.getClassLoader());
indexManager = new IndexManager(context, pluginLoader, analyzerFactory);
}
@@ -139,10 +139,10 @@ class IndexManagerTest {
}
@SuppressWarnings({"rawtypes", "unchecked"})
private IndexWriter open(Class type, String indexName) throws IOException {
private IndexWriter open(Class type, String indexName) {
lenient().when(searchableType.getType()).thenReturn(type);
when(searchableType.getName()).thenReturn(type.getSimpleName().toLowerCase(Locale.ENGLISH));
return indexManager.openForWrite(new IndexParams(indexName, searchableType, IndexOptions.defaults()));
return indexManager.openForWrite(new IndexParams(indexName, searchableType));
}
@Test

View File

@@ -82,6 +82,6 @@ class LuceneIndexFactoryTest {
}
private IndexParams params(String indexName, Class<?> type) {
return new IndexParams(indexName, SearchableTypes.create(type), IndexOptions.defaults());
return new IndexParams(indexName, SearchableTypes.create(type));
}
}

View File

@@ -263,7 +263,7 @@ class LuceneIndexTest {
private <T> LuceneIndex<T> createIndex(Class<T> type, Supplier<IndexWriter> writerFactor) {
SearchableTypeResolver resolver = new SearchableTypeResolver(type);
return new LuceneIndex<>(
new IndexParams("default", resolver.resolve(type), IndexOptions.defaults()), writerFactor
new IndexParams("default", resolver.resolve(type)), writerFactor
);
}

View File

@@ -59,7 +59,7 @@ class LuceneInjectingIndexTaskTest {
Injector injector = createInjector();
LuceneSearchableType searchableType = resolver.resolve(User.class);
IndexParams params = new IndexParams("default", searchableType, IndexOptions.defaults());
IndexParams params = new IndexParams("default", searchableType);
LuceneInjectingIndexTask task = new LuceneInjectingIndexTask(params, InjectingTask.class);
injector.injectMembers(task);

View File

@@ -128,7 +128,7 @@ class LuceneSearchEngineTest {
LuceneQueryBuilder<Repository> mockedBuilder = mock(LuceneQueryBuilder.class);
when(resolver.resolve(Repository.class)).thenReturn(searchableType);
IndexParams params = new IndexParams("default", searchableType, IndexOptions.defaults());
IndexParams params = new IndexParams("default", searchableType);
when(queryBuilderFactory.<Repository>create(params)).thenReturn(mockedBuilder);
QueryBuilder<Repository> queryBuilder = searchEngine.forType(Repository.class).search();
@@ -139,15 +139,13 @@ class LuceneSearchEngineTest {
@Test
@SuppressWarnings("unchecked")
void shouldDelegateSearch() {
IndexOptions options = IndexOptions.naturalLanguage(Locale.GERMAN);
LuceneQueryBuilder<Repository> mockedBuilder = mock(LuceneQueryBuilder.class);
when(resolver.resolve(Repository.class)).thenReturn(searchableType);
IndexParams params = new IndexParams("idx", searchableType, options);
IndexParams params = new IndexParams("idx", searchableType);
when(queryBuilderFactory.<Repository>create(params)).thenReturn(mockedBuilder);
QueryBuilder<Repository> queryBuilder = searchEngine.forType(Repository.class).withIndex("idx").withOptions(options).search();
QueryBuilder<Repository> queryBuilder = searchEngine.forType(Repository.class).withIndex("idx").search();
assertThat(queryBuilder).isSameAs(mockedBuilder);
}

View File

@@ -56,7 +56,7 @@ class LuceneSimpleIndexTaskTest {
LuceneSearchableType searchableType = resolver.resolve(Repository.class);
IndexParams params = new IndexParams("default", searchableType, IndexOptions.defaults());
IndexParams params = new IndexParams("default", searchableType);
AtomicReference<Index<?>> ref = new AtomicReference<>();
LuceneSimpleIndexTask task = new LuceneSimpleIndexTask(params, ref::set);