diff --git a/docs/changelog/129223.yaml b/docs/changelog/129223.yaml new file mode 100644 index 0000000000000..ec84ec52c8cf7 --- /dev/null +++ b/docs/changelog/129223.yaml @@ -0,0 +1,5 @@ +pr: 129223 +summary: Fix text similarity reranker does not propagate min score correctly +area: Search +type: bug +issues: [] diff --git a/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java b/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java index 6cf0af0ef1541..3f9353251b920 100644 --- a/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java @@ -195,7 +195,8 @@ public void onFailure(Exception e) { RankDocsRetrieverBuilder rankDocsRetrieverBuilder = new RankDocsRetrieverBuilder( rankWindowSize, newRetrievers.stream().map(s -> s.retriever).toList(), - results::get + results::get, + this.minScore ); rankDocsRetrieverBuilder.retrieverName(retrieverName()); return rankDocsRetrieverBuilder; diff --git a/server/src/main/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilder.java b/server/src/main/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilder.java index a77f5327fbc26..0cdd5ab35adcd 100644 --- a/server/src/main/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilder.java @@ -33,13 +33,14 @@ public class RankDocsRetrieverBuilder extends RetrieverBuilder { final List sources; final Supplier rankDocs; - public RankDocsRetrieverBuilder(int rankWindowSize, List sources, Supplier rankDocs) { + public RankDocsRetrieverBuilder(int rankWindowSize, List sources, Supplier rankDocs, Float minScore) { this.rankWindowSize = rankWindowSize; this.rankDocs = rankDocs; if (sources == null || sources.isEmpty()) { throw new IllegalArgumentException("sources must not be null or empty"); } this.sources = sources; + this.minScore = minScore; } @Override @@ -48,7 +49,7 @@ public String getName() { } private boolean sourceHasMinScore() { - return minScore != null || sources.stream().anyMatch(x -> x.minScore() != null); + return this.minScore != null || sources.stream().anyMatch(x -> x.minScore() != null); } private boolean sourceShouldRewrite(QueryRewriteContext ctx) throws IOException { @@ -132,7 +133,7 @@ public void extractToSearchSourceBuilder(SearchSourceBuilder searchSourceBuilder searchSourceBuilder.size(rankWindowSize); } if (sourceHasMinScore()) { - searchSourceBuilder.minScore(this.minScore() == null ? Float.MIN_VALUE : this.minScore()); + searchSourceBuilder.minScore(this.minScore == null ? Float.MIN_VALUE : this.minScore); } if (searchSourceBuilder.size() + searchSourceBuilder.from() > rankDocResults.length) { searchSourceBuilder.size(Math.max(0, rankDocResults.length - searchSourceBuilder.from())); diff --git a/server/src/test/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilderTests.java b/server/src/test/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilderTests.java index eafab1d25c38e..165ad9b2de183 100644 --- a/server/src/test/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilderTests.java @@ -97,7 +97,7 @@ private List preFilters(QueryRewriteContext queryRewriteContext) t } private RankDocsRetrieverBuilder createRandomRankDocsRetrieverBuilder(QueryRewriteContext queryRewriteContext) throws IOException { - return new RankDocsRetrieverBuilder(randomIntBetween(1, 100), innerRetrievers(queryRewriteContext), rankDocsSupplier()); + return new RankDocsRetrieverBuilder(randomIntBetween(1, 100), innerRetrievers(queryRewriteContext), rankDocsSupplier(), null); } public void testExtractToSearchSourceBuilder() throws IOException { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java index d724bae4ecb64..42e2c1b0e7ba9 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java @@ -52,6 +52,7 @@ public Set getTestFeatures() { SemanticInferenceMetadataFieldsMapper.EXPLICIT_NULL_FIXES, SEMANTIC_KNN_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED, TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_ALIAS_HANDLING_FIX, + TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_MINSCORE_FIX, SemanticInferenceMetadataFieldsMapper.INFERENCE_METADATA_FIELDS_ENABLED_BY_DEFAULT, SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT, SEMANTIC_KNN_FILTER_FIX, diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java index 4f913e28539ab..0a6ff009f367e 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java @@ -23,6 +23,7 @@ import org.elasticsearch.xcontent.XContentParser; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.Objects; @@ -39,6 +40,7 @@ public class TextSimilarityRankRetrieverBuilder extends CompoundRetrieverBuilder public static final NodeFeature TEXT_SIMILARITY_RERANKER_ALIAS_HANDLING_FIX = new NodeFeature( "text_similarity_reranker_alias_handling_fix" ); + public static final NodeFeature TEXT_SIMILARITY_RERANKER_MINSCORE_FIX = new NodeFeature("text_similarity_reranker_minscore_fix"); public static final ParseField RETRIEVER_FIELD = new ParseField("retriever"); public static final ParseField INFERENCE_ID_FIELD = new ParseField("inference_id"); @@ -157,23 +159,21 @@ protected TextSimilarityRankRetrieverBuilder clone( protected RankDoc[] combineInnerRetrieverResults(List rankResults, boolean explain) { assert rankResults.size() == 1; ScoreDoc[] scoreDocs = rankResults.getFirst(); - TextSimilarityRankDoc[] textSimilarityRankDocs = new TextSimilarityRankDoc[scoreDocs.length]; + List filteredDocs = new ArrayList<>(); + // Filtering by min_score must be done here, after reranking. + // Applying min_score in the child retriever could prematurely exclude documents that would receive high scores from the reranker. for (int i = 0; i < scoreDocs.length; i++) { ScoreDoc scoreDoc = scoreDocs[i]; assert scoreDoc.score >= 0; - if (explain) { - textSimilarityRankDocs[i] = new TextSimilarityRankDoc( - scoreDoc.doc, - scoreDoc.score, - scoreDoc.shardIndex, - inferenceId, - field - ); - } else { - textSimilarityRankDocs[i] = new TextSimilarityRankDoc(scoreDoc.doc, scoreDoc.score, scoreDoc.shardIndex); + if (minScore == null || scoreDoc.score >= minScore) { + if (explain) { + filteredDocs.add(new TextSimilarityRankDoc(scoreDoc.doc, scoreDoc.score, scoreDoc.shardIndex, inferenceId, field)); + } else { + filteredDocs.add(new TextSimilarityRankDoc(scoreDoc.doc, scoreDoc.score, scoreDoc.shardIndex)); + } } } - return textSimilarityRankDocs; + return filteredDocs.toArray(new TextSimilarityRankDoc[0]); } @Override diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml index c14aa83bc27a9..98e392ed1ccee 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml @@ -379,3 +379,111 @@ setup: - match: { hits.total.value: 1 } - length: { hits.hits: 1 } - match: { hits.hits.0._id: "doc_1" } + +--- +"Text similarity reranker respects min_score": + + - requires: + cluster_features: "text_similarity_reranker_minscore_fix" + reason: test min score functionality + + - do: + index: + index: test-index + id: doc_2 + body: + text: "The phases of the Moon come from the position of the Moon relative to the Earth and Sun." + topic: [ "science" ] + subtopic: [ "astronomy" ] + inference_text_field: "10" + refresh: true + + - do: + search: + index: test-index + body: + track_total_hits: true + fields: [ "text", "topic" ] + retriever: + text_similarity_reranker: + retriever: + standard: + query: + bool: + should: + - constant_score: + filter: + term: { subtopic: "technology" } + boost: 10 + - constant_score: + filter: + term: { subtopic: "astronomy" } + boost: 1 + rank_window_size: 10 + inference_id: my-rerank-model + inference_text: "How often does the moon hide the sun?" + field: inference_text_field + min_score: 10 + size: 10 + + - match: { hits.total.value: 1 } + - length: { hits.hits: 1 } + - match: { hits.hits.0._id: "doc_2" } + +--- +"Text similarity reranker with min_score zero includes all docs": + + - requires: + cluster_features: "text_similarity_reranker_minscore_fix" + reason: test min score functionality + + - do: + search: + index: test-index + body: + track_total_hits: true + fields: [ "text", "topic" ] + retriever: + text_similarity_reranker: + retriever: + standard: + query: + match_all: {} + rank_window_size: 10 + inference_id: my-rerank-model + inference_text: "How often does the moon hide the sun?" + field: inference_text_field + min_score: 0 + size: 10 + + - match: { hits.total.value: 3 } + - length: { hits.hits: 3 } + +--- +"Text similarity reranker with high min_score excludes all docs": + + - requires: + cluster_features: "text_similarity_reranker_minscore_fix" + reason: test min score functionality + + - do: + search: + index: test-index + body: + track_total_hits: true + fields: [ "text", "topic" ] + retriever: + text_similarity_reranker: + retriever: + standard: + query: + match_all: {} + rank_window_size: 10 + inference_id: my-rerank-model + inference_text: "How often does the moon hide the sun?" + field: inference_text_field + min_score: 1000 + size: 10 + + - match: { hits.total.value: 0 } + - length: { hits.hits: 0 }