diff --git a/docs/changelog/129359.yaml b/docs/changelog/129359.yaml
new file mode 100644
index 0000000000000..9b1f6234d6579
--- /dev/null
+++ b/docs/changelog/129359.yaml
@@ -0,0 +1,5 @@
+pr: 129359
+summary: Add min_score support to the linear retriever
+area: Search
+type: enhancement
+issues: []
diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java
index 053013064b317..00395ebd18239 100644
--- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java
+++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java
@@ -14,6 +14,7 @@
 
 import static org.elasticsearch.search.retriever.CompoundRetrieverBuilder.INNER_RETRIEVERS_FILTER_SUPPORT;
 import static org.elasticsearch.xpack.rank.linear.L2ScoreNormalizer.LINEAR_RETRIEVER_L2_NORM;
+import static org.elasticsearch.xpack.rank.linear.LinearRetrieverBuilder.LINEAR_RETRIEVER_MINSCORE_FIX;
 import static org.elasticsearch.xpack.rank.linear.MinMaxScoreNormalizer.LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX;
 
 public class RankRRFFeatures implements FeatureSpecification {
@@ -27,6 +28,11 @@ public Set<NodeFeature> getFeatures() {
 
     @Override
     public Set<NodeFeature> getTestFeatures() {
-        return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT, LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX, LINEAR_RETRIEVER_L2_NORM);
+        return Set.of(
+            INNER_RETRIEVERS_FILTER_SUPPORT,
+            LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX,
+            LINEAR_RETRIEVER_L2_NORM,
+            LINEAR_RETRIEVER_MINSCORE_FIX
+        );
     }
 }
diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java
index 436096523a1ec..7631446ca71d0 100644
--- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java
+++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java
@@ -10,6 +10,7 @@
 import org.apache.lucene.search.ScoreDoc;
 import org.elasticsearch.common.ParsingException;
 import org.elasticsearch.common.util.Maps;
+import org.elasticsearch.features.NodeFeature;
 import org.elasticsearch.index.query.QueryBuilder;
 import org.elasticsearch.license.LicenseUtils;
 import org.elasticsearch.search.builder.SearchSourceBuilder;
@@ -46,6 +47,7 @@
  */
 public final class LinearRetrieverBuilder extends CompoundRetrieverBuilder<LinearRetrieverBuilder> {
 
+    public static final NodeFeature LINEAR_RETRIEVER_MINSCORE_FIX = new NodeFeature("linear_retriever_minscore_fix");
     public static final String NAME = "linear";
 
     public static final ParseField RETRIEVERS_FIELD = new ParseField("retrievers");
@@ -125,12 +127,46 @@ public LinearRetrieverBuilder(
         this.normalizers = normalizers;
     }
 
+    /**
+     * Creates a linear retriever that also carries a minimum score, a retriever name and pre-filter queries.
+     * <p>
+     * The first four arguments are handed unchanged to the four-argument constructor.
+     *
+     * @param minScore               inclusive lower bound for combined scores, or {@code null} to keep every result
+     * @param retrieverName          value assigned to {@code retrieverName}
+     * @param preFilterQueryBuilders value assigned to {@code preFilterQueryBuilders}
+     * @throws IllegalArgumentException if {@code minScore} is negative or NaN
+     */
+    public LinearRetrieverBuilder(
+        List<RetrieverSource> innerRetrievers,
+        int rankWindowSize,
+        float[] weights,
+        ScoreNormalizer[] normalizers,
+        Float minScore,
+        String retrieverName,
+        List<QueryBuilder> preFilterQueryBuilders
+    ) {
+        this(innerRetrievers, rankWindowSize, weights, normalizers);
+        // Validate before assigning any field. The negated comparison also rejects NaN, which would otherwise drop every result.
+        if (minScore != null && (minScore >= 0) == false) {
+            throw new IllegalArgumentException("[min_score] must be greater than or equal to 0, was: [" + minScore + "]");
+        }
+        this.minScore = minScore;
+        this.retrieverName = retrieverName;
+        this.preFilterQueryBuilders = preFilterQueryBuilders;
+    }
+
     @Override
     protected LinearRetrieverBuilder clone(List<RetrieverSource> newChildRetrievers, List<QueryBuilder> newPreFilterQueryBuilders) {
-        LinearRetrieverBuilder clone = new LinearRetrieverBuilder(newChildRetrievers, rankWindowSize, weights, normalizers);
-        clone.preFilterQueryBuilders = newPreFilterQueryBuilders;
-        clone.retrieverName = retrieverName;
-        return clone;
+        return new LinearRetrieverBuilder(
+            newChildRetrievers,
+            rankWindowSize,
+            weights,
+            normalizers,
+            minScore,
+            retrieverName,
+            newPreFilterQueryBuilders
+        );
     }
 
     @Override
@@ -181,6 +217,11 @@ protected RankDoc[] combineInnerRetrieverResults(List<ScoreDoc[]> rankResults, b
             topResults[rank] = sortedResults[rank];
             topResults[rank].rank = rank + 1;
         }
+        // Keep only results whose combined score is at least minScore (inclusive bound).
+        // NOTE(review): assumes sortedResults is in descending score order, so only the tail is cut and ranks stay contiguous.
+        if (minScore != null) {
+            topResults = Arrays.stream(topResults).filter(doc -> doc.score >= minScore).toArray(LinearRankDoc[]::new);
+        }
         return topResults;
     }
 
diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml
index 3c672ee70978d..f62c7e4987046 100644
--- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml
+++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml
@@ -1196,6 +1196,111 @@ setup:
               rank_window_size: -10
   - match: { status: 400 }
 
+---
+"linear retriever respects min_score after normalization":
+
+  - requires:
+      cluster_features: [ "linear_retriever_minscore_fix" ]
+      reason: test min_score functionality for linear retriever
+
+  - do:
+      search:
+        index: test
+        body:
+          retriever:
+            linear:
+              retrievers:
+                - retriever:
+                    standard:
+                      query:
+                        function_score:
+                          query:
+                            match_all: {}
+                          functions:
+                            - filter: { term: { _id: "1" } }
+                              weight: 1
+                            - filter: { term: { _id: "2" } }
+                              weight: 2
+                            - filter: { term: { _id: "3" } }
+                              weight: 3
+                            - filter: { term: { _id: "4" } }
+                              weight: 4
+                  weight: 1.0
+                  normalizer: "minmax"
+              rank_window_size: 10
+              min_score: 0.8
+          size: 10
+
+  - match: { hits.total.value: 1 }
+  - length: { hits.hits: 1 }
+  - match: { hits.hits.0._id: "4" }
+
+---
+"linear retriever with min_score zero includes all docs":
+
+  - requires:
+      cluster_features: [ "linear_retriever_minscore_fix" ]
+      reason: test min_score functionality for linear retriever
+
+  - do:
+      search:
+        index: test
+        body:
+          retriever:
+            linear:
+              retrievers: [
+                {
+                  retriever: {
+                    standard: {
+                      query: {
+                        match_all: {}
+                      }
+                    }
+                  },
+                  weight: 1.0,
+                  normalizer: "minmax"
+                }
+              ]
+              rank_window_size: 10
+              min_score: 0
+          size: 10
+
+  - match: { hits.total.value: 4 }
+  - length: { hits.hits: 4 }
+
+---
+"linear retriever with high min_score excludes all docs":
+
+  - requires:
+      cluster_features: [ "linear_retriever_minscore_fix" ]
+      reason: test min_score functionality for linear retriever
+
+  - do:
+      search:
+        index: test
+        body:
+          retriever:
+            linear:
+              retrievers: [
+                {
+                  retriever: {
+                    standard: {
+                      query: {
+                        match_all: {}
+                      }
+                    }
+                  },
+                  weight: 1.0,
+                  normalizer: "minmax"
+                }
+              ]
+              rank_window_size: 10
+              min_score: 2.0
+          size: 10
+
+  - match: { hits.total.value: 0 }
+  - length: { hits.hits: 0 }
+
 ---
 "minmax normalization properly handles a single doc result set":
   - requires: