Skip to content

Implement the query logic for the semantic field. #1315

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Implement analyzer based neural sparse query ([#1088](https://github.com/opensearch-project/neural-search/pull/1088) [#1279](https://github.com/opensearch-project/neural-search/pull/1279))
- [Semantic Field] Add semantic mapping transformer. ([#1276](https://github.com/opensearch-project/neural-search/pull/1276))
- [Semantic Field] Add semantic ingest processor. ([#1309](https://github.com/opensearch-project/neural-search/pull/1309))
- [Semantic Field] Implement the query logic for the semantic field. ([#1315](https://github.com/opensearch-project/neural-search/pull/1315))

### Enhancements
- [Performance Improvement] Add custom bulk scorer for hybrid query (2-3x faster) ([#1289](https://github.com/opensearch-project/neural-search/pull/1289))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,23 @@
*/
public final class MinClusterVersionUtil {

private static final Version MINIMAL_SUPPORTED_VERSION_DEFAULT_MODEL_ID = Version.V_2_11_0;
private static final Version MINIMAL_SUPPORTED_VERSION_DEFAULT_DENSE_MODEL_ID = Version.V_2_11_0;
private static final Version MINIMAL_SUPPORTED_VERSION_RADIAL_SEARCH = Version.V_2_14_0;
private static final Version MINIMAL_SUPPORTED_VERSION_QUERY_IMAGE_FIX = Version.V_2_19_0;
private static final Version MINIMAL_SUPPORTED_VERSION_PAGINATION_IN_HYBRID_QUERY = Version.V_2_19_0;
private static final Version MINIMAL_SUPPORTED_VERSION_NEURAL_ORIGINAL_QUERY_TEXT = Version.V_3_0_0;
public static final Version MINIMAL_SUPPORTED_VERSION_SEMANTIC_FIELD = Version.V_3_1_0;

// Note this minimal version will act as a override
// Note this minimal version will act as an override
private static final Map<String, Version> MINIMAL_VERSION_NEURAL = ImmutableMap.<String, Version>builder()
.put(MODEL_ID_FIELD.getPreferredName(), MINIMAL_SUPPORTED_VERSION_DEFAULT_MODEL_ID)
.put(MODEL_ID_FIELD.getPreferredName(), MINIMAL_SUPPORTED_VERSION_DEFAULT_DENSE_MODEL_ID)
.put(MAX_DISTANCE_FIELD.getPreferredName(), MINIMAL_SUPPORTED_VERSION_RADIAL_SEARCH)
.put(MIN_SCORE_FIELD.getPreferredName(), MINIMAL_SUPPORTED_VERSION_RADIAL_SEARCH)
.put(QUERY_IMAGE_FIELD.getPreferredName(), MINIMAL_SUPPORTED_VERSION_QUERY_IMAGE_FIX)
.build();

public static boolean isClusterOnOrAfterMinReqVersionForDefaultModelIdSupport() {
return NeuralSearchClusterUtil.instance().getClusterMinVersion().onOrAfter(MINIMAL_SUPPORTED_VERSION_DEFAULT_MODEL_ID);
public static boolean isClusterOnOrAfterMinReqVersionForDefaultDenseModelIdSupport() {
return NeuralSearchClusterUtil.instance().getClusterMinVersion().onOrAfter(MINIMAL_SUPPORTED_VERSION_DEFAULT_DENSE_MODEL_ID);
}

public static boolean isClusterOnOrAfterMinReqVersionForRadialSearch() {
Expand Down Expand Up @@ -66,4 +67,13 @@ public static boolean isClusterOnOrAfterMinReqVersion(String key) {
/**
 * Checks whether the supplied version is on or after
 * {@code MINIMAL_SUPPORTED_VERSION_NEURAL_ORIGINAL_QUERY_TEXT}.
 * The check is made against the {@code version} argument passed by the caller.
 *
 * @param version the version to compare against the minimum required version
 * @return true if {@code version} is on or after the minimum required version
 */
public static boolean isVersionOnOrAfterMinReqVersionForNeuralKNNQueryText(Version version) {
    final Version minimumRequired = MINIMAL_SUPPORTED_VERSION_NEURAL_ORIGINAL_QUERY_TEXT;
    return version.onOrAfter(minimumRequired);
}

/**
 * Reports whether the semantic field type can be used on the current cluster.
 * The minimum version returned by {@code NeuralSearchClusterUtil#getClusterMinVersion()}
 * is compared against {@link #MINIMAL_SUPPORTED_VERSION_SEMANTIC_FIELD}.
 *
 * @return true if the cluster min version is on or after the minimum required version
 */
public static boolean isClusterOnOrAfterMinReqVersionForSemanticFieldType() {
    final Version clusterMinVersion = NeuralSearchClusterUtil.instance().getClusterMinVersion();
    return clusterMinVersion.onOrAfter(MINIMAL_SUPPORTED_VERSION_SEMANTIC_FIELD);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.highlight.extractor;

import org.apache.lucene.search.Query;
import org.opensearch.index.search.OpenSearchToParentBlockJoinQuery;

/**
 * {@link QueryTextExtractor} for nested queries, which reach this extractor as
 * {@link OpenSearchToParentBlockJoinQuery} instances. It does not extract text itself:
 * it unwraps the child query and hands it back to the {@link QueryTextExtractorRegistry}.
 */
public class NestedQueryTextExtractor implements QueryTextExtractor {
    /** Registry used to resolve an extractor for the wrapped child query. */
    private final QueryTextExtractorRegistry registry;

    /**
     * @param registry registry that the child query is delegated to
     */
    public NestedQueryTextExtractor(QueryTextExtractorRegistry registry) {
        this.registry = registry;
    }

    /**
     * Extracts the query text from the child of a block-join query.
     *
     * @param query     query to unwrap; converted to {@link OpenSearchToParentBlockJoinQuery}
     *                  via {@code toQueryType} (defined outside this class)
     * @param fieldName field name forwarded unchanged to the registry
     * @return whatever the registry extracts from the child query
     */
    @Override
    public String extractQueryText(Query query, String fieldName) {
        final Query childQuery = toQueryType(query, OpenSearchToParentBlockJoinQuery.class).getChildQuery();
        return registry.extractQueryText(childQuery, fieldName);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.opensearch.index.search.OpenSearchToParentBlockJoinQuery;
import org.opensearch.neuralsearch.query.NeuralKNNQuery;
import org.opensearch.neuralsearch.query.HybridQuery;

Expand Down Expand Up @@ -38,6 +39,9 @@ private void initialize() {
register(TermQuery.class, new TermQueryTextExtractor());
register(HybridQuery.class, new HybridQueryTextExtractor(this));

// Handle nested query
register(OpenSearchToParentBlockJoinQuery.class, new NestedQueryTextExtractor(this));

// BooleanQueryTextExtractor needs a reference to this registry
// so we need to register it after creating the registry instance
register(BooleanQuery.class, new BooleanQueryTextExtractor(this));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ public class SemanticParameters {
public SemanticParameters(String modelId, String searchModelId, String rawFieldType, String semanticInfoFieldName) {
this.modelId = modelId;
this.searchModelId = searchModelId;
this.semanticInfoFieldName = semanticInfoFieldName;
this.rawFieldType = rawFieldType;
this.semanticInfoFieldName = semanticInfoFieldName;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -111,17 +111,18 @@ private Map<String, Object> buildRankFeaturesFieldConfig() {
return config;
}

public SemanticInfoConfigBuilder mlModel(@NonNull final MLModel mlModel) {
// The model id is passed in explicitly because the MLModel object does not always carry it.
public SemanticInfoConfigBuilder mlModel(@NonNull final MLModel mlModel, @NonNull final String modelId) {
switch (mlModel.getAlgorithm()) {
case FunctionName.TEXT_EMBEDDING -> extractInfoForTextEmbeddingModel(mlModel);
case FunctionName.TEXT_EMBEDDING -> extractInfoForTextEmbeddingModel(mlModel, modelId);
case FunctionName.SPARSE_ENCODING, FunctionName.SPARSE_TOKENIZE -> extractInfoForSparseModel();
case FunctionName.REMOTE -> extractInfoForRemoteModel(mlModel);
case FunctionName.REMOTE -> extractInfoForRemoteModel(mlModel, modelId);
default -> throw new IllegalArgumentException(
String.format(
Locale.ROOT,
"The algorithm %s of the model %s is not supported in the semantic field. Supported algorithms: [%s].",
mlModel.getAlgorithm().name(),
mlModel.getModelId(),
modelId,
String.join(",", SUPPORTED_MODEL_ALGORITHMS)
)
);
Expand All @@ -130,11 +131,9 @@ public SemanticInfoConfigBuilder mlModel(@NonNull final MLModel mlModel) {
return this;
}

private void extractInfoForTextEmbeddingModel(@NonNull final MLModel mlModel) {
private void extractInfoForTextEmbeddingModel(@NonNull final MLModel mlModel, @NonNull final String modelId) {
this.embeddingFieldType = KNNVectorFieldMapper.CONTENT_TYPE;

final String modelId = mlModel.getModelId();

if (mlModel.getModelConfig() instanceof TextEmbeddingModelConfig == false) {
throw new IllegalArgumentException(
String.format(
Expand Down Expand Up @@ -185,8 +184,7 @@ private void extractInfoForSparseModel() {
this.embeddingFieldType = RankFeaturesFieldMapper.CONTENT_TYPE;
}

private void extractInfoForRemoteModel(@NonNull final MLModel mlModel) {
final String modelId = mlModel.getModelId();
private void extractInfoForRemoteModel(@NonNull final MLModel mlModel, @NonNull final String modelId) {
final MLModelConfig mlModelConfig = mlModel.getModelConfig();
if (mlModelConfig == null) {
throw new IllegalArgumentException(String.format(Locale.ROOT, "Model config is null for the remote model %s.", modelId));
Expand All @@ -200,7 +198,7 @@ private void extractInfoForRemoteModel(@NonNull final MLModel mlModel) {
}

switch (modelTypeFunctionName) {
case FunctionName.TEXT_EMBEDDING -> extractInfoForTextEmbeddingModel(mlModel);
case FunctionName.TEXT_EMBEDDING -> extractInfoForTextEmbeddingModel(mlModel, modelId);
case FunctionName.SPARSE_ENCODING, FunctionName.SPARSE_TOKENIZE -> extractInfoForSparseModel();
default -> throw new IllegalArgumentException(getUnsupportedRemoteModelError(modelType, modelId));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ private void modifyMappings(
final List<String> fieldPathList = modelIdToFieldPathMap.get(modelId);
for (String fieldPath : fieldPathList) {
try {
final Map<String, Object> semanticInfoConfig = createSemanticInfoField(mlModel);
final Map<String, Object> semanticInfoConfig = createSemanticInfoField(mlModel, modelId);
final Map<String, Object> fieldConfig = semanticFieldPathToConfigMap.get(fieldPath);
setSemanticInfoField(mappings, fieldPath, fieldConfig.get(SEMANTIC_INFO_FIELD_NAME), semanticInfoConfig);
} catch (IllegalArgumentException e) {
Expand All @@ -207,9 +207,9 @@ private String getModifyMappingErrorMessage(@NonNull final String fieldPath, fin
}

@VisibleForTesting
private Map<String, Object> createSemanticInfoField(final @NonNull MLModel modelConfig) {
private Map<String, Object> createSemanticInfoField(final @NonNull MLModel modelConfig, String modelId) {
SemanticInfoConfigBuilder builder = new SemanticInfoConfigBuilder(xContentRegistry);
return builder.mlModel(modelConfig).build();
return builder.mlModel(modelConfig, modelId).build();
}

@SuppressWarnings("unchecked")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ public Collection<Object> createComponents(
final IndexNameExpressionResolver indexNameExpressionResolver,
final Supplier<RepositoriesService> repositoriesServiceSupplier
) {
NeuralSearchClusterUtil.instance().initialize(clusterService);
NeuralSearchClusterUtil.instance().initialize(clusterService, indexNameExpressionResolver);
NeuralQueryBuilder.initialize(clientAccessor);
NeuralSparseQueryBuilder.initialize(clientAccessor);
QueryTextExtractorRegistry queryTextExtractorRegistry = new QueryTextExtractorRegistry();
Expand Down
Loading
Loading