Skip to content

Commit 7b3a45f

Browse files
bzhangam authored and YeonghyeonKO committed
Implement the query logic for the semantic field. (opensearch-project#1315)
Signed-off-by: Bo Zhang <[email protected]> Signed-off-by: yeonghyeonKo <[email protected]>
1 parent b383d72 commit 7b3a45f

28 files changed

+2695
-378
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
99
- Implement analyzer based neural sparse query ([#1088](https://github.com/opensearch-project/neural-search/pull/1088) [#1279](https://github.com/opensearch-project/neural-search/pull/1279))
1010
- [Semantic Field] Add semantic mapping transformer. ([#1276](https://github.com/opensearch-project/neural-search/pull/1276))
1111
- [Semantic Field] Add semantic ingest processor. ([#1309](https://github.com/opensearch-project/neural-search/pull/1309))
12+
- [Semantic Field] Implement the query logic for the semantic field. ([#1315](https://github.com/opensearch-project/neural-search/pull/1315))
1213

1314
### Enhancements
1415
- [Performance Improvement] Add custom bulk scorer for hybrid query (2-3x faster) ([#1289](https://github.com/opensearch-project/neural-search/pull/1289))

src/main/java/org/opensearch/neuralsearch/common/MinClusterVersionUtil.java

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,22 +21,23 @@
2121
*/
2222
public final class MinClusterVersionUtil {
2323

24-
private static final Version MINIMAL_SUPPORTED_VERSION_DEFAULT_MODEL_ID = Version.V_2_11_0;
24+
private static final Version MINIMAL_SUPPORTED_VERSION_DEFAULT_DENSE_MODEL_ID = Version.V_2_11_0;
2525
private static final Version MINIMAL_SUPPORTED_VERSION_RADIAL_SEARCH = Version.V_2_14_0;
2626
private static final Version MINIMAL_SUPPORTED_VERSION_QUERY_IMAGE_FIX = Version.V_2_19_0;
2727
private static final Version MINIMAL_SUPPORTED_VERSION_PAGINATION_IN_HYBRID_QUERY = Version.V_2_19_0;
2828
private static final Version MINIMAL_SUPPORTED_VERSION_NEURAL_ORIGINAL_QUERY_TEXT = Version.V_3_0_0;
29+
public static final Version MINIMAL_SUPPORTED_VERSION_SEMANTIC_FIELD = Version.V_3_1_0;
2930

30-
// Note this minimal version will act as a override
31+
// Note this minimal version will act as an override
3132
private static final Map<String, Version> MINIMAL_VERSION_NEURAL = ImmutableMap.<String, Version>builder()
32-
.put(MODEL_ID_FIELD.getPreferredName(), MINIMAL_SUPPORTED_VERSION_DEFAULT_MODEL_ID)
33+
.put(MODEL_ID_FIELD.getPreferredName(), MINIMAL_SUPPORTED_VERSION_DEFAULT_DENSE_MODEL_ID)
3334
.put(MAX_DISTANCE_FIELD.getPreferredName(), MINIMAL_SUPPORTED_VERSION_RADIAL_SEARCH)
3435
.put(MIN_SCORE_FIELD.getPreferredName(), MINIMAL_SUPPORTED_VERSION_RADIAL_SEARCH)
3536
.put(QUERY_IMAGE_FIELD.getPreferredName(), MINIMAL_SUPPORTED_VERSION_QUERY_IMAGE_FIX)
3637
.build();
3738

38-
public static boolean isClusterOnOrAfterMinReqVersionForDefaultModelIdSupport() {
39-
return NeuralSearchClusterUtil.instance().getClusterMinVersion().onOrAfter(MINIMAL_SUPPORTED_VERSION_DEFAULT_MODEL_ID);
39+
public static boolean isClusterOnOrAfterMinReqVersionForDefaultDenseModelIdSupport() {
40+
return NeuralSearchClusterUtil.instance().getClusterMinVersion().onOrAfter(MINIMAL_SUPPORTED_VERSION_DEFAULT_DENSE_MODEL_ID);
4041
}
4142

4243
public static boolean isClusterOnOrAfterMinReqVersionForRadialSearch() {
@@ -66,4 +67,13 @@ public static boolean isClusterOnOrAfterMinReqVersion(String key) {
6667
public static boolean isVersionOnOrAfterMinReqVersionForNeuralKNNQueryText(Version version) {
6768
return version.onOrAfter(MINIMAL_SUPPORTED_VERSION_NEURAL_ORIGINAL_QUERY_TEXT);
6869
}
70+
71+
/**
72+
* Checks if the cluster min version is on or after the minimum required version for semantic field type
73+
*
74+
* @return true if the version is on or after the minimum required version
75+
*/
76+
public static boolean isClusterOnOrAfterMinReqVersionForSemanticFieldType() {
77+
return NeuralSearchClusterUtil.instance().getClusterMinVersion().onOrAfter(MINIMAL_SUPPORTED_VERSION_SEMANTIC_FIELD);
78+
}
6979
}
src/main/java/org/opensearch/neuralsearch/highlight/extractor/NestedQueryTextExtractor.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
package org.opensearch.neuralsearch.highlight.extractor;
6+
7+
import org.apache.lucene.search.Query;
8+
import org.opensearch.index.search.OpenSearchToParentBlockJoinQuery;
9+
10+
public class NestedQueryTextExtractor implements QueryTextExtractor {
11+
private final QueryTextExtractorRegistry registry;
12+
13+
public NestedQueryTextExtractor(QueryTextExtractorRegistry registry) {
14+
this.registry = registry;
15+
}
16+
17+
@Override
18+
public String extractQueryText(Query query, String fieldName) {
19+
OpenSearchToParentBlockJoinQuery neuralQuery = toQueryType(query, OpenSearchToParentBlockJoinQuery.class);
20+
return registry.extractQueryText(neuralQuery.getChildQuery(), fieldName);
21+
}
22+
}

src/main/java/org/opensearch/neuralsearch/highlight/extractor/QueryTextExtractorRegistry.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import org.apache.lucene.search.BooleanQuery;
88
import org.apache.lucene.search.Query;
99
import org.apache.lucene.search.TermQuery;
10+
import org.opensearch.index.search.OpenSearchToParentBlockJoinQuery;
1011
import org.opensearch.neuralsearch.query.NeuralKNNQuery;
1112
import org.opensearch.neuralsearch.query.HybridQuery;
1213

@@ -38,6 +39,9 @@ private void initialize() {
3839
register(TermQuery.class, new TermQueryTextExtractor());
3940
register(HybridQuery.class, new HybridQueryTextExtractor(this));
4041

42+
// Handle nested query
43+
register(OpenSearchToParentBlockJoinQuery.class, new NestedQueryTextExtractor(this));
44+
4145
// BooleanQueryTextExtractor needs a reference to this registry
4246
// so we need to register it after creating the registry instance
4347
register(BooleanQuery.class, new BooleanQueryTextExtractor(this));

src/main/java/org/opensearch/neuralsearch/mapper/dto/SemanticParameters.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ public class SemanticParameters {
1919
public SemanticParameters(String modelId, String searchModelId, String rawFieldType, String semanticInfoFieldName) {
2020
this.modelId = modelId;
2121
this.searchModelId = searchModelId;
22-
this.semanticInfoFieldName = semanticInfoFieldName;
2322
this.rawFieldType = rawFieldType;
23+
this.semanticInfoFieldName = semanticInfoFieldName;
2424
}
2525
}

src/main/java/org/opensearch/neuralsearch/mappingtransformer/SemanticInfoConfigBuilder.java

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -111,17 +111,18 @@ private Map<String, Object> buildRankFeaturesFieldConfig() {
111111
return config;
112112
}
113113

114-
public SemanticInfoConfigBuilder mlModel(@NonNull final MLModel mlModel) {
114+
// Here we also require the model id because sometimes the MLModel does not have that info.
115+
public SemanticInfoConfigBuilder mlModel(@NonNull final MLModel mlModel, @NonNull final String modelId) {
115116
switch (mlModel.getAlgorithm()) {
116-
case FunctionName.TEXT_EMBEDDING -> extractInfoForTextEmbeddingModel(mlModel);
117+
case FunctionName.TEXT_EMBEDDING -> extractInfoForTextEmbeddingModel(mlModel, modelId);
117118
case FunctionName.SPARSE_ENCODING, FunctionName.SPARSE_TOKENIZE -> extractInfoForSparseModel();
118-
case FunctionName.REMOTE -> extractInfoForRemoteModel(mlModel);
119+
case FunctionName.REMOTE -> extractInfoForRemoteModel(mlModel, modelId);
119120
default -> throw new IllegalArgumentException(
120121
String.format(
121122
Locale.ROOT,
122123
"The algorithm %s of the model %s is not supported in the semantic field. Supported algorithms: [%s].",
123124
mlModel.getAlgorithm().name(),
124-
mlModel.getModelId(),
125+
modelId,
125126
String.join(",", SUPPORTED_MODEL_ALGORITHMS)
126127
)
127128
);
@@ -130,11 +131,9 @@ public SemanticInfoConfigBuilder mlModel(@NonNull final MLModel mlModel) {
130131
return this;
131132
}
132133

133-
private void extractInfoForTextEmbeddingModel(@NonNull final MLModel mlModel) {
134+
private void extractInfoForTextEmbeddingModel(@NonNull final MLModel mlModel, @NonNull final String modelId) {
134135
this.embeddingFieldType = KNNVectorFieldMapper.CONTENT_TYPE;
135136

136-
final String modelId = mlModel.getModelId();
137-
138137
if (mlModel.getModelConfig() instanceof TextEmbeddingModelConfig == false) {
139138
throw new IllegalArgumentException(
140139
String.format(
@@ -185,8 +184,7 @@ private void extractInfoForSparseModel() {
185184
this.embeddingFieldType = RankFeaturesFieldMapper.CONTENT_TYPE;
186185
}
187186

188-
private void extractInfoForRemoteModel(@NonNull final MLModel mlModel) {
189-
final String modelId = mlModel.getModelId();
187+
private void extractInfoForRemoteModel(@NonNull final MLModel mlModel, @NonNull final String modelId) {
190188
final MLModelConfig mlModelConfig = mlModel.getModelConfig();
191189
if (mlModelConfig == null) {
192190
throw new IllegalArgumentException(String.format(Locale.ROOT, "Model config is null for the remote model %s.", modelId));
@@ -200,7 +198,7 @@ private void extractInfoForRemoteModel(@NonNull final MLModel mlModel) {
200198
}
201199

202200
switch (modelTypeFunctionName) {
203-
case FunctionName.TEXT_EMBEDDING -> extractInfoForTextEmbeddingModel(mlModel);
201+
case FunctionName.TEXT_EMBEDDING -> extractInfoForTextEmbeddingModel(mlModel, modelId);
204202
case FunctionName.SPARSE_ENCODING, FunctionName.SPARSE_TOKENIZE -> extractInfoForSparseModel();
205203
default -> throw new IllegalArgumentException(getUnsupportedRemoteModelError(modelType, modelId));
206204
}

src/main/java/org/opensearch/neuralsearch/mappingtransformer/SemanticMappingTransformer.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ private void modifyMappings(
190190
final List<String> fieldPathList = modelIdToFieldPathMap.get(modelId);
191191
for (String fieldPath : fieldPathList) {
192192
try {
193-
final Map<String, Object> semanticInfoConfig = createSemanticInfoField(mlModel);
193+
final Map<String, Object> semanticInfoConfig = createSemanticInfoField(mlModel, modelId);
194194
final Map<String, Object> fieldConfig = semanticFieldPathToConfigMap.get(fieldPath);
195195
setSemanticInfoField(mappings, fieldPath, fieldConfig.get(SEMANTIC_INFO_FIELD_NAME), semanticInfoConfig);
196196
} catch (IllegalArgumentException e) {
@@ -207,9 +207,9 @@ private String getModifyMappingErrorMessage(@NonNull final String fieldPath, fin
207207
}
208208

209209
@VisibleForTesting
210-
private Map<String, Object> createSemanticInfoField(final @NonNull MLModel modelConfig) {
210+
private Map<String, Object> createSemanticInfoField(final @NonNull MLModel modelConfig, String modelId) {
211211
SemanticInfoConfigBuilder builder = new SemanticInfoConfigBuilder(xContentRegistry);
212-
return builder.mlModel(modelConfig).build();
212+
return builder.mlModel(modelConfig, modelId).build();
213213
}
214214

215215
@SuppressWarnings("unchecked")

src/main/java/org/opensearch/neuralsearch/plugin/NeuralSearch.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ public Collection<Object> createComponents(
144144
final IndexNameExpressionResolver indexNameExpressionResolver,
145145
final Supplier<RepositoriesService> repositoriesServiceSupplier
146146
) {
147-
NeuralSearchClusterUtil.instance().initialize(clusterService);
147+
NeuralSearchClusterUtil.instance().initialize(clusterService, indexNameExpressionResolver);
148148
NeuralQueryBuilder.initialize(clientAccessor);
149149
NeuralSparseQueryBuilder.initialize(clientAccessor);
150150
QueryTextExtractorRegistry queryTextExtractorRegistry = new QueryTextExtractorRegistry();

0 commit comments

Comments
 (0)