Skip to content

Commit ebc0bc8

Browse files
chloewqgYeonghyeonKO
authored andcommitted
Fix the edge case when the value of a fieldMap key in ingestDocument is empty string (opensearch-project#1257)
Signed-off-by: Chloe Gao <[email protected]> Signed-off-by: yeonghyeonKo <[email protected]>
1 parent 76ee272 commit ebc0bc8

File tree

3 files changed

+77
-12
lines changed

3 files changed

+77
-12
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
1010
### Enhancements
1111

1212
### Bug Fixes
13+
- Add validations to prevent empty input_text_field and input_image_field in TextImageEmbeddingProcessor ([#1257](https://github.com/opensearch-project/neural-search/pull/1257))
1314

1415
### Infrastructure
1516

src/main/java/org/opensearch/neuralsearch/processor/TextImageEmbeddingProcessor.java

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,8 +183,26 @@ Map<String, String> buildMapWithKnnKeyAndOriginalValue(final IngestDocument inge
183183
if (!sourceAndMetadataMap.containsKey(originalKey)) {
184184
continue;
185185
}
186-
if (!(sourceAndMetadataMap.get(originalKey) instanceof String)) {
187-
throw new IllegalArgumentException("Unsupported format of the field in the document, value must be a string");
186+
187+
Object metaValue = sourceAndMetadataMap.get(originalKey);
188+
if (Objects.isNull(metaValue)) {
189+
throw new IllegalArgumentException(
190+
String.format(
191+
Locale.getDefault(),
192+
"Unsupported format of the field in the document, %s value must be a non-empty string. Currently it is null",
193+
originalKey
194+
)
195+
);
196+
} else if ((metaValue instanceof String) == false || Objects.isNull(metaValue) || StringUtils.isBlank((String) metaValue)) {
197+
throw new IllegalArgumentException(
198+
String.format(
199+
Locale.getDefault(),
200+
"Unsupported format of the field in the document, %s value must be a non-empty string. Currently it is '%s'. Type is %s",
201+
originalKey,
202+
metaValue,
203+
metaValue.getClass()
204+
)
205+
);
188206
}
189207
mapWithKnnKeys.put(originalKey, (String) sourceAndMetadataMap.get(originalKey));
190208
}

src/test/java/org/opensearch/neuralsearch/processor/TextImageEmbeddingProcessorTests.java

Lines changed: 56 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import java.util.ArrayList;
2323
import java.util.HashMap;
2424
import java.util.List;
25+
import java.util.Locale;
2526
import java.util.Map;
2627
import java.util.function.BiConsumer;
2728
import java.util.function.Supplier;
@@ -323,31 +324,76 @@ public void testExecute_MLClientAccessorThrowFail_handlerFailure() {
323324
}
324325

325326
public void testExecute_mapHasNonStringValue_throwIllegalArgumentException() {
326-
Map<String, String> map1 = ImmutableMap.of("test1", "test2");
327-
Map<String, Double> map2 = ImmutableMap.of("test3", 209.3D);
328327
Map<String, Object> sourceAndMetadata = new HashMap<>();
329-
sourceAndMetadata.put("key1", map1);
330-
sourceAndMetadata.put("my_text_field", map2);
328+
sourceAndMetadata.put("key1", "test2");
329+
sourceAndMetadata.put("my_text_field", 209.3D);
331330
sourceAndMetadata.put(IndexFieldMapper.NAME, "my_index");
332331
IngestDocument ingestDocument = new IngestDocument(sourceAndMetadata, new HashMap<>());
333332
TextImageEmbeddingProcessor processor = createInstance(false);
334333
BiConsumer handler = mock(BiConsumer.class);
335334
processor.execute(ingestDocument, handler);
336-
verify(handler).accept(isNull(), any(IllegalArgumentException.class));
335+
336+
ArgumentCaptor<Throwable> argumentCaptor = ArgumentCaptor.forClass(Throwable.class);
337+
verify(handler).accept(isNull(), argumentCaptor.capture());
338+
339+
Throwable capturedException = argumentCaptor.getValue();
340+
assertTrue(capturedException instanceof IllegalArgumentException);
341+
final String desiredExceptionMessage = String.format(
342+
Locale.getDefault(),
343+
"Unsupported format of the field in the document, %s value must be a non-empty string. Currently it is '%s'. Type is %s",
344+
"my_text_field",
345+
209.3D,
346+
Double.class
347+
);
348+
assertEquals(desiredExceptionMessage, capturedException.getMessage());
337349
}
338350

339351
public void testExecute_mapHasEmptyStringValue_throwIllegalArgumentException() {
340-
Map<String, String> map1 = ImmutableMap.of("test1", "test2");
341-
Map<String, String> map2 = ImmutableMap.of("test3", " ");
342352
Map<String, Object> sourceAndMetadata = new HashMap<>();
343-
sourceAndMetadata.put("key1", map1);
344-
sourceAndMetadata.put("my_text_field", map2);
353+
sourceAndMetadata.put("key1", "test2");
354+
sourceAndMetadata.put("my_text_field", "");
345355
sourceAndMetadata.put(IndexFieldMapper.NAME, "my_index");
346356
IngestDocument ingestDocument = new IngestDocument(sourceAndMetadata, new HashMap<>());
347357
TextImageEmbeddingProcessor processor = createInstance(false);
348358
BiConsumer handler = mock(BiConsumer.class);
349359
processor.execute(ingestDocument, handler);
350-
verify(handler).accept(isNull(), any(IllegalArgumentException.class));
360+
361+
ArgumentCaptor<Throwable> argumentCaptor = ArgumentCaptor.forClass(Throwable.class);
362+
verify(handler).accept(isNull(), argumentCaptor.capture());
363+
364+
Throwable capturedException = argumentCaptor.getValue();
365+
assertTrue(capturedException instanceof IllegalArgumentException);
366+
final String desiredExceptionMessage = String.format(
367+
Locale.getDefault(),
368+
"Unsupported format of the field in the document, %s value must be a non-empty string. Currently it is '%s'. Type is %s",
369+
"my_text_field",
370+
"",
371+
"".getClass()
372+
);
373+
assertEquals(desiredExceptionMessage, capturedException.getMessage());
374+
}
375+
376+
public void testExecute_mapHasNullValue_throwIllegalArgumentException() {
377+
Map<String, Object> sourceAndMetadata = new HashMap<>();
378+
sourceAndMetadata.put("key1", "test2");
379+
sourceAndMetadata.put("my_text_field", null);
380+
sourceAndMetadata.put(IndexFieldMapper.NAME, "my_index");
381+
IngestDocument ingestDocument = new IngestDocument(sourceAndMetadata, new HashMap<>());
382+
TextImageEmbeddingProcessor processor = createInstance(false);
383+
BiConsumer handler = mock(BiConsumer.class);
384+
processor.execute(ingestDocument, handler);
385+
386+
ArgumentCaptor<Throwable> argumentCaptor = ArgumentCaptor.forClass(Throwable.class);
387+
verify(handler).accept(isNull(), argumentCaptor.capture());
388+
389+
Throwable capturedException = argumentCaptor.getValue();
390+
assertTrue(capturedException instanceof IllegalArgumentException);
391+
final String desiredExceptionMessage = String.format(
392+
Locale.getDefault(),
393+
"Unsupported format of the field in the document, %s value must be a non-empty string. Currently it is null",
394+
"my_text_field"
395+
);
396+
assertEquals(desiredExceptionMessage, capturedException.getMessage());
351397
}
352398

353399
public void testExecute_hybridTypeInput_successful() throws Exception {

0 commit comments

Comments
 (0)