|
9 | 9 | import org.apache.lucene.tests.analysis.MockTokenizer;
|
10 | 10 | import org.junit.Before;
|
11 | 11 | import java.util.ArrayList;
|
| 12 | +import java.util.EnumSet; |
12 | 13 | import java.util.HashMap;
|
13 | 14 | import java.util.List;
|
14 | 15 | import java.util.Locale;
|
|
38 | 39 | import org.opensearch.neuralsearch.processor.chunker.FixedTokenLengthChunker;
|
39 | 40 | import org.opensearch.neuralsearch.processor.factory.TextChunkingProcessorFactory;
|
40 | 41 | import org.opensearch.neuralsearch.settings.NeuralSearchSettingsAccessor;
|
| 42 | +import org.opensearch.neuralsearch.stats.events.EventStatName; |
41 | 43 | import org.opensearch.neuralsearch.stats.events.EventStatsManager;
|
| 44 | +import org.opensearch.neuralsearch.stats.events.TimestampedEventStatSnapshot; |
42 | 45 | import org.opensearch.plugins.AnalysisPlugin;
|
43 | 46 | import org.opensearch.test.OpenSearchTestCase;
|
44 | 47 | import static org.opensearch.neuralsearch.processor.TextChunkingProcessor.TYPE;
|
@@ -95,6 +98,7 @@ public void setup() {
|
95 | 98 | when(clusterService.state()).thenReturn(clusterState);
|
96 | 99 | textChunkingProcessorFactory = new TextChunkingProcessorFactory(environment, clusterService, getAnalysisRegistry());
|
97 | 100 |
|
| 101 | + EventStatsManager.instance().reset(); |
98 | 102 | NeuralSearchSettingsAccessor settingsAccessor = mock(NeuralSearchSettingsAccessor.class);
|
99 | 103 | when(settingsAccessor.isStatsEnabled()).thenReturn(true);
|
100 | 104 | EventStatsManager.instance().initialize(settingsAccessor);
|
@@ -978,4 +982,70 @@ public void testExecute_withIgnoreMissing_thenSucceed() {
|
978 | 982 | IngestDocument document = processor.execute(ingestDocument);
|
979 | 983 | assertFalse(document.getSourceAndMetadata().containsKey(OUTPUT_FIELD));
|
980 | 984 | }
|
| 985 | + |
| 986 | + @SneakyThrows |
| 987 | + public void testExecute_statsDisabled_thenSucceed() { |
| 988 | + NeuralSearchSettingsAccessor settingsAccessor = mock(NeuralSearchSettingsAccessor.class); |
| 989 | + when(settingsAccessor.isStatsEnabled()).thenReturn(false); |
| 990 | + EventStatsManager.instance().initialize(settingsAccessor); |
| 991 | + |
| 992 | + TextChunkingProcessor processor = createFixedTokenLengthInstance(createStringFieldMap()); |
| 993 | + IngestDocument ingestDocument = createIngestDocumentWithSourceData(createSourceDataString()); |
| 994 | + IngestDocument document = processor.execute(ingestDocument); |
| 995 | + assert document.getSourceAndMetadata().containsKey(OUTPUT_FIELD); |
| 996 | + Object passages = document.getSourceAndMetadata().get(OUTPUT_FIELD); |
| 997 | + assert (passages instanceof List<?>); |
| 998 | + List<String> expectedPassages = new ArrayList<>(); |
| 999 | + expectedPassages.add("This is an example document to be chunked. The document "); |
| 1000 | + expectedPassages.add("contains a single paragraph, two sentences and 24 tokens by "); |
| 1001 | + expectedPassages.add("standard tokenizer in OpenSearch."); |
| 1002 | + assertEquals(expectedPassages, passages); |
| 1003 | + |
| 1004 | + Map<EventStatName, TimestampedEventStatSnapshot> snapshots = EventStatsManager.instance() |
| 1005 | + .getTimestampedEventStatSnapshots(EnumSet.allOf(EventStatName.class)); |
| 1006 | + |
| 1007 | + assertEquals(0L, snapshots.get(EventStatName.TEXT_CHUNKING_PROCESSOR_EXECUTIONS).getValue().longValue()); |
| 1008 | + assertEquals(0L, snapshots.get(EventStatName.TEXT_CHUNKING_FIXED_LENGTH_EXECUTIONS).getValue().longValue()); |
| 1009 | + } |
| 1010 | + |
| 1011 | + @SneakyThrows |
| 1012 | + public void testExecute_statsEnabled_withFixedTokenLength_andSourceDataString_thenSucceed() { |
| 1013 | + TextChunkingProcessor processor = createFixedTokenLengthInstance(createStringFieldMap()); |
| 1014 | + IngestDocument ingestDocument = createIngestDocumentWithSourceData(createSourceDataString()); |
| 1015 | + IngestDocument document = processor.execute(ingestDocument); |
| 1016 | + assert document.getSourceAndMetadata().containsKey(OUTPUT_FIELD); |
| 1017 | + Object passages = document.getSourceAndMetadata().get(OUTPUT_FIELD); |
| 1018 | + assert (passages instanceof List<?>); |
| 1019 | + List<String> expectedPassages = new ArrayList<>(); |
| 1020 | + expectedPassages.add("This is an example document to be chunked. The document "); |
| 1021 | + expectedPassages.add("contains a single paragraph, two sentences and 24 tokens by "); |
| 1022 | + expectedPassages.add("standard tokenizer in OpenSearch."); |
| 1023 | + assertEquals(expectedPassages, passages); |
| 1024 | + |
| 1025 | + Map<EventStatName, TimestampedEventStatSnapshot> snapshots = EventStatsManager.instance() |
| 1026 | + .getTimestampedEventStatSnapshots(EnumSet.allOf(EventStatName.class)); |
| 1027 | + |
| 1028 | + assertEquals(1L, snapshots.get(EventStatName.TEXT_CHUNKING_PROCESSOR_EXECUTIONS).getValue().longValue()); |
| 1029 | + assertEquals(1L, snapshots.get(EventStatName.TEXT_CHUNKING_FIXED_LENGTH_EXECUTIONS).getValue().longValue()); |
| 1030 | + } |
| 1031 | + |
| 1032 | + @SneakyThrows |
| 1033 | + public void testExecute_statsEnabled_withDelimiter_andSourceDataString_thenSucceed() { |
| 1034 | + TextChunkingProcessor processor = createDelimiterInstance(); |
| 1035 | + IngestDocument ingestDocument = createIngestDocumentWithSourceData(createSourceDataString()); |
| 1036 | + IngestDocument document = processor.execute(ingestDocument); |
| 1037 | + assert document.getSourceAndMetadata().containsKey(OUTPUT_FIELD); |
| 1038 | + Object passages = document.getSourceAndMetadata().get(OUTPUT_FIELD); |
| 1039 | + assert (passages instanceof List<?>); |
| 1040 | + List<String> expectedPassages = new ArrayList<>(); |
| 1041 | + expectedPassages.add("This is an example document to be chunked."); |
| 1042 | + expectedPassages.add(" The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch."); |
| 1043 | + assertEquals(expectedPassages, passages); |
| 1044 | + |
| 1045 | + Map<EventStatName, TimestampedEventStatSnapshot> snapshots = EventStatsManager.instance() |
| 1046 | + .getTimestampedEventStatSnapshots(EnumSet.allOf(EventStatName.class)); |
| 1047 | + |
| 1048 | + assertEquals(1L, snapshots.get(EventStatName.TEXT_CHUNKING_PROCESSOR_EXECUTIONS).getValue().longValue()); |
| 1049 | + assertEquals(1L, snapshots.get(EventStatName.TEXT_CHUNKING_DELIMITER_EXECUTIONS).getValue().longValue()); |
| 1050 | + } |
981 | 1051 | }
|
0 commit comments