Skip to content

Commit 966d125

Browse files
committed
Add normalization processor and RRF processor stats
Signed-off-by: Andy Qin <[email protected]>
1 parent 4d12707 commit 966d125

File tree

9 files changed

+280
-2
lines changed

9 files changed

+280
-2
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
1414
### Enhancements
1515
- [Performance Improvement] Add custom bulk scorer for hybrid query (2-3x faster) ([#1289](https://github.com/opensearch-project/neural-search/pull/1289))
1616
- [Stats] Add stats for text chunking processor algorithms ([#1308](https://github.com/opensearch-project/neural-search/pull/1308))
17+
- [Stats] Add stats for normalization processor and RRF processor ([#1326](https://github.com/opensearch-project/neural-search/pull/1326))
1718

1819
### Bug Fixes
1920
- Fix score value as null for single shard when sorting is not done on score field ([#1277](https://github.com/opensearch-project/neural-search/pull/1277))

src/main/java/org/opensearch/neuralsearch/processor/NormalizationProcessor.java

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,16 @@
1515
import org.opensearch.action.search.SearchPhaseContext;
1616
import org.opensearch.action.search.SearchPhaseName;
1717
import org.opensearch.action.search.SearchPhaseResults;
18+
import org.opensearch.neuralsearch.processor.combination.ArithmeticMeanScoreCombinationTechnique;
19+
import org.opensearch.neuralsearch.processor.combination.GeometricMeanScoreCombinationTechnique;
20+
import org.opensearch.neuralsearch.processor.combination.HarmonicMeanScoreCombinationTechnique;
1821
import org.opensearch.neuralsearch.processor.combination.ScoreCombinationTechnique;
22+
import org.opensearch.neuralsearch.processor.normalization.L2ScoreNormalizationTechnique;
23+
import org.opensearch.neuralsearch.processor.normalization.MinMaxScoreNormalizationTechnique;
1924
import org.opensearch.neuralsearch.processor.normalization.ScoreNormalizationTechnique;
25+
import org.opensearch.neuralsearch.processor.normalization.ZScoreNormalizationTechnique;
26+
import org.opensearch.neuralsearch.stats.events.EventStatName;
27+
import org.opensearch.neuralsearch.stats.events.EventStatsManager;
2028
import org.opensearch.search.SearchPhaseResult;
2129
import org.opensearch.search.fetch.FetchSearchResult;
2230
import org.opensearch.search.pipeline.PipelineProcessingContext;
@@ -54,6 +62,7 @@ <Result extends SearchPhaseResult> void hybridizeScores(
5462
Optional<FetchSearchResult> fetchSearchResult = getFetchSearchResults(searchPhaseResult);
5563
boolean explain = Objects.nonNull(searchPhaseContext.getRequest().source().explain())
5664
&& searchPhaseContext.getRequest().source().explain();
65+
recordStats(normalizationTechnique, combinationTechnique);
5766
NormalizationProcessorWorkflowExecuteRequest request = NormalizationProcessorWorkflowExecuteRequest.builder()
5867
.querySearchResults(querySearchResults)
5968
.fetchSearchResultOptional(fetchSearchResult)
@@ -135,4 +144,36 @@ private <Result extends SearchPhaseResult> Optional<FetchSearchResult> getFetchS
135144
Optional<Result> optionalFirstSearchPhaseResult = searchPhaseResults.getAtomicArray().asList().stream().findFirst();
136145
return optionalFirstSearchPhaseResult.map(SearchPhaseResult::fetchResult);
137146
}
147+
148+
/**
 * Records event stats for one invocation of this processor.
 * Increments the {@code NORMALIZATION_PROCESSOR_EXECUTIONS} counter, then delegates to
 * {@link #recordNormalizationCombinationTechniqueStats} for the per-technique counters.
 *
 * @param normalizationTechnique normalization technique forwarded to the per-technique recorder
 * @param combinationTechnique combination technique forwarded to the per-technique recorder
 */
private void recordStats(ScoreNormalizationTechnique normalizationTechnique, ScoreCombinationTechnique combinationTechnique) {
149+
EventStatsManager.increment(EventStatName.NORMALIZATION_PROCESSOR_EXECUTIONS);
150+
recordNormalizationCombinationTechniqueStats(normalizationTechnique, combinationTechnique);
151+
}
152+
153+
void recordNormalizationCombinationTechniqueStats(
154+
ScoreNormalizationTechnique normalizationTechnique,
155+
ScoreCombinationTechnique combinationTechnique
156+
) {
157+
switch (normalizationTechnique.techniqueName()) {
158+
case L2ScoreNormalizationTechnique.TECHNIQUE_NAME -> EventStatsManager.increment(EventStatName.NORM_TECHNIQUE_L2_EXECUTIONS);
159+
case MinMaxScoreNormalizationTechnique.TECHNIQUE_NAME -> EventStatsManager.increment(
160+
EventStatName.NORM_TECHNIQUE_MINMAX_EXECUTIONS
161+
);
162+
case ZScoreNormalizationTechnique.TECHNIQUE_NAME -> EventStatsManager.increment(
163+
EventStatName.NORM_TECHNIQUE_NORM_ZSCORE_EXECUTIONS
164+
);
165+
}
166+
167+
switch (combinationTechnique.techniqueName()) {
168+
case ArithmeticMeanScoreCombinationTechnique.TECHNIQUE_NAME -> EventStatsManager.increment(
169+
EventStatName.COMB_TECHNIQUE_ARITHMETIC_EXECUTIONS
170+
);
171+
case GeometricMeanScoreCombinationTechnique.TECHNIQUE_NAME -> EventStatsManager.increment(
172+
EventStatName.COMB_TECHNIQUE_GEOMETRIC_EXECUTIONS
173+
);
174+
case HarmonicMeanScoreCombinationTechnique.TECHNIQUE_NAME -> EventStatsManager.increment(
175+
EventStatName.COMB_TECHNIQUE_HARMONIC_EXECUTIONS
176+
);
177+
}
178+
}
138179
}

src/main/java/org/opensearch/neuralsearch/processor/RRFProcessor.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
import lombok.Getter;
1717
import org.opensearch.neuralsearch.processor.combination.ScoreCombinationTechnique;
1818
import org.opensearch.neuralsearch.processor.normalization.ScoreNormalizationTechnique;
19+
import org.opensearch.neuralsearch.stats.events.EventStatName;
20+
import org.opensearch.neuralsearch.stats.events.EventStatsManager;
1921
import org.opensearch.search.fetch.FetchSearchResult;
2022
import org.opensearch.search.pipeline.PipelineProcessingContext;
2123
import org.opensearch.search.query.QuerySearchResult;
@@ -70,6 +72,7 @@ <Result extends SearchPhaseResult> void hybridizeScores(
7072
Optional<FetchSearchResult> fetchSearchResult = getFetchSearchResults(searchPhaseResult);
7173
boolean explain = Objects.nonNull(searchPhaseContext.getRequest().source().explain())
7274
&& searchPhaseContext.getRequest().source().explain();
75+
EventStatsManager.increment(EventStatName.RRF_PROCESSOR_EXECUTIONS);
7376
// make data transfer object to pass in, execute will get object with 4 or 5 fields, depending
7477
// on coming from NormalizationProcessor or RRFProcessor
7578
NormalizationProcessorWorkflowExecuteRequest normalizationExecuteDTO = NormalizationProcessorWorkflowExecuteRequest.builder()

src/main/java/org/opensearch/neuralsearch/stats/events/EventStatName.java

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,28 @@
1919
@Getter
2020
public enum EventStatName implements StatName {
2121
TEXT_EMBEDDING_PROCESSOR_EXECUTIONS("text_embedding_executions", "processors.ingest", EventStatType.TIMESTAMPED_EVENT_COUNTER),
22+
// Text chunking processor stats
2223
TEXT_CHUNKING_PROCESSOR_EXECUTIONS("text_chunking_executions", "processors.ingest", EventStatType.TIMESTAMPED_EVENT_COUNTER),
2324
TEXT_CHUNKING_FIXED_LENGTH_EXECUTIONS(
2425
"text_chunking_fixed_length_executions",
2526
"processors.ingest",
2627
EventStatType.TIMESTAMPED_EVENT_COUNTER
2728
),
28-
TEXT_CHUNKING_DELIMITER_EXECUTIONS("text_chunking_delimiter_executions", "processors.ingest", EventStatType.TIMESTAMPED_EVENT_COUNTER);
29+
TEXT_CHUNKING_DELIMITER_EXECUTIONS("text_chunking_delimiter_executions", "processors.ingest", EventStatType.TIMESTAMPED_EVENT_COUNTER),
30+
// Normalization processor stats
31+
NORMALIZATION_PROCESSOR_EXECUTIONS(
32+
"normalization_processor_executions",
33+
"processors.search.hybrid",
34+
EventStatType.TIMESTAMPED_EVENT_COUNTER
35+
),
36+
NORM_TECHNIQUE_L2_EXECUTIONS("norm_l2_executions", "processors.search.hybrid", EventStatType.TIMESTAMPED_EVENT_COUNTER),
37+
NORM_TECHNIQUE_MINMAX_EXECUTIONS("norm_minmax_executions", "processors.search.hybrid", EventStatType.TIMESTAMPED_EVENT_COUNTER),
38+
NORM_TECHNIQUE_NORM_ZSCORE_EXECUTIONS("norm_zscore_executions", "processors.search.hybrid", EventStatType.TIMESTAMPED_EVENT_COUNTER),
39+
COMB_TECHNIQUE_ARITHMETIC_EXECUTIONS("comb_arithmetic_executions", "processors.search.hybrid", EventStatType.TIMESTAMPED_EVENT_COUNTER),
40+
COMB_TECHNIQUE_GEOMETRIC_EXECUTIONS("comb_geometric_executions", "processors.search.hybrid", EventStatType.TIMESTAMPED_EVENT_COUNTER),
41+
COMB_TECHNIQUE_HARMONIC_EXECUTIONS("comb_harmonic_executions", "processors.search.hybrid", EventStatType.TIMESTAMPED_EVENT_COUNTER),
42+
// RRF processor stats
43+
RRF_PROCESSOR_EXECUTIONS("rrf_processor_executions", "processors.search.hybrid", EventStatType.TIMESTAMPED_EVENT_COUNTER),;
2944

3045
private final String nameString;
3146
private final String path;

src/main/java/org/opensearch/neuralsearch/stats/info/InfoStatName.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,20 @@ public enum InfoStatName implements StatName {
2121
// Cluster info
2222
CLUSTER_VERSION("cluster_version", "", InfoStatType.INFO_STRING),
2323
TEXT_EMBEDDING_PROCESSORS("text_embedding_processors_in_pipelines", "processors.ingest", InfoStatType.INFO_COUNTER),
24+
// Text chunking processor stats
2425
TEXT_CHUNKING_PROCESSORS("text_chunking_processors", "processors.ingest", InfoStatType.INFO_COUNTER),
2526
TEXT_CHUNKING_DELIMITER_PROCESSORS("text_chunking_delimiter_processors", "processors.ingest", InfoStatType.INFO_COUNTER),
26-
TEXT_CHUNKING_FIXED_LENGTH_PROCESSORS("text_chunking_fixed_length_processors", "processors.ingest", InfoStatType.INFO_COUNTER);
27+
TEXT_CHUNKING_FIXED_LENGTH_PROCESSORS("text_chunking_fixed_length_processors", "processors.ingest", InfoStatType.INFO_COUNTER),
28+
// Normalization processor
29+
NORMALIZATION_PROCESSORS("normalization_processors", "processors.search.hybrid", InfoStatType.INFO_COUNTER),
30+
NORM_TECHNIQUE_L2_PROCESSORS("norm_l2_processors", "processors.search.hybrid", InfoStatType.INFO_COUNTER),
31+
NORM_TECHNIQUE_MINMAX_PROCESSORS("norm_minmax_processors", "processors.search.hybrid", InfoStatType.INFO_COUNTER),
32+
NORM_TECHNIQUE_ZSCORE_PROCESSORS("norm_zscore_processors", "processors.search.hybrid", InfoStatType.INFO_COUNTER),
33+
COMB_TECHNIQUE_ARITHMETIC_PROCESSORS("comb_arithmetic_mean_processors", "processors.search.hybrid", InfoStatType.INFO_COUNTER),
34+
COMB_TECHNIQUE_GEOMETRIC_PROCESSORS("comb_geometric_mean_processors", "processors.search.hybrid", InfoStatType.INFO_COUNTER),
35+
COMB_TECHNIQUE_HARMONIC_PROCESSORS("comb_harmonic_mean_processors", "processors.search.hybrid", InfoStatType.INFO_COUNTER),
36+
// RRF processor
37+
RRF_PROCESSORS("rrf_processors", "processors.search.hybrid", InfoStatType.INFO_COUNTER),;
2738

2839
private final String nameString;
2940
private final String path;

src/main/java/org/opensearch/neuralsearch/stats/info/InfoStatsManager.java

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,17 @@
44
*/
55
package org.opensearch.neuralsearch.stats.info;
66

7+
import org.opensearch.neuralsearch.processor.NormalizationProcessor;
8+
import org.opensearch.neuralsearch.processor.RRFProcessor;
79
import org.opensearch.neuralsearch.processor.TextChunkingProcessor;
810
import org.opensearch.neuralsearch.processor.TextEmbeddingProcessor;
11+
import org.opensearch.neuralsearch.processor.combination.ArithmeticMeanScoreCombinationTechnique;
12+
import org.opensearch.neuralsearch.processor.combination.GeometricMeanScoreCombinationTechnique;
13+
import org.opensearch.neuralsearch.processor.combination.HarmonicMeanScoreCombinationTechnique;
14+
import org.opensearch.neuralsearch.processor.factory.NormalizationProcessorFactory;
15+
import org.opensearch.neuralsearch.processor.normalization.L2ScoreNormalizationTechnique;
16+
import org.opensearch.neuralsearch.processor.normalization.MinMaxScoreNormalizationTechnique;
17+
import org.opensearch.neuralsearch.processor.normalization.ZScoreNormalizationTechnique;
918
import org.opensearch.neuralsearch.processor.chunker.DelimiterChunker;
1019
import org.opensearch.neuralsearch.processor.chunker.FixedTokenLengthChunker;
1120
import org.opensearch.neuralsearch.settings.NeuralSearchSettingsAccessor;
@@ -24,6 +33,7 @@
2433
*/
2534
public class InfoStatsManager {
2635
public static final String PROCESSORS_KEY = "processors";
36+
public static final String PHASE_RESULTS_PROCESSORS_KEY = "phase_results_processors";
2737

2838
private final NeuralSearchClusterUtil neuralSearchClusterUtil;
2939
private final NeuralSearchSettingsAccessor settingsAccessor;
@@ -83,6 +93,9 @@ private Map<InfoStatName, CountableInfoStatSnapshot> getCountableStats() {
8393
// Parses ingest pipeline processor configs for processor info
8494
addIngestProcessorStats(countableInfoStats);
8595

96+
// Parses search pipeline processor configs for processor info
97+
addSearchProcessorStats(countableInfoStats);
98+
8699
// Helpers to parse any additional pipeline processor configs for processor info would go here
87100
return countableInfoStats;
88101
}
@@ -162,6 +175,81 @@ private void countTextChunkingProcessorStats(Map<InfoStatName, CountableInfoStat
162175
}
163176
}
164177

178+
/**
179+
* Adds search processor info stats, mutating the input
180+
* @param stats mutable map of info stats that the result will be added to
181+
*/
182+
private void addSearchProcessorStats(Map<InfoStatName, CountableInfoStatSnapshot> stats) {
183+
List<Map<String, Object>> pipelineConfigs = pipelineServiceUtil.getSearchPipelineConfigs();
184+
185+
// Iterate through all search processors and count their stats individually by calling helpers
186+
for (Map<String, Object> pipelineConfig : pipelineConfigs) {
187+
// Search phase results processors
188+
List<Map<String, Object>> phaseResultsProcessors = asListOfMaps(pipelineConfig.get(PHASE_RESULTS_PROCESSORS_KEY));
189+
for (Map<String, Object> phaseResultsProcessor : phaseResultsProcessors) {
190+
for (Map.Entry<String, Object> entry : phaseResultsProcessor.entrySet()) {
191+
String processorType = entry.getKey();
192+
Map<String, Object> processorConfig = asMap(entry.getValue());
193+
switch (processorType) {
194+
case NormalizationProcessor.TYPE:
195+
countNormalizationProcessorStats(stats, processorConfig);
196+
break;
197+
case RRFProcessor.TYPE:
198+
countRRFProcessorStats(stats, processorConfig);
199+
break;
200+
}
201+
}
202+
}
203+
}
204+
}
205+
206+
private void countNormalizationProcessorStats(Map<InfoStatName, CountableInfoStatSnapshot> stats, Map<String, Object> processorConfig) {
207+
increment(stats, InfoStatName.NORMALIZATION_PROCESSORS);
208+
209+
String normalizationTechnique = asString(
210+
asMap(processorConfig.get(NormalizationProcessorFactory.NORMALIZATION_CLAUSE)).get(NormalizationProcessorFactory.TECHNIQUE)
211+
);
212+
String combinationTechnique = asString(
213+
asMap(processorConfig.get(NormalizationProcessorFactory.COMBINATION_CLAUSE)).get(NormalizationProcessorFactory.TECHNIQUE)
214+
);
215+
216+
countNormalizationTechniqueStats(stats, normalizationTechnique);
217+
countCombinationTechniqueStats(stats, combinationTechnique);
218+
}
219+
220+
private void countRRFProcessorStats(Map<InfoStatName, CountableInfoStatSnapshot> stats, Map<String, Object> processorConfig) {
221+
increment(stats, InfoStatName.RRF_PROCESSORS);
222+
223+
// RRF only has combination technique
224+
String combinationTechnique = asString(
225+
asMap(processorConfig.get(NormalizationProcessorFactory.COMBINATION_CLAUSE)).get(NormalizationProcessorFactory.TECHNIQUE)
226+
);
227+
228+
countCombinationTechniqueStats(stats, combinationTechnique);
229+
}
230+
231+
private void countNormalizationTechniqueStats(Map<InfoStatName, CountableInfoStatSnapshot> stats, String normalizationTechnique) {
232+
switch (normalizationTechnique) {
233+
case L2ScoreNormalizationTechnique.TECHNIQUE_NAME -> increment(stats, InfoStatName.NORM_TECHNIQUE_L2_PROCESSORS);
234+
case MinMaxScoreNormalizationTechnique.TECHNIQUE_NAME -> increment(stats, InfoStatName.NORM_TECHNIQUE_MINMAX_PROCESSORS);
235+
case ZScoreNormalizationTechnique.TECHNIQUE_NAME -> increment(stats, InfoStatName.NORM_TECHNIQUE_ZSCORE_PROCESSORS);
236+
}
237+
}
238+
239+
private void countCombinationTechniqueStats(Map<InfoStatName, CountableInfoStatSnapshot> stats, String combinationTechnique) {
240+
switch (combinationTechnique) {
241+
case ArithmeticMeanScoreCombinationTechnique.TECHNIQUE_NAME -> increment(
242+
stats,
243+
InfoStatName.COMB_TECHNIQUE_ARITHMETIC_PROCESSORS
244+
);
245+
case GeometricMeanScoreCombinationTechnique.TECHNIQUE_NAME -> increment(
246+
stats,
247+
InfoStatName.COMB_TECHNIQUE_GEOMETRIC_PROCESSORS
248+
);
249+
case HarmonicMeanScoreCombinationTechnique.TECHNIQUE_NAME -> increment(stats, InfoStatName.COMB_TECHNIQUE_HARMONIC_PROCESSORS);
250+
}
251+
}
252+
165253
/**
166254
* Increments a countable info stat in the given stat name
167255
* @param stats map containing the stat to increment
@@ -203,6 +291,17 @@ private Map<String, Object> asMap(Object value) {
203291
return value instanceof Map ? (Map<String, Object>) value : null;
204292
}
205293

294+
/**
295+
* Helper to cast generic object into String or null
296+
* Used to parse pipeline processor configs
297+
* @param value the object
298+
* @return the string or null if not a string
299+
*/
300+
@SuppressWarnings("unchecked")
301+
private String asString(Object value) {
302+
return value instanceof String ? (String) value : null;
303+
}
304+
206305
/**
207306
* Helper to cast generic object into a list of Map<String, Object>
208307
* Used to parse pipeline processor configs

0 commit comments

Comments
 (0)