opensearch-project
diff --git a/‎src/main/java/org/opensearch/neuralsearch/query/HybridBulkScorer.java
Lines changed: 175 additions & 0 deletions b/‎src/main/java/org/opensearch/neuralsearch/query/HybridBulkScorer.java
Lines changed: 175 additions & 0 deletions
diff --git a/‎src/main/java/org/opensearch/neuralsearch/query/HybridQueryDocIdStream.java
Lines changed: 62 additions & 0 deletions b/‎src/main/java/org/opensearch/neuralsearch/query/HybridQueryDocIdStream.java
Lines changed: 62 additions & 0 deletions
diff --git a/‎src/main/java/org/opensearch/neuralsearch/query/HybridQueryScorer.java
Lines changed: 2 additions & 2 deletions b/‎src/main/java/org/opensearch/neuralsearch/query/HybridQueryScorer.java
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/main/java/org/opensearch/neuralsearch/query/HybridQueryWeight.java
Lines changed: 17 additions & 4 deletions b/‎src/main/java/org/opensearch/neuralsearch/query/HybridQueryWeight.java
Lines changed: 17 additions & 4 deletions
diff --git a/‎src/main/java/org/opensearch/neuralsearch/query/HybridSubQueryScorer.java
Lines changed: 48 additions & 0 deletions b/‎src/main/java/org/opensearch/neuralsearch/query/HybridSubQueryScorer.java
Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,175 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+package org.opensearch.neuralsearch.query;
+
+import lombok.Getter;
+import org.apache.lucene.search.BulkScorer;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.LeafCollector;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.FixedBitSet;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Bulk scorer for hybrid query.
+ * <p>
+ * Documents are processed in windows of {@code WINDOW_SIZE} consecutive doc ids. For every window each
+ * sub-query iterator is drained up to the window end, matching doc ids are flagged in a bit set and
+ * (when scores are needed) per sub-query scores are stored in {@code windowScores}. The window is then
+ * handed to the leaf collector through a {@link HybridQueryDocIdStream}.
+ */
+public class HybridBulkScorer extends BulkScorer {
+    private static final int SHIFT = 10;
+    // number of consecutive doc ids handled in one pass
+    private static final int WINDOW_SIZE = 1 << SHIFT;
+    // mask that maps a doc id to its offset inside the window
+    private static final int MASK = WINDOW_SIZE - 1;
+
+    // sum of iterator costs of all non-null sub-query scorers
+    private final long cost;
+    // sub-query scorers by sub-query index, entry is null when sub-query has no scorer for the segment
+    private final Scorer[] disiWrappers;
+    @Getter
+    private final HybridSubQueryScorer hybridSubQueryScorer;
+    private final boolean needsScores;
+    // bit is set for every offset of the current window that has at least one accepted match
+    @Getter
+    private final FixedBitSet matching;
+    // scores of the current window, indexed as [sub-query index][offset inside the window]
+    @Getter
+    private final float[][] windowScores;
+    private final HybridQueryDocIdStream hybridQueryDocIdStream;
+    private final int maxDoc;
+
+    /**
+     * Create bulk scorer for the given sub-query scorers
+     * @param scorers scorers of sub-queries in the order of sub-queries, element can be null
+     * @param needsScores true if scores must be computed and passed to the collector
+     * @param maxDoc upper limit (exclusive) for doc ids that are scored
+     */
+    public HybridBulkScorer(List<Scorer> scorers, boolean needsScores, int maxDoc) {
+        long cost = 0;
+        this.disiWrappers = new Scorer[scorers.size()];
+        for (int subQueryIndex = 0; subQueryIndex < scorers.size(); subQueryIndex++) {
+            Scorer scorer = scorers.get(subQueryIndex);
+            if (Objects.isNull(scorer)) {
+                continue;
+            }
+            cost += scorer.iterator().cost();
+            disiWrappers[subQueryIndex] = scorer;
+        }
+        this.cost = cost;
+        this.hybridSubQueryScorer = new HybridSubQueryScorer(scorers.size());
+        this.needsScores = needsScores;
+        this.matching = new FixedBitSet(WINDOW_SIZE);
+        this.windowScores = new float[disiWrappers.length][WINDOW_SIZE];
+        this.maxDoc = maxDoc;
+        this.hybridQueryDocIdStream = new HybridQueryDocIdStream(this);
+    }
+
+    /**
+     * Score and collect all matching documents in range [min, max)
+     * @param collector collector that receives matching doc ids window by window
+     * @param acceptDocs bits of documents that are allowed to match, null means all documents are accepted
+     * @param min lower bound of doc id range, inclusive
+     * @param max upper bound of doc id range, exclusive; it is capped by maxDoc
+     * @return lowest doc id any sub-query iterator is positioned on after scoring,
+     * or {@link DocIdSetIterator#NO_MORE_DOCS} if all iterators are exhausted
+     * @throws IOException in case of IO exception
+     */
+    @Override
+    public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
+        collector.setScorer(hybridSubQueryScorer);
+        // making sure we are not going over the global limit defined by maxDoc
+        max = Math.min(max, maxDoc);
+        int[] docsIds = advance(min, disiWrappers);
+        while (allDocIdsUsed(docsIds, max) == false) {
+            scoreWindow(collector, acceptDocs, min, max, docsIds);
+        }
+        return getNextDocIdCandidate(docsIds);
+    }
+
+    /**
+     * Score one window. The window is the one that holds the lowest pending doc id, this guarantees
+     * that no iterator has to jump over its own pending matches to reach the window.
+     */
+    private void scoreWindow(LeafCollector collector, Bits acceptDocs, int min, int max, int[] docsIds) throws IOException {
+        // pick the lowest out of all not yet used doc ids; caller guarantees at least one of them is below max
+        int topDoc = DocIdSetIterator.NO_MORE_DOCS;
+        for (int docsId : docsIds) {
+            topDoc = Math.min(topDoc, docsId);
+        }
+
+        final int windowBase = topDoc & ~MASK; // start of the window where the lowest pending doc id belongs
+        final int windowMin = Math.max(min, windowBase);
+        final int windowMax = Math.min(max, windowBase + WINDOW_SIZE);
+
+        scoreWindowIntoBitSetWithSubqueryScorers(collector, acceptDocs, max, docsIds, windowMin, windowMax, windowBase);
+    }
+
+    /**
+     * Drain every sub-query iterator up to windowMax, record matches and scores for the window,
+     * pass the window to the collector and reset window state for the next pass
+     */
+    private void scoreWindowIntoBitSetWithSubqueryScorers(
+        LeafCollector collector,
+        Bits acceptDocs,
+        int max,
+        int[] docsIds,
+        int windowMin,
+        int windowMax,
+        int windowBase
+    ) throws IOException {
+        for (int i = 0; i < disiWrappers.length; i++) {
+            // skip sub-queries without scorer and the ones that have nothing left in range
+            if (disiWrappers[i] == null || docsIds[i] >= max) {
+                continue;
+            }
+            DocIdSetIterator it = disiWrappers[i].iterator();
+            int doc = docsIds[i];
+            if (doc < windowMin) {
+                doc = it.advance(windowMin);
+            }
+            for (; doc < windowMax; doc = it.nextDoc()) {
+                if (Objects.isNull(acceptDocs) || acceptDocs.get(doc)) {
+                    // offset of the doc id inside the window
+                    int d = doc & MASK;
+                    if (needsScores) {
+                        float score = disiWrappers[i].score();
+                        // only scores above the min score of the sub-query are recorded
+                        if (score > hybridSubQueryScorer.getMinScores()[i]) {
+                            matching.set(d);
+                            windowScores[i][d] = score;
+                        }
+                    } else {
+                        matching.set(d);
+                    }
+                }
+            }
+            // remember where this iterator stopped, it is the first doc id at or after windowMax
+            docsIds[i] = doc;
+        }
+
+        hybridQueryDocIdStream.setBase(windowBase);
+        collector.collect(hybridQueryDocIdStream);
+
+        // window state is re-used, clean it up before the next window
+        matching.clear();
+
+        for (float[] windowScore : windowScores) {
+            Arrays.fill(windowScore, 0.0f);
+        }
+    }
+
+    /**
+     * Position every iterator on its first doc id that is greater or equal to min
+     * @return current doc id per sub-query, {@link DocIdSetIterator#NO_MORE_DOCS} for sub-queries without scorer
+     */
+    private int[] advance(int min, Scorer[] scorers) throws IOException {
+        int[] docIds = new int[scorers.length];
+        for (int i = 0; i < scorers.length; i++) {
+            if (scorers[i] == null) {
+                docIds[i] = DocIdSetIterator.NO_MORE_DOCS;
+                continue;
+            }
+            DocIdSetIterator it = scorers[i].iterator();
+            int doc = it.docID();
+            if (doc < min) {
+                doc = it.advance(min);
+            }
+            docIds[i] = doc;
+        }
+        return docIds;
+    }
+
+    /**
+     * @return true if none of the iterators has a pending doc id below max
+     */
+    private boolean allDocIdsUsed(int[] docsIds, int max) {
+        for (int docId : docsIds) {
+            if (docId < max) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Find the lowest pending doc id of all sub-query iterators. The lowest one is returned because the
+     * result of score() must not be greater than the first matching doc id that was left unscored.
+     * @return lowest pending doc id, {@link DocIdSetIterator#NO_MORE_DOCS} if there are no pending doc ids
+     */
+    private int getNextDocIdCandidate(final int[] docsIds) {
+        int nextDoc = DocIdSetIterator.NO_MORE_DOCS;
+        for (int doc : docsIds) {
+            nextDoc = Math.min(nextDoc, doc);
+        }
+        return nextDoc;
+    }
+
+    @Override
+    public long cost() {
+        return cost;
+    }
+
+}
@@ -0,0 +1,62 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+package org.opensearch.neuralsearch.query;
+
+import lombok.RequiredArgsConstructor;
+import lombok.Setter;
+import org.apache.lucene.search.CheckedIntConsumer;
+import org.apache.lucene.search.DocIdStream;
+import org.apache.lucene.util.FixedBitSet;
+
+import java.io.IOException;
+import java.util.Objects;
+
+/**
+ * DocIdStream for HybridQuery. It walks over the bit set of matching documents that is owned by
+ * {@link HybridBulkScorer} and feeds every matching doc id together with its sub-query scores to the consumer.
+ */
+@RequiredArgsConstructor
+public class HybridQueryDocIdStream extends DocIdStream {
+    private final HybridBulkScorer hybridBulkScorer;
+    // doc id of the first document of the current window, added to each offset inside the window
+    @Setter
+    private int base;
+
+    /**
+     * Visit every doc id flagged in the matching bit set. Before the consumer is called scores of the
+     * doc id are copied into the shared sub-query scorer, after the call they are reset.
+     * @param consumer consumer that is called for each accepted doc id
+     * @throws IOException in case of IO exception
+     */
+    @Override
+    public void forEach(CheckedIntConsumer<IOException> consumer) throws IOException {
+        // each long of the backing array holds match flags for 64 consecutive doc ids of the window
+        final long[] words = hybridBulkScorer.getMatching().getBits();
+        final float[][] scoresByQuery = hybridBulkScorer.getWindowScores();
+        final HybridSubQueryScorer subQueryScorer = hybridBulkScorer.getHybridSubQueryScorer();
+        final float[] currentDocScores = subQueryScorer.getSubQueryScores();
+
+        for (int wordIndex = 0; wordIndex < words.length; wordIndex++) {
+            // work on a copy of the word, consumed bits are removed from the copy only
+            long pending = words[wordIndex];
+            while (pending != 0L) {
+                // lowest set bit gives position of the next match inside this block of 64 docs
+                final int bitPosition = Long.numberOfTrailingZeros(pending);
+                // block offset (wordIndex * 64) combined with position inside the block
+                final int offsetInWindow = (wordIndex << 6) | bitPosition;
+
+                // expose scores of this doc id through the re-used scorer object
+                for (int queryIndex = 0; queryIndex < scoresByQuery.length; queryIndex++) {
+                    final float[] queryScores = scoresByQuery[queryIndex];
+                    if (Objects.nonNull(queryScores)) {
+                        currentDocScores[queryIndex] = queryScores[offsetInWindow];
+                    }
+                }
+
+                // translate window offset into real doc id and hand it over
+                consumer.accept(base | offsetInWindow);
+                // scorer object is shared between doc ids, wipe scores of the doc that was just processed
+                subQueryScorer.resetScores();
+                // drop the lowest set bit, this doc id has been consumed
+                pending &= pending - 1;
+            }
+        }
+    }
+}
@@ -75,7 +75,7 @@ public HybridQueryScorer(final Weight weight, final List<Scorer> subScorers) thr
                 sumMatchCost += w.matchCost * costWeight;
             }
         }
-        if (!hasApproximation) { // no sub scorer supports approximations
+        if (hasApproximation == false) { // no sub scorer supports approximations
             twoPhase = null;
         } else {
             final float matchCost = sumMatchCost / sumApproxCost;
@@ -284,7 +284,7 @@ public boolean matches() throws IOException {
                     wrapper.next = verifiedMatches;
                     verifiedMatches = wrapper;
 
-                    if (!needsScores) {
+                    if (needsScores == false) {
                         // we can stop here
                         return true;
                     }
 
@@ -15,6 +15,7 @@
 import lombok.Getter;
 import lombok.RequiredArgsConstructor;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.BulkScorer;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Matches;
@@ -37,7 +38,6 @@ public final class HybridQueryWeight extends Weight {
     // The Weights for our subqueries, in 1-1 correspondence
     @Getter(AccessLevel.PACKAGE)
     private final List<Weight> weights;
-
     private final ScoreMode scoreMode;
 
     /**
@@ -95,7 +95,7 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti
         if (scorerSuppliers.isEmpty()) {
             return null;
         }
-        return new HybridScorerSupplier(scorerSuppliers, this, scoreMode);
+        return new HybridScorerSupplier(scorerSuppliers, this, scoreMode, context);
     }
 
     private Void addScoreSupplier(Weight weight, HybridQueryExecutorCollector<LeafReaderContext, ScorerSupplier> collector) {
@@ -145,7 +145,7 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio
                 max = Math.max(max, score);
                 subsOnMatch.add(e);
             } else {
-                if (!match) {
+                if (match == false) {
                     subsOnNoMatch.add(e);
                 }
                 subsOnMatch.add(e);
@@ -161,10 +161,23 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio
 
     @RequiredArgsConstructor
     static class HybridScorerSupplier extends ScorerSupplier {
+
+        /**
+         * Create bulk scorer that works on scorers of all sub-queries for the current segment
+         * @return bulk scorer for hybrid query
+         * @throws IOException in case of IO exception
+         */
+        @Override
+        public BulkScorer bulkScorer() throws IOException {
+            final List<Weight> subQueryWeights = weight.getWeights();
+            final List<Scorer> subQueryScorers = new ArrayList<>();
+            // one entry per sub-query is added, order of sub-queries is preserved
+            for (Weight subQueryWeight : subQueryWeights) {
+                subQueryScorers.add(subQueryWeight.scorer(context));
+            }
+            final boolean needsScores = scoreMode.needsScores();
+            final int maxDoc = context.reader().maxDoc();
+            return new HybridBulkScorer(subQueryScorers, needsScores, maxDoc);
+        }
+
         private long cost = -1;
+        @Getter
         private final List<ScorerSupplier> scorerSuppliers;
-        private final Weight weight;
+        private final HybridQueryWeight weight;
         private final ScoreMode scoreMode;
+        private final LeafReaderContext context;
 
         @Override
         public Scorer get(long leadCost) throws IOException {
 
@@ -0,0 +1,48 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+package org.opensearch.neuralsearch.query;
+
+import lombok.Data;
+import org.apache.lucene.search.Scorable;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * Scorable for Hybrid Query that keeps one score per sub-query for a single doc id.
+ * The object is light and is meant to be re-used for different doc ids, see {@link #resetScores()}
+ */
+@Data
+public class HybridSubQueryScorer extends Scorable {
+    // scores of the current doc id, one element per sub-query
+    private final float[] subQueryScores;
+    // min competitive scores, one element per sub-query, score is shard level
+    private final float[] minScores;
+
+    /**
+     * Create scorer with zero scores and zero min scores for every sub-query
+     * @param numOfSubQueries number of sub-queries in the hybrid query
+     */
+    public HybridSubQueryScorer(int numOfSubQueries) {
+        this.minScores = new float[numOfSubQueries];
+        this.subQueryScores = new float[numOfSubQueries];
+    }
+
+    /**
+     * Single score for the current doc id, needed when one score per doc is expected (like in aggregations)
+     * @return sum of scores of all sub-queries
+     * @throws IOException in case of IO exception
+     */
+    @Override
+    public float score() throws IOException {
+        float sum = 0.0f;
+        for (int index = 0; index < subQueryScores.length; index++) {
+            sum += subQueryScores[index];
+        }
+        return sum;
+    }
+
+    /**
+     * Set scores of all sub-queries back to 0.0f, after that the scorer can be used for the next doc id
+     */
+    public void resetScores() {
+        Arrays.fill(subQueryScores, 0.0f);
+    }
+
+    /**
+     * @return number of sub-queries this scorer holds scores for
+     */
+    public int getNumOfSubQueries() {
+        return subQueryScores.length;
+    }
+}
Original file line number	Diff line number	Diff line change
`@@ -75,7 +75,7 @@ public HybridQueryScorer(final Weight weight, final List<Scorer> subScorers) thr`
`75`	`75`	`sumMatchCost += w.matchCost * costWeight;`
`76`	`76`	`}`
`77`	`77`	`}`
`78`		`- if (!hasApproximation) { // no sub scorer supports approximations`
	`78`	`+ if (hasApproximation == false) { // no sub scorer supports approximations`
`79`	`79`	`twoPhase = null;`
`80`	`80`	`} else {`
`81`	`81`	`final float matchCost = sumMatchCost / sumApproxCost;`
`@@ -284,7 +284,7 @@ public boolean matches() throws IOException {`
`284`	`284`	`wrapper.next = verifiedMatches;`
`285`	`285`	`verifiedMatches = wrapper;`
`286`	`286`
`287`		`- if (!needsScores) {`
	`287`	`+ if (needsScores == false) {`
`288`	`288`	`// we can stop here`
`289`	`289`	`return true;`
`290`	`290`	`}`