Skip to content

Commit 6f42e8b

Browse files
authored
Backport sorted BytesRef to TermInSet #17714 to 2.x (#17916)
* Pass in order terms as sorted to TermInSetQuery() (#17714) * pass in order terms as sorted to TermInSetQuery() Signed-off-by: Mikhail Khludnev <[email protected]> * slightly more elegant solution Signed-off-by: Mikhail Khludnev <[email protected]> * Attempting mocking TermInSetQ constructor. Signed-off-by: Mikhail Khludnev <[email protected]> * Handle ids as well. Signed-off-by: Mikhail Khludnev <[email protected]> * forbidden api Signed-off-by: Mikhail Khludnev <[email protected]> * make unnecessary method slow but correct. Signed-off-by: Mikhail Khludnev <[email protected]> * make unnecessary method slow but correct. Signed-off-by: Mikhail Khludnev <[email protected]> * Polish test coverage Signed-off-by: Mikhail Khludnev <[email protected]> * CHANGELOG.md Signed-off-by: Mikhail Khludnev <[email protected]> * assertThrows Signed-off-by: Mikhail Khludnev <[email protected]> * spotlessApply Signed-off-by: Mikhail Khludnev <[email protected]> * coverage tests and refactoring Signed-off-by: Mikhail Khludnev <[email protected]> * javadoc Signed-off-by: Mikhail Khludnev <[email protected]> * javadoc Signed-off-by: Mikhail Khludnev <[email protected]> * mark nocommit Signed-off-by: Mikhail Khludnev <[email protected]> * one more nocommit test Signed-off-by: Mikhail Khludnev <[email protected]> * forbidden api Signed-off-by: Mikhail Khludnev <[email protected]> * no commit for out of line tests Signed-off-by: Mikhail Khludnev <[email protected]> * Review Signed-off-by: Mikhail Khludnev <[email protected]> --------- Signed-off-by: Mikhail Khludnev <[email protected]> Signed-off-by: Mikhail Khludnev <[email protected]> * Fix test Signed-off-by: Mikhail Khludnev <[email protected]> * Fix BytesRefsCollectionBuilderTests.testBuildSortedNotSorted Followup for #17714: Remove redundant tests (#17902) * Remove redundant tests Signed-off-by: Mikhail Khludnev <[email protected]> * Fix empty collection test Signed-off-by: Mikhail Khludnev <[email protected]> --------- Signed-off-by: Mikhail Khludnev <[email protected]> --------- Signed-off-by: Mikhail Khludnev <[email protected]> Signed-off-by: Mikhail Khludnev <[email protected]>
1 parent b8df3de commit 6f42e8b

File tree

7 files changed

+313
-19
lines changed

7 files changed

+313
-19
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
1212
- [Rule Based Auto-tagging] Add in-memory attribute value store ([#17342](https://github.com/opensearch-project/OpenSearch/pull/17342))
1313
- [Rule Based Auto-tagging] Add in-memory rule processing service ([#17365](https://github.com/opensearch-project/OpenSearch/pull/17365))
1414
- Change priority for scheduling reroute during timeout([#16445](https://github.com/opensearch-project/OpenSearch/pull/16445))
15+
- Faster `terms_query` with already sorted terms ([#17714](https://github.com/opensearch-project/OpenSearch/pull/17714))
1516

1617
### Dependencies
1718
- Bump `dnsjava:dnsjava` from 3.6.2 to 3.6.3 ([#17231](https://github.com/opensearch-project/OpenSearch/pull/17231))
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.mapper;
10+
11+
import org.apache.lucene.search.TermInSetQuery;
12+
import org.apache.lucene.util.BytesRef;
13+
14+
import java.util.AbstractSet;
15+
import java.util.ArrayList;
16+
import java.util.Collection;
17+
import java.util.Comparator;
18+
import java.util.Iterator;
19+
import java.util.List;
20+
import java.util.SortedSet;
21+
import java.util.function.Consumer;
22+
import java.util.function.Function;
23+
import java.util.function.Supplier;
24+
25+
/**
26+
* Purposed for passing terms into {@link TermInSetQuery}.
27+
* If the given terms are sorted already, it wrap it with a SortedSet stub.
28+
* Otherwise, it passes terms as list.
29+
*/
30+
public class BytesRefsCollectionBuilder implements Consumer<BytesRef>, Supplier<Collection<BytesRef>> {
31+
32+
/**
33+
* Strategy for building BytesRef collection.
34+
* */
35+
protected interface ConsumerStrategy extends Function<BytesRef, ConsumerStrategy>, Supplier<Collection<BytesRef>> {}
36+
37+
public BytesRefsCollectionBuilder(int sizeExpected) {
38+
terms = new ArrayList<>(sizeExpected);
39+
}
40+
41+
protected final List<BytesRef> terms;
42+
protected ConsumerStrategy delegate = createStartStrategy();
43+
44+
@Override
45+
public void accept(BytesRef bytesRef) {
46+
delegate = delegate.apply(bytesRef);
47+
}
48+
49+
@Override
50+
public Collection<BytesRef> get() {
51+
Collection<BytesRef> result = delegate.get();
52+
delegate = createFrozenStrategy(result);
53+
return result;
54+
}
55+
56+
protected ConsumerStrategy createStartStrategy() {
57+
return new ConsumerStrategy() {
58+
@Override
59+
public ConsumerStrategy apply(BytesRef firstBytes) {
60+
terms.add(firstBytes); // firstly, just store
61+
return createSortedStrategy(firstBytes);
62+
}
63+
64+
@Override
65+
public Collection<BytesRef> get() {
66+
return terms; // empty list
67+
}
68+
};
69+
}
70+
71+
protected ConsumerStrategy createSortedStrategy(BytesRef firstBytes) {
72+
return new ConsumerStrategy() {
73+
BytesRef prev = firstBytes;
74+
75+
@Override
76+
public ConsumerStrategy apply(BytesRef bytesRef) {
77+
terms.add(bytesRef);
78+
if (bytesRef.compareTo(prev) >= 0) { // keep checking sorted
79+
prev = bytesRef;
80+
return this;
81+
} else { // isn't sorted
82+
return createUnsortedStrategy();
83+
}
84+
}
85+
86+
@Override
87+
public Collection<BytesRef> get() {
88+
return new SortedBytesSet(terms);
89+
}
90+
};
91+
}
92+
93+
protected ConsumerStrategy createUnsortedStrategy() {
94+
return new ConsumerStrategy() {
95+
@Override
96+
public ConsumerStrategy apply(BytesRef bytesRef) { // just storing
97+
terms.add(bytesRef);
98+
return this;
99+
}
100+
101+
@Override
102+
public Collection<BytesRef> get() {
103+
return terms;
104+
}
105+
};
106+
}
107+
108+
protected ConsumerStrategy createFrozenStrategy(Collection<BytesRef> result) {
109+
return new ConsumerStrategy() {
110+
111+
@Override
112+
public ConsumerStrategy apply(BytesRef bytesRef) {
113+
throw new IllegalStateException("already build");
114+
}
115+
116+
@Override
117+
public Collection<BytesRef> get() {
118+
return result;
119+
}
120+
};
121+
}
122+
123+
/**
124+
* {@link SortedSet<BytesRef>} for passing into TermInSetQuery()
125+
* */
126+
protected static class SortedBytesSet extends AbstractSet<BytesRef> implements SortedSet<BytesRef> {
127+
128+
private final List<BytesRef> bytesRefs;
129+
130+
public SortedBytesSet(List<BytesRef> bytesRefs) {
131+
this.bytesRefs = bytesRefs;
132+
}
133+
134+
@Override
135+
public Iterator<BytesRef> iterator() {
136+
return bytesRefs.iterator();
137+
}
138+
139+
@Override
140+
public int size() {
141+
return bytesRefs.size();
142+
}
143+
144+
@Override
145+
public Comparator<? super BytesRef> comparator() {
146+
return null;
147+
}
148+
149+
@Override
150+
public SortedSet<BytesRef> subSet(BytesRef fromElement, BytesRef toElement) {
151+
throw new UnsupportedOperationException();
152+
}
153+
154+
@Override
155+
public SortedSet<BytesRef> headSet(BytesRef toElement) {
156+
throw new UnsupportedOperationException();
157+
}
158+
159+
@Override
160+
public SortedSet<BytesRef> tailSet(BytesRef fromElement) {
161+
throw new UnsupportedOperationException();
162+
}
163+
164+
@Override
165+
public BytesRef first() {
166+
throw new UnsupportedOperationException();
167+
}
168+
169+
@Override
170+
public BytesRef last() {
171+
throw new UnsupportedOperationException();
172+
}
173+
174+
/**
175+
* Dedicated for {@link TermInSetQuery#TermInSetQuery(String, Collection)}.
176+
*/
177+
@Override
178+
public <T> T[] toArray(T[] a) {
179+
return bytesRefs.toArray(a);
180+
}
181+
}
182+
}

server/src/main/java/org/opensearch/index/mapper/IdFieldMapper.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -163,15 +163,15 @@ public Query existsQuery(QueryShardContext context) {
163163
@Override
164164
public Query termsQuery(List<?> values, QueryShardContext context) {
165165
failIfNotIndexed();
166-
BytesRef[] bytesRefs = new BytesRef[values.size()];
167-
for (int i = 0; i < bytesRefs.length; i++) {
166+
BytesRefsCollectionBuilder bytesRefs = new BytesRefsCollectionBuilder(values.size());
167+
for (int i = 0; i < values.size(); i++) {
168168
Object idObject = values.get(i);
169169
if (idObject instanceof BytesRef) {
170170
idObject = ((BytesRef) idObject).utf8ToString();
171171
}
172-
bytesRefs[i] = Uid.encodeId(idObject.toString());
172+
bytesRefs.accept(Uid.encodeId(idObject.toString()));
173173
}
174-
return new TermInSetQuery(name(), bytesRefs);
174+
return new TermInSetQuery(name(), bytesRefs.get());
175175
}
176176

177177
@Override

server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -447,23 +447,26 @@ public Query termsQuery(List<?> values, QueryShardContext context) {
447447
if (!context.keywordFieldIndexOrDocValuesEnabled()) {
448448
return super.termsQuery(values, context);
449449
}
450-
BytesRef[] iBytesRefs = new BytesRef[values.size()];
451-
BytesRef[] dVByteRefs = new BytesRef[values.size()];
452-
for (int i = 0; i < iBytesRefs.length; i++) {
453-
iBytesRefs[i] = indexedValueForSearch(values.get(i));
454-
dVByteRefs[i] = indexedValueForSearch(rewriteForDocValue(values.get(i)));
450+
BytesRefsCollectionBuilder iBytesRefs = new BytesRefsCollectionBuilder(values.size());
451+
BytesRefsCollectionBuilder dVByteRefs = new BytesRefsCollectionBuilder(values.size());
452+
for (int i = 0; i < values.size(); i++) {
453+
BytesRef idxBytes = indexedValueForSearch(values.get(i));
454+
iBytesRefs.accept(idxBytes);
455+
BytesRef dvBytes = indexedValueForSearch(rewriteForDocValue(values.get(i)));
456+
dVByteRefs.accept(dvBytes);
455457
}
456-
Query indexQuery = new TermInSetQuery(name(), iBytesRefs);
457-
Query dvQuery = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), dVByteRefs);
458+
Query indexQuery = new TermInSetQuery(name(), iBytesRefs.get());
459+
Query dvQuery = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), dVByteRefs.get());
458460
return new IndexOrDocValuesQuery(indexQuery, dvQuery);
459461
}
460462
// if we only have doc_values enabled, we construct a new query with doc_values re-written
461463
if (hasDocValues()) {
462-
BytesRef[] bytesRefs = new BytesRef[values.size()];
463-
for (int i = 0; i < bytesRefs.length; i++) {
464-
bytesRefs[i] = indexedValueForSearch(rewriteForDocValue(values.get(i)));
464+
BytesRefsCollectionBuilder bytesCollector = new BytesRefsCollectionBuilder(values.size());
465+
for (int i = 0; i < values.size(); i++) {
466+
BytesRef dvBytes = indexedValueForSearch(rewriteForDocValue(values.get(i)));
467+
bytesCollector.accept(dvBytes);
465468
}
466-
return new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), bytesRefs);
469+
return new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), bytesCollector.get());
467470
}
468471
// has index enabled, we're going to return the query as is
469472
return super.termsQuery(values, context);

server/src/main/java/org/opensearch/index/mapper/TermBasedFieldType.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -93,11 +93,12 @@ public Query termQuery(Object value, QueryShardContext context) {
9393
@Override
9494
public Query termsQuery(List<?> values, QueryShardContext context) {
9595
failIfNotIndexed();
96-
BytesRef[] bytesRefs = new BytesRef[values.size()];
97-
for (int i = 0; i < bytesRefs.length; i++) {
98-
bytesRefs[i] = indexedValueForSearch(values.get(i));
96+
BytesRefsCollectionBuilder bytesCollector = new BytesRefsCollectionBuilder(values.size());
97+
for (int i = 0; i < values.size(); i++) {
98+
BytesRef elem = indexedValueForSearch(values.get(i));
99+
bytesCollector.accept(elem);
99100
}
100-
return new TermInSetQuery(name(), bytesRefs);
101+
return new TermInSetQuery(name(), bytesCollector.get());
101102
}
102103

103104
}
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.mapper;
10+
11+
import org.apache.lucene.util.BytesRef;
12+
import org.opensearch.test.OpenSearchTestCase;
13+
14+
import java.util.Arrays;
15+
import java.util.Collection;
16+
import java.util.Iterator;
17+
import java.util.List;
18+
import java.util.SortedSet;
19+
import java.util.stream.Collectors;
20+
import java.util.stream.Stream;
21+
22+
public class BytesRefsCollectionBuilderTests extends OpenSearchTestCase {
23+
24+
public void testBuildSortedNotSorted() {
25+
String[] seedStrings = generateRandomStringArray(10, 10, false, true);
26+
List<BytesRef> bytesRefList = Arrays.stream(seedStrings).map(BytesRef::new).collect(Collectors.toList());
27+
List<BytesRef> sortedBytesRefs = bytesRefList.stream().sorted().collect(Collectors.toList());
28+
29+
Collection<BytesRef> sortedSet = assertCollectionBuilt(sortedBytesRefs);
30+
assertCollectionBuilt(bytesRefList);
31+
32+
assertTrue(sortedSet.isEmpty() || sortedSet instanceof SortedSet);
33+
if (!sortedSet.isEmpty()) {
34+
assertNull(((SortedSet<BytesRef>) sortedSet).comparator());
35+
}
36+
}
37+
38+
public void testBuildFooBar() {
39+
String[] reverseOrderStrings = new String[] { "foo", "bar" };
40+
List<BytesRef> bytesRefList = Arrays.stream(reverseOrderStrings).map(BytesRef::new).collect(Collectors.toList());
41+
List<BytesRef> sortedBytesRefs = bytesRefList.stream().sorted().collect(Collectors.toList());
42+
43+
Collection<BytesRef> sortedSet = assertCollectionBuilt(sortedBytesRefs);
44+
Collection<BytesRef> reverseList = assertCollectionBuilt(bytesRefList);
45+
46+
assertTrue(sortedSet instanceof SortedSet);
47+
assertNull(((SortedSet<BytesRef>) sortedSet).comparator());
48+
49+
assertTrue(reverseList instanceof List);
50+
}
51+
52+
public void testFrozen() {
53+
BytesRefsCollectionBuilder builder = new BytesRefsCollectionBuilder(1);
54+
String[] seedStrings = generateRandomStringArray(5, 10, false, true);
55+
Arrays.stream(seedStrings).map(BytesRef::new).forEachOrdered(builder);
56+
Collection<BytesRef> bytesRefCollection = builder.get();
57+
assertNotNull(bytesRefCollection);
58+
assertEquals(seedStrings.length, bytesRefCollection.size());
59+
assertThrows(IllegalStateException.class, () -> builder.accept(new BytesRef("illegal state")));
60+
assertSame(bytesRefCollection, builder.get());
61+
}
62+
63+
private static Collection<BytesRef> assertCollectionBuilt(List<BytesRef> sortedBytesRefs) {
64+
BytesRefsCollectionBuilder builder = new BytesRefsCollectionBuilder(1);
65+
sortedBytesRefs.stream().forEachOrdered(builder);
66+
Collection<BytesRef> bytesRefCollection = builder.get();
67+
assertEquals(bytesRefCollection.size(), sortedBytesRefs.size());
68+
for (Iterator<BytesRef> iterator = bytesRefCollection.iterator(), iterator2 = sortedBytesRefs.iterator(); iterator.hasNext()
69+
|| iterator2.hasNext();) {
70+
assertTrue(iterator.next().bytesEquals(iterator2.next()));
71+
}
72+
return bytesRefCollection;
73+
}
74+
75+
public void testCoverUnsupported() {
76+
BytesRefsCollectionBuilder builder = new BytesRefsCollectionBuilder(1);
77+
Stream.of("in", "order").map(BytesRef::new).forEachOrdered(builder);
78+
SortedSet<BytesRef> bytesRefCollection = (SortedSet<BytesRef>) builder.get();
79+
assertThrows(UnsupportedOperationException.class, () -> bytesRefCollection.subSet(new BytesRef("a"), new BytesRef("z")));
80+
assertThrows(UnsupportedOperationException.class, () -> bytesRefCollection.headSet(new BytesRef("a")));
81+
assertThrows(UnsupportedOperationException.class, () -> bytesRefCollection.tailSet(new BytesRef("a")));
82+
assertThrows(UnsupportedOperationException.class, bytesRefCollection::first);
83+
assertThrows(UnsupportedOperationException.class, bytesRefCollection::last);
84+
}
85+
}

server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@
8181
import java.util.Collections;
8282
import java.util.List;
8383
import java.util.Map;
84+
import java.util.stream.Collectors;
8485

8586
public class KeywordFieldTypeTests extends FieldTypeTestCase {
8687

@@ -198,6 +199,27 @@ public void testTermsQuery() {
198199
);
199200
}
200201

202+
public void testTermsSortedQuery() {
203+
String[] seedStrings = generateRandomStringArray(10, 10, false, true);
204+
List<BytesRef> bytesRefList = Arrays.stream(seedStrings).map(BytesRef::new).collect(Collectors.toList());
205+
List<String> sortedStrings = bytesRefList.stream().sorted().map(BytesRef::utf8ToString).collect(Collectors.toList());
206+
207+
MappedFieldType ft = new KeywordFieldType("field");
208+
Query expected = new IndexOrDocValuesQuery(
209+
new TermInSetQuery("field", bytesRefList),
210+
new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, "field", bytesRefList)
211+
);
212+
assertEquals(expected, ft.termsQuery(sortedStrings, MOCK_QSC_ENABLE_INDEX_DOC_VALUES));
213+
214+
MappedFieldType onlyIndexed = new KeywordFieldType("field", true, false, Collections.emptyMap());
215+
Query expectedIndex = new TermInSetQuery("field", bytesRefList);
216+
assertEquals(expectedIndex, onlyIndexed.termsQuery(sortedStrings, null));
217+
218+
MappedFieldType onlyDocValues = new KeywordFieldType("field", false, true, Collections.emptyMap());
219+
Query expectedDocValues = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, "field", bytesRefList);
220+
assertEquals(expectedDocValues, onlyDocValues.termsQuery(sortedStrings, null));
221+
}
222+
201223
public void testExistsQuery() {
202224
{
203225
KeywordFieldType ft = new KeywordFieldType("field");

0 commit comments

Comments
 (0)