Skip to content

Commit c967d83

Browse files
author
Shailesh Singh
committed
Fix Bug - handle unsigned long in assertion of LongHashSet
Signed-off-by: Shailesh Singh <[email protected]>
1 parent 2847695 commit c967d83

File tree

2 files changed

+141
-3
lines changed

2 files changed

+141
-3
lines changed
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.apache.lucene.util;
10+
11+
import org.apache.lucene.util.packed.PackedInts;
12+
import org.opensearch.common.Numbers;
13+
14+
import java.util.Arrays;
15+
import java.util.Objects;
16+
import java.util.stream.Collectors;
17+
import java.util.stream.LongStream;
18+
19+
/** Set of longs, optimized for docvalues usage */
20+
public final class UnsignedLongHashSet implements Accountable {
21+
private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(UnsignedLongHashSet.class);
22+
23+
private static final long MISSING = Numbers.MIN_UNSIGNED_LONG_VALUE_AS_LONG;
24+
25+
final long[] table;
26+
final int mask;
27+
final boolean hasMissingValue;
28+
final int size;
29+
/** minimum value in the set, or Long.MAX_VALUE for an empty set */
30+
public final long minValue;
31+
/** maximum value in the set, or Long.MIN_VALUE for an empty set */
32+
public final long maxValue;
33+
34+
/** Construct a set. Values must be in sorted order. */
35+
public UnsignedLongHashSet(long[] values) {
36+
int tableSize = Math.toIntExact(values.length * 3L / 2);
37+
tableSize = 1 << PackedInts.bitsRequired(tableSize); // make it a power of 2
38+
assert tableSize >= values.length * 3L / 2;
39+
table = new long[tableSize];
40+
Arrays.fill(table, MISSING);
41+
mask = tableSize - 1;
42+
boolean hasMissingValue = false;
43+
int size = 0;
44+
long previousValue = 0; // for assert
45+
for (long value : values) {
46+
if (value == MISSING) {
47+
size += hasMissingValue ? 0 : 1;
48+
hasMissingValue = true;
49+
} else if (add(value)) {
50+
++size;
51+
}
52+
assert Long.compareUnsigned(value, previousValue) >= 0 : " values must be provided in sorted order";
53+
previousValue = value;
54+
}
55+
this.hasMissingValue = hasMissingValue;
56+
this.size = size;
57+
this.minValue = values.length == 0 ? Numbers.MAX_UNSIGNED_LONG_VALUE_AS_LONG : values[0];
58+
this.maxValue = values.length == 0 ? Numbers.MIN_UNSIGNED_LONG_VALUE_AS_LONG : values[values.length - 1];
59+
}
60+
61+
private boolean add(long l) {
62+
assert l != MISSING;
63+
final int slot = Long.hashCode(l) & mask;
64+
for (int i = slot;; i = (i + 1) & mask) {
65+
if (table[i] == MISSING) {
66+
table[i] = l;
67+
return true;
68+
} else if (table[i] == l) {
69+
// already added
70+
return false;
71+
}
72+
}
73+
}
74+
75+
/**
76+
* check for membership in the set.
77+
*
78+
* <p>You should use {@link #minValue} and {@link #maxValue} to guide/terminate iteration before
79+
* calling this.
80+
*/
81+
public boolean contains(long l) {
82+
if (l == MISSING) {
83+
return hasMissingValue;
84+
}
85+
final int slot = Long.hashCode(l) & mask;
86+
for (int i = slot;; i = (i + 1) & mask) {
87+
if (table[i] == MISSING) {
88+
return false;
89+
} else if (table[i] == l) {
90+
return true;
91+
}
92+
}
93+
}
94+
95+
/** returns a stream of all values contained in this set */
96+
LongStream stream() {
97+
LongStream stream = Arrays.stream(table).filter(v -> v != MISSING);
98+
if (hasMissingValue) {
99+
stream = LongStream.concat(LongStream.of(MISSING), stream);
100+
}
101+
return stream;
102+
}
103+
104+
@Override
105+
public int hashCode() {
106+
return Objects.hash(size, minValue, maxValue, mask, hasMissingValue, Arrays.hashCode(table));
107+
}
108+
109+
@Override
110+
public boolean equals(Object obj) {
111+
if (obj != null && obj instanceof UnsignedLongHashSet) {
112+
UnsignedLongHashSet that = (UnsignedLongHashSet) obj;
113+
return size == that.size
114+
&& minValue == that.minValue
115+
&& maxValue == that.maxValue
116+
&& mask == that.mask
117+
&& hasMissingValue == that.hasMissingValue
118+
&& Arrays.equals(table, that.table);
119+
}
120+
return false;
121+
}
122+
123+
@Override
124+
public String toString() {
125+
return stream().mapToObj(String::valueOf).collect(Collectors.joining(", ", "[", "]"));
126+
}
127+
128+
/** number of elements in the set */
129+
public int size() {
130+
return size;
131+
}
132+
133+
@Override
134+
public long ramBytesUsed() {
135+
return BASE_RAM_BYTES + RamUsageEstimator.sizeOfObject(table);
136+
}
137+
}

server/src/main/java/org/opensearch/index/document/SortedUnsignedLongDocValuesSetQuery.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@
2525
import org.apache.lucene.search.ScorerSupplier;
2626
import org.apache.lucene.search.TwoPhaseIterator;
2727
import org.apache.lucene.search.Weight;
28-
import org.apache.lucene.util.LongHashSet;
28+
29+
import org.apache.lucene.util.UnsignedLongHashSet;
2930

3031
import java.io.IOException;
3132
import java.math.BigInteger;
@@ -40,12 +41,12 @@
4041
public abstract class SortedUnsignedLongDocValuesSetQuery extends Query {
4142

4243
private final String field;
43-
private final LongHashSet numbers;
44+
private final UnsignedLongHashSet numbers;
4445

4546
SortedUnsignedLongDocValuesSetQuery(String field, BigInteger[] numbers) {
4647
this.field = Objects.requireNonNull(field);
4748
Arrays.sort(numbers);
48-
this.numbers = new LongHashSet(Arrays.stream(numbers).mapToLong(n -> n.longValue()).toArray());
49+
this.numbers = new UnsignedLongHashSet(Arrays.stream(numbers).mapToLong(n -> n.longValue()).toArray());
4950
}
5051

5152
@Override

0 commit comments

Comments
 (0)