Skip to content

Commit 132afc2

Browse files
authored
feat: allow sentence hits to be returned directly (quickwit-oss#153)
1 parent c83bc8d commit 132afc2

File tree

4 files changed

+38
-5
lines changed

4 files changed

+38
-5
lines changed

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/searcher_frame_document.rs

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,13 @@ impl SearchResult {
4444
Ok(s)
4545
}
4646

47+
#[getter]
48+
fn unique_sentences(&self, py: Python) -> PyResult<BTreeSet<u64>> {
49+
let s =
50+
BTreeSet::from_iter(self.hits.iter().map(|(d, f, s, score)| *s));
51+
Ok(s)
52+
}
53+
4754
#[getter]
4855
fn unique_docs_frames(&self, py: Python) -> PyResult<BTreeSet<(u64, u64)>> {
4956
let s = BTreeSet::from_iter(
@@ -70,6 +77,27 @@ impl SearchResult {
7077
}
7178
Ok((v1, v2))
7279
}
80+
81+
/// This is an optimization to allow Python callers to obtain vectors
82+
/// without having to do iteration to get them.
83+
#[getter]
84+
fn unique_docs_frames_sentences_unzipped(
85+
&self,
86+
py: Python,
87+
) -> PyResult<(Vec<u64>, Vec<u64>, Vec<u64>)> {
88+
let s = BTreeSet::from_iter(
89+
self.hits.iter().map(|(d, f, s, score)| (*d, *f, *s)),
90+
);
91+
let mut v1 = Vec::with_capacity(s.len());
92+
let mut v2 = Vec::with_capacity(s.len());
93+
let mut v3 = Vec::with_capacity(s.len());
94+
for (d, f, s) in s.into_iter() {
95+
v1.push(d);
96+
v2.push(f);
97+
v3.push(s);
98+
}
99+
Ok((v1, v2, v3))
100+
}
73101
}
74102

75103
#[pymethods]

tests/tantivy_kapiche_test.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ def create_index(dir=None):
3434
# assume all tests will use the same documents for now
3535
# other methods may set up function-local indexes
3636
index = Index(schema(), dir)
37-
writer = index.writer()
37+
writer = index.writer(15_000_000, 1)
3838

3939
# 2 ways of adding documents
4040
# 1
@@ -179,7 +179,7 @@ def create_kapiche_index(dir=None):
179179
# assume all tests will use the same documents for now
180180
# other methods may set up function-local indexes
181181
index = Index(kapiche_schema(), dir)
182-
writer = index.writer()
182+
writer = index.writer(15_000_000, 1)
183183

184184
# 2 ways of adding documents
185185
# 1
@@ -513,7 +513,7 @@ def test_create_readers(self):
513513
class TestSearcher(object):
514514
def test_searcher_repr(self, ram_index, ram_index_numeric_fields):
515515
assert ram_index.searcher().num_docs == 3
516-
assert ram_index.searcher().num_segments == 2
516+
assert ram_index.searcher().num_segments == 1
517517
assert (
518518
repr(ram_index_numeric_fields.searcher())
519519
== "Searcher(num_docs=2, num_segments=2)"

tests/test_stat_collector.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -78,8 +78,10 @@ def test_stat_searcher_filter(self, ram_kapiche_index):
7878
assert sorted(result.unique_docs_frames) == [(1, 1), (3, 5)]
7979
assert list(result.unique_docs) == [1, 3]
8080
assert list(result.unique_frames) == [1, 5]
81+
assert list(result.unique_sentences) == [1, 2, 7]
8182
print(f"{result.hits}")
8283
print(f"{result.unique_docs_frames}")
84+
print(f"{result.unique_sentences}")
8385

8486
def test_stat_searcher_filter_unzipped(self, ram_kapiche_index):
8587
index = ram_kapiche_index
@@ -92,6 +94,9 @@ def test_stat_searcher_filter_unzipped(self, ram_kapiche_index):
9294
assert list(result.unique_docs) == [1, 3]
9395
assert list(result.unique_frames) == [1, 5]
9496

97+
d, f, s = result.unique_docs_frames_sentences_unzipped
98+
assert sorted(s) == [1, 2, 7]
99+
95100

96101
def test_stat_searcher_memory():
97102
# Create index

0 commit comments

Comments
 (0)