Skip to content

Commit ff51f61

Browse files
authored
Merge pull request #45 from saukymo/master
Bump tantivy version 0.17.0
2 parents 27340e9 + 7efd477 commit ff51f61

File tree

8 files changed

+267
-87
lines changed

8 files changed

+267
-87
lines changed

Cargo.toml

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "tantivy"
3-
version = "0.16.0"
3+
version = "0.17.0"
44
readme = "README.md"
55
authors = ["Damir Jelić <[email protected]>"]
66
edition = "2018"
@@ -11,14 +11,15 @@ name = "tantivy"
1111
crate-type = ["cdylib"]
1212

1313
[build-dependencies]
14-
pyo3-build-config = "0.15.1"
14+
pyo3-build-config = "0.16.3"
1515

1616
[dependencies]
1717
chrono = "0.4.19"
18-
tantivy = "0.16.1"
19-
itertools = "0.10.0"
20-
futures = "0.3.5"
18+
tantivy = "0.17"
19+
itertools = "0.10.3"
20+
futures = "0.3.21"
21+
serde_json = "1.0.64"
2122

2223
[dependencies.pyo3]
23-
version = "0.15.1"
24+
version = "0.16.3"
2425
features = ["extension-module"]

src/document.rs

Lines changed: 60 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,37 @@ use chrono::{offset::TimeZone, Datelike, Timelike, Utc};
1414
use tantivy as tv;
1515

1616
use crate::{facet::Facet, to_pyerr};
17-
use pyo3::{PyMappingProtocol, PyObjectProtocol};
18-
use std::{collections::BTreeMap, fmt};
17+
use serde_json::Value as JsonValue;
18+
use std::{
19+
collections::{BTreeMap, HashMap},
20+
fmt,
21+
};
1922
use tantivy::schema::Value;
2023

24+
fn value_to_object(val: &JsonValue, py: Python<'_>) -> PyObject {
25+
match val {
26+
JsonValue::Null => py.None(),
27+
JsonValue::Bool(b) => b.to_object(py),
28+
JsonValue::Number(n) => match n {
29+
n if n.is_i64() => n.as_i64().to_object(py),
30+
n if n.is_u64() => n.as_u64().to_object(py),
31+
n if n.is_f64() => n.as_f64().to_object(py),
32+
_ => panic!("number too large"),
33+
},
34+
JsonValue::String(s) => s.to_object(py),
35+
JsonValue::Array(v) => {
36+
let inner: Vec<_> =
37+
v.iter().map(|x| value_to_object(x, py)).collect();
38+
inner.to_object(py)
39+
}
40+
JsonValue::Object(m) => {
41+
let inner: HashMap<_, _> =
42+
m.iter().map(|(k, v)| (k, value_to_object(v, py))).collect();
43+
inner.to_object(py)
44+
}
45+
}
46+
}
47+
2148
fn value_to_py(py: Python, value: &Value) -> PyResult<PyObject> {
2249
Ok(match value {
2350
Value::Str(text) => text.into_py(py),
@@ -42,6 +69,13 @@ fn value_to_py(py: Python, value: &Value) -> PyResult<PyObject> {
4269
)?
4370
.into_py(py),
4471
Value::Facet(f) => Facet { inner: f.clone() }.into_py(py),
72+
Value::JsonObject(json_object) => {
73+
let inner: HashMap<_, _> = json_object
74+
.iter()
75+
.map(|(k, v)| (k, value_to_object(&v, py)))
76+
.collect();
77+
inner.to_object(py)
78+
}
4579
})
4680
}
4781

@@ -58,6 +92,9 @@ fn value_to_string(value: &Value) -> String {
5892
// TODO implement me
5993
unimplemented!();
6094
}
95+
Value::JsonObject(json_object) => {
96+
serde_json::to_string(&json_object).unwrap()
97+
}
6198
}
6299
}
63100

@@ -293,6 +330,17 @@ impl Document {
293330
add_value(self, field_name, bytes);
294331
}
295332

333+
/// Add a json object value to the document.
334+
///
335+
/// Args:
336+
/// field_name (str): The field for which we are adding the json object.
337+
/// json (str): The json string that will be parsed and added to the document.
338+
fn add_json(&mut self, field_name: String, json: &str) {
339+
let json_object: serde_json::Value =
340+
serde_json::from_str(json).unwrap();
341+
add_value(self, field_name, json_object);
342+
}
343+
296344
/// Returns the number of fields that have been added to the document
297345
#[getter]
298346
fn num_fields(&self) -> usize {
@@ -337,6 +385,16 @@ impl Document {
337385
.map(|value| value_to_py(py, value))
338386
.collect::<PyResult<Vec<_>>>()
339387
}
388+
389+
fn __getitem__(&self, field_name: &str) -> PyResult<Vec<PyObject>> {
390+
let gil = Python::acquire_gil();
391+
let py = gil.python();
392+
self.get_all(py, field_name)
393+
}
394+
395+
fn __repr__(&self) -> PyResult<String> {
396+
Ok(format!("{:?}", self))
397+
}
340398
}
341399

342400
impl Document {
@@ -350,19 +408,3 @@ impl Document {
350408
.flat_map(|values| values.iter())
351409
}
352410
}
353-
354-
#[pyproto]
355-
impl PyMappingProtocol for Document {
356-
fn __getitem__(&self, field_name: &str) -> PyResult<Vec<PyObject>> {
357-
let gil = Python::acquire_gil();
358-
let py = gil.python();
359-
self.get_all(py, field_name)
360-
}
361-
}
362-
363-
#[pyproto]
364-
impl PyObjectProtocol for Document {
365-
fn __repr__(&self) -> PyResult<String> {
366-
Ok(format!("{:?}", self))
367-
}
368-
}

src/facet.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use pyo3::{basic::PyObjectProtocol, prelude::*, types::PyType};
1+
use pyo3::{prelude::*, types::PyType};
22
use tantivy::schema;
33

44
/// A Facet represents a point in a given hierarchy.
@@ -63,10 +63,7 @@ impl Facet {
6363
fn to_path_str(&self) -> String {
6464
self.inner.to_string()
6565
}
66-
}
6766

68-
#[pyproto]
69-
impl PyObjectProtocol for Facet {
7067
fn __repr__(&self) -> PyResult<String> {
7168
Ok(format!("Facet({})", self.to_path_str()))
7269
}

src/index.rs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ impl IndexWriter {
4141
pub fn add_document(&mut self, doc: &Document) -> PyResult<u64> {
4242
let named_doc = NamedFieldDocument(doc.field_values.clone());
4343
let doc = self.schema.convert_named_doc(named_doc).map_err(to_pyerr)?;
44-
Ok(self.inner_index_writer.add_document(doc))
44+
self.inner_index_writer.add_document(doc).map_err(to_pyerr)
4545
}
4646

4747
/// Helper for the `add_document` method, but passing a json string.
@@ -55,7 +55,7 @@ impl IndexWriter {
5555
pub fn add_json(&mut self, json: &str) -> PyResult<u64> {
5656
let doc = self.schema.parse_document(json).map_err(to_pyerr)?;
5757
let opstamp = self.inner_index_writer.add_document(doc);
58-
Ok(opstamp)
58+
opstamp.map_err(to_pyerr)
5959
}
6060

6161
/// Commits all of the pending changes
@@ -134,6 +134,12 @@ impl IndexWriter {
134134
field_name
135135
)))
136136
}
137+
Value::JsonObject(_) => {
138+
return Err(exceptions::PyValueError::new_err(format!(
139+
"Field `{}` is json object type not deletable.",
140+
field_name
141+
)))
142+
}
137143
};
138144
Ok(self.inner_index_writer.delete_term(term))
139145
}
@@ -281,7 +287,7 @@ impl Index {
281287
#[staticmethod]
282288
fn exists(path: &str) -> PyResult<bool> {
283289
let directory = MmapDirectory::open(path).map_err(to_pyerr)?;
284-
Ok(tv::Index::exists(&directory).unwrap())
290+
tv::Index::exists(&directory).map_err(to_pyerr)
285291
}
286292

287293
/// The schema of the current index.
@@ -304,7 +310,7 @@ impl Index {
304310
///
305311
/// Args:
306312
/// query: the query, following the tantivy query language.
307-
/// default_fields (List[Field]): A list of fields used to search if no
313+
/// default_fields_names (List[Field]): A list of fields used to search if no
308314
/// field is specified in the query.
309315
///
310316
#[args(reload_policy = "RELOAD_POLICY")]

src/query.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use pyo3::{prelude::*, PyObjectProtocol};
1+
use pyo3::prelude::*;
22
use tantivy as tv;
33

44
/// Tantivy's Query
@@ -13,8 +13,8 @@ impl Query {
1313
}
1414
}
1515

16-
#[pyproto]
17-
impl PyObjectProtocol for Query {
16+
#[pymethods]
17+
impl Query {
1818
fn __repr__(&self) -> PyResult<String> {
1919
Ok(format!("Query({:?})", self.get()))
2020
}

src/schemabuilder.rs

Lines changed: 85 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -73,26 +73,11 @@ impl SchemaBuilder {
7373
index_option: &str,
7474
) -> PyResult<Self> {
7575
let builder = &mut self.builder;
76-
let index_option = match index_option {
77-
"position" => schema::IndexRecordOption::WithFreqsAndPositions,
78-
"freq" => schema::IndexRecordOption::WithFreqs,
79-
"basic" => schema::IndexRecordOption::Basic,
80-
_ => return Err(exceptions::PyValueError::new_err(
81-
"Invalid index option, valid choices are: 'basic', 'freq' and 'position'"
82-
))
83-
};
84-
85-
let indexing = schema::TextFieldIndexing::default()
86-
.set_tokenizer(tokenizer_name)
87-
.set_index_option(index_option);
88-
89-
let options =
90-
schema::TextOptions::default().set_indexing_options(indexing);
91-
let options = if stored {
92-
options.set_stored()
93-
} else {
94-
options
95-
};
76+
let options = SchemaBuilder::build_text_option(
77+
stored,
78+
tokenizer_name,
79+
index_option,
80+
)?;
9681

9782
if let Some(builder) = builder.write().unwrap().as_mut() {
9883
builder.add_text_field(name, options);
@@ -230,6 +215,55 @@ impl SchemaBuilder {
230215
Ok(self.clone())
231216
}
232217

218+
/// Add a new json field to the schema.
219+
///
220+
/// Args:
221+
/// name (str): the name of the field.
222+
/// stored (bool, optional): If true sets the field as stored, the
223+
/// content of the field can be later restored from a Searcher.
224+
/// Defaults to False.
225+
/// tokenizer_name (str, optional): The name of the tokenizer that
226+
/// should be used to process the field. Defaults to 'default'
227+
/// index_option (str, optional): Sets which information should be
228+
/// indexed with the tokens. Can be one of 'position', 'freq' or
229+
/// 'basic'. Defaults to 'position'. The 'basic' index_option
230+
/// records only the document ID, the 'freq' option records the
231+
/// document id and the term frequency, while the 'position' option
232+
/// records the document id, term frequency and the positions of
233+
/// the term occurrences in the document.
234+
///
235+
/// Returns a copy of the schema builder with the new field added.
236+
/// Raises a ValueError if there was an error with the field creation.
237+
#[args(
238+
stored = false,
239+
tokenizer_name = "TOKENIZER",
240+
index_option = "RECORD"
241+
)]
242+
fn add_json_field(
243+
&mut self,
244+
name: &str,
245+
stored: bool,
246+
tokenizer_name: &str,
247+
index_option: &str,
248+
) -> PyResult<Self> {
249+
let builder = &mut self.builder;
250+
let options = SchemaBuilder::build_text_option(
251+
stored,
252+
tokenizer_name,
253+
index_option,
254+
)?;
255+
256+
if let Some(builder) = builder.write().unwrap().as_mut() {
257+
builder.add_json_field(name, options);
258+
} else {
259+
return Err(exceptions::PyValueError::new_err(
260+
"Schema builder object isn't valid anymore.",
261+
));
262+
}
263+
264+
Ok(self.clone())
265+
}
266+
233267
/// Add a Facet field to the schema.
234268
/// Args:
235269
/// name (str): The name of the field.
@@ -289,8 +323,8 @@ impl SchemaBuilder {
289323
stored: bool,
290324
indexed: bool,
291325
fast: Option<&str>,
292-
) -> PyResult<schema::IntOptions> {
293-
let opts = schema::IntOptions::default();
326+
) -> PyResult<schema::NumericOptions> {
327+
let opts = schema::NumericOptions::default();
294328

295329
let opts = if stored { opts.set_stored() } else { opts };
296330
let opts = if indexed { opts.set_indexed() } else { opts };
@@ -317,4 +351,33 @@ impl SchemaBuilder {
317351

318352
Ok(opts)
319353
}
354+
355+
fn build_text_option(
356+
stored: bool,
357+
tokenizer_name: &str,
358+
index_option: &str,
359+
) -> PyResult<schema::TextOptions> {
360+
let index_option = match index_option {
361+
"position" => schema::IndexRecordOption::WithFreqsAndPositions,
362+
"freq" => schema::IndexRecordOption::WithFreqs,
363+
"basic" => schema::IndexRecordOption::Basic,
364+
_ => return Err(exceptions::PyValueError::new_err(
365+
"Invalid index option, valid choices are: 'basic', 'freq' and 'position'"
366+
))
367+
};
368+
369+
let indexing = schema::TextFieldIndexing::default()
370+
.set_tokenizer(tokenizer_name)
371+
.set_index_option(index_option);
372+
373+
let options =
374+
schema::TextOptions::default().set_indexing_options(indexing);
375+
let options = if stored {
376+
options.set_stored()
377+
} else {
378+
options
379+
};
380+
381+
Ok(options)
382+
}
320383
}

0 commit comments

Comments
 (0)