Skip to content

Commit 749f2fb

Browse files
authored
Merge pull request #1 from saukymo/bump_version
Bump version
2 parents 85956a0 + efdeda5 commit 749f2fb

File tree

3 files changed

+177
-29
lines changed

3 files changed

+177
-29
lines changed

src/document.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,17 @@ impl Document {
330330
add_value(self, field_name, bytes);
331331
}
332332

333+
/// Add a json value to the document.
334+
///
335+
/// Args:
336+
/// field_name (str): The field for which we are adding the json object.
337+
/// json (str): The json object, as a JSON-encoded string, that will be added to the document.
338+
fn add_json(&mut self, field_name: String, json: &str) {
339+
let json_object: serde_json::Value =
340+
serde_json::from_str(json).unwrap();
341+
add_value(self, field_name, json_object);
342+
}
343+
333344
/// Returns the number of fields that have been added to the document
334345
#[getter]
335346
fn num_fields(&self) -> usize {

src/schemabuilder.rs

Lines changed: 83 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -73,26 +73,11 @@ impl SchemaBuilder {
7373
index_option: &str,
7474
) -> PyResult<Self> {
7575
let builder = &mut self.builder;
76-
let index_option = match index_option {
77-
"position" => schema::IndexRecordOption::WithFreqsAndPositions,
78-
"freq" => schema::IndexRecordOption::WithFreqs,
79-
"basic" => schema::IndexRecordOption::Basic,
80-
_ => return Err(exceptions::PyValueError::new_err(
81-
"Invalid index option, valid choices are: 'basic', 'freq' and 'position'"
82-
))
83-
};
84-
85-
let indexing = schema::TextFieldIndexing::default()
86-
.set_tokenizer(tokenizer_name)
87-
.set_index_option(index_option);
88-
89-
let options =
90-
schema::TextOptions::default().set_indexing_options(indexing);
91-
let options = if stored {
92-
options.set_stored()
93-
} else {
94-
options
95-
};
76+
let options = SchemaBuilder::build_text_option(
77+
stored,
78+
tokenizer_name,
79+
index_option,
80+
)?;
9681

9782
if let Some(builder) = builder.write().unwrap().as_mut() {
9883
builder.add_text_field(name, options);
@@ -230,6 +215,55 @@ impl SchemaBuilder {
230215
Ok(self.clone())
231216
}
232217

218+
/// Add a new json field to the schema.
219+
///
220+
/// Args:
221+
/// name (str): the name of the field.
222+
/// stored (bool, optional): If true sets the field as stored, the
223+
/// content of the field can be later restored from a Searcher.
224+
/// Defaults to False.
225+
/// tokenizer_name (str, optional): The name of the tokenizer that
226+
/// should be used to process the field. Defaults to 'default'
227+
/// index_option (str, optional): Sets which information should be
228+
/// indexed with the tokens. Can be one of 'position', 'freq' or
229+
/// 'basic'. Defaults to 'position'. The 'basic' index_option
230+
/// records only the document ID, the 'freq' option records the
231+
/// document id and the term frequency, while the 'position' option
232+
/// records the document id, term frequency and the positions of
233+
/// the term occurrences in the document.
234+
///
235+
/// Returns the schema builder (a clone of `self`), so that calls can be chained.
236+
/// Raises a ValueError if there was an error with the field creation.
237+
#[args(
238+
stored = false,
239+
tokenizer_name = "TOKENIZER",
240+
index_option = "RECORD"
241+
)]
242+
fn add_json_field(
243+
&mut self,
244+
name: &str,
245+
stored: bool,
246+
tokenizer_name: &str,
247+
index_option: &str,
248+
) -> PyResult<Self> {
249+
let builder = &mut self.builder;
250+
let options = SchemaBuilder::build_text_option(
251+
stored,
252+
tokenizer_name,
253+
index_option,
254+
)?;
255+
256+
if let Some(builder) = builder.write().unwrap().as_mut() {
257+
builder.add_json_field(name, options);
258+
} else {
259+
return Err(exceptions::PyValueError::new_err(
260+
"Schema builder object isn't valid anymore.",
261+
));
262+
}
263+
264+
Ok(self.clone())
265+
}
266+
233267
/// Add a Facet field to the schema.
234268
/// Args:
235269
/// name (str): The name of the field.
@@ -317,4 +351,33 @@ impl SchemaBuilder {
317351

318352
Ok(opts)
319353
}
354+
355+
fn build_text_option(
356+
stored: bool,
357+
tokenizer_name: &str,
358+
index_option: &str,
359+
) -> PyResult<schema::TextOptions> {
360+
let index_option = match index_option {
361+
"position" => schema::IndexRecordOption::WithFreqsAndPositions,
362+
"freq" => schema::IndexRecordOption::WithFreqs,
363+
"basic" => schema::IndexRecordOption::Basic,
364+
_ => return Err(exceptions::PyValueError::new_err(
365+
"Invalid index option, valid choices are: 'basic', 'freq' and 'position'"
366+
))
367+
};
368+
369+
let indexing = schema::TextFieldIndexing::default()
370+
.set_tokenizer(tokenizer_name)
371+
.set_index_option(index_option);
372+
373+
let options =
374+
schema::TextOptions::default().set_indexing_options(indexing);
375+
let options = if stored {
376+
options.set_stored()
377+
} else {
378+
options
379+
};
380+
381+
Ok(options)
382+
}
320383
}

tests/tantivy_test.py

Lines changed: 83 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,13 @@
55

66

77
def schema():
8-
return SchemaBuilder().add_text_field("title", stored=True).add_text_field("body").build()
8+
return (
9+
SchemaBuilder()
10+
.add_text_field("title", stored=True)
11+
.add_text_field("body")
12+
.build()
13+
)
14+
915

1016
def create_index(dir=None):
1117
# assume all tests will use the same documents for now
@@ -99,7 +105,9 @@ def test_simple_search_in_ram(self, ram_index):
99105

100106
def test_and_query(self, ram_index):
101107
index = ram_index
102-
query = index.parse_query("title:men AND body:summer", default_field_names=["title", "body"])
108+
query = index.parse_query(
109+
"title:men AND body:summer", default_field_names=["title", "body"]
110+
)
103111
# look for an intersection of documents
104112
searcher = index.searcher()
105113
result = searcher.search(query, 10)
@@ -119,7 +127,8 @@ def test_and_query_parser_default_fields(self, ram_index):
119127
def test_and_query_parser_default_fields_undefined(self, ram_index):
120128
query = ram_index.parse_query("winter")
121129
assert (
122-
repr(query) == """Query(BooleanQuery { subqueries: [(Should, TermQuery(Term(type=Str, field=0, "winter"))), (Should, TermQuery(Term(type=Str, field=1, "winter")))] })"""
130+
repr(query)
131+
== """Query(BooleanQuery { subqueries: [(Should, TermQuery(Term(type=Str, field=0, "winter"))), (Should, TermQuery(Term(type=Str, field=1, "winter")))] })"""
123132
)
124133

125134
def test_query_errors(self, ram_index):
@@ -129,9 +138,11 @@ def test_query_errors(self, ram_index):
129138
index.parse_query("bod:men", ["title", "body"])
130139

131140
def test_order_by_search(self):
132-
schema = (SchemaBuilder()
141+
schema = (
142+
SchemaBuilder()
133143
.add_unsigned_field("order", fast="single")
134-
.add_text_field("title", stored=True).build()
144+
.add_text_field("title", stored=True)
145+
.build()
135146
)
136147

137148
index = Index(schema)
@@ -152,15 +163,13 @@ def test_order_by_search(self):
152163
doc.add_unsigned("order", 1)
153164
doc.add_text("title", "Another test title")
154165

155-
156166
writer.add_document(doc)
157167

158168
writer.commit()
159169
index.reload()
160170

161171
query = index.parse_query("test")
162172

163-
164173
searcher = index.searcher()
165174

166175
result = searcher.search(query, 10, offset=2, order_by_field="order")
@@ -184,9 +193,11 @@ def test_order_by_search(self):
184193
assert searched_doc["title"] == ["Test title"]
185194

186195
def test_order_by_search_without_fast_field(self):
187-
schema = (SchemaBuilder()
196+
schema = (
197+
SchemaBuilder()
188198
.add_unsigned_field("order")
189-
.add_text_field("title", stored=True).build()
199+
.add_text_field("title", stored=True)
200+
.build()
190201
)
191202

192203
index = Index(schema)
@@ -319,3 +330,66 @@ def test_document_with_facet(self):
319330
def test_document_error(self):
320331
with pytest.raises(ValueError):
321332
tantivy.Document(name={})
333+
334+
335+
class TestJsonField:
336+
def test_query_from_json_field(self):
337+
schema = (
338+
SchemaBuilder()
339+
.add_json_field(
340+
"attributes",
341+
stored=True,
342+
tokenizer_name="default",
343+
index_option="position",
344+
)
345+
.build()
346+
)
347+
348+
index = Index(schema)
349+
350+
writer = index.writer()
351+
352+
doc = Document()
353+
doc.add_json(
354+
"attributes",
355+
"""{
356+
"target": "submit-button",
357+
"cart": {"product_id": 103},
358+
"description": "the best vacuum cleaner ever"
359+
}""",
360+
)
361+
362+
writer.add_document(doc)
363+
364+
doc = Document()
365+
doc.add_json(
366+
"attributes",
367+
"""{
368+
"target": "submit-button",
369+
"cart": {"product_id": 133},
370+
"description": "das keyboard"
371+
}""",
372+
)
373+
374+
writer.add_document(doc)
375+
376+
writer.commit()
377+
index.reload()
378+
379+
query = index.parse_query("target:submit-button", ["attributes"])
380+
result = index.searcher().search(query, 2)
381+
assert len(result.hits) == 2
382+
383+
query = index.parse_query("target:submit", ["attributes"])
384+
result = index.searcher().search(query, 2)
385+
assert len(result.hits) == 2
386+
387+
# query = index.parse_query_for_attributes("cart.product_id:103")
388+
# result = index.searcher().search(query, 1)
389+
# assert len(result.hits) == 1
390+
391+
# query = index.parse_query_for_attributes(
392+
# "target:submit-button AND cart.product_id:133"
393+
# )
394+
# result = index.searcher().search(query, 2)
395+
# assert len(result.hits) == 1

0 commit comments

Comments
 (0)