Merge pull request #1 from saukymo/bump_version

saukymo · web-flow · commit 749f2fb17f28 · 2022-04-22T17:49:42.000+08:00
Bump version
diff --git a/src/document.rs b/src/document.rs
@@ -330,6 +330,17 @@ impl Document {
         add_value(self, field_name, bytes);
     }
 
+    /// Add a json value to the document.
+    ///
+    /// Args:
+    ///     field_name (str): The field for which we are adding the json.
+    ///     json (str): The json text to add; raises ValueError if invalid.
+    fn add_json(&mut self, field_name: String, json: &str) -> pyo3::PyResult<()> {
+        serde_json::from_str::<serde_json::Value>(json)
+            .map(|json_object| add_value(self, field_name, json_object))
+            .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))
+    }
+
     /// Returns the number of added fields that have been added to the document
     #[getter]
     fn num_fields(&self) -> usize {
diff --git a/src/schemabuilder.rs b/src/schemabuilder.rs
@@ -73,26 +73,11 @@ impl SchemaBuilder {
         index_option: &str,
     ) -> PyResult<Self> {
         let builder = &mut self.builder;
-        let index_option = match index_option {
-            "position" => schema::IndexRecordOption::WithFreqsAndPositions,
-            "freq" => schema::IndexRecordOption::WithFreqs,
-            "basic" => schema::IndexRecordOption::Basic,
-            _ => return Err(exceptions::PyValueError::new_err(
-                "Invalid index option, valid choices are: 'basic', 'freq' and 'position'"
-            ))
-        };
-
-        let indexing = schema::TextFieldIndexing::default()
-            .set_tokenizer(tokenizer_name)
-            .set_index_option(index_option);
-
-        let options =
-            schema::TextOptions::default().set_indexing_options(indexing);
-        let options = if stored {
-            options.set_stored()
-        } else {
-            options
-        };
+        let options = SchemaBuilder::build_text_option(
+            stored,
+            tokenizer_name,
+            index_option,
+        )?;
 
         if let Some(builder) = builder.write().unwrap().as_mut() {
             builder.add_text_field(name, options);
@@ -230,6 +215,55 @@ impl SchemaBuilder {
         Ok(self.clone())
     }
 
+    /// Register a json field on the schema under construction.
+    ///
+    /// Args:
+    ///     name (str): The name of the new field.
+    ///     stored (bool, optional): Whether the field should be stored.
+    ///         Defaults to False.
+    ///     tokenizer_name (str, optional): Name of the tokenizer applied to
+    ///         the field. Defaults to the `TOKENIZER` constant.
+    ///     index_option (str, optional): How much information is indexed
+    ///         with each token: 'basic' keeps only the document id, 'freq'
+    ///         adds the term frequency and 'position' adds the positions of
+    ///         the term occurrences as well. Defaults to the `RECORD`
+    ///         constant.
+    ///
+    /// Returns a clone of this builder so that calls can be chained.
+    /// Raises a ValueError if `index_option` is not one of the choices above
+    /// or if the schema builder is no longer valid.
+    #[args(
+        stored = false,
+        tokenizer_name = "TOKENIZER",
+        index_option = "RECORD"
+    )]
+    fn add_json_field(
+        &mut self,
+        name: &str,
+        stored: bool,
+        tokenizer_name: &str,
+        index_option: &str,
+    ) -> PyResult<Self> {
+        let text_options = SchemaBuilder::build_text_option(
+            stored,
+            tokenizer_name,
+            index_option,
+        )?;
+
+        match self.builder.write().unwrap().as_mut() {
+            Some(inner) => {
+                inner.add_json_field(name, text_options);
+            }
+            None => {
+                return Err(exceptions::PyValueError::new_err(
+                    "Schema builder object isn't valid anymore.",
+                ));
+            }
+        }
+
+        Ok(self.clone())
+    }
+
     /// Add a Facet field to the schema.
     /// Args:
     ///     name (str): The name of the field.
@@ -317,4 +351,33 @@ impl SchemaBuilder {
 
         Ok(opts)
     }
+
+    /// Build the `TextOptions` for a field from the Python-facing arguments.
+    /// `index_option` must be 'basic', 'freq' or 'position', anything else
+    /// yields a ValueError; `set_stored()` is applied when `stored` is true.
+    fn build_text_option(
+        stored: bool,
+        tokenizer_name: &str,
+        index_option: &str,
+    ) -> PyResult<schema::TextOptions> {
+        let record_option = match index_option {
+            "basic" => schema::IndexRecordOption::Basic,
+            "freq" => schema::IndexRecordOption::WithFreqs,
+            "position" => schema::IndexRecordOption::WithFreqsAndPositions,
+            _ => return Err(exceptions::PyValueError::new_err(
+                "Invalid index option, valid choices are: 'basic', 'freq' and 'position'"
+            ))
+        };
+
+        let text_options = schema::TextOptions::default().set_indexing_options(
+            schema::TextFieldIndexing::default()
+                .set_tokenizer(tokenizer_name)
+                .set_index_option(record_option),
+        );
+
+        if !stored {
+            return Ok(text_options);
+        }
+        Ok(text_options.set_stored())
+    }
 }
diff --git a/tests/tantivy_test.py b/tests/tantivy_test.py
@@ -5,7 +5,13 @@
 
 
 def schema():
-    return SchemaBuilder().add_text_field("title", stored=True).add_text_field("body").build()
+    return (
+        SchemaBuilder()
+        .add_text_field("title", stored=True)
+        .add_text_field("body")
+        .build()
+    )
+
 
 def create_index(dir=None):
     # assume all tests will use the same documents for now
@@ -99,7 +105,9 @@ def test_simple_search_in_ram(self, ram_index):
 
     def test_and_query(self, ram_index):
         index = ram_index
-        query = index.parse_query("title:men AND body:summer", default_field_names=["title", "body"])
+        query = index.parse_query(
+            "title:men AND body:summer", default_field_names=["title", "body"]
+        )
         # look for an intersection of documents
         searcher = index.searcher()
         result = searcher.search(query, 10)
@@ -119,7 +127,8 @@ def test_and_query_parser_default_fields(self, ram_index):
     def test_and_query_parser_default_fields_undefined(self, ram_index):
         query = ram_index.parse_query("winter")
         assert (
-            repr(query) == """Query(BooleanQuery { subqueries: [(Should, TermQuery(Term(type=Str, field=0, "winter"))), (Should, TermQuery(Term(type=Str, field=1, "winter")))] })"""
+            repr(query)
+            == """Query(BooleanQuery { subqueries: [(Should, TermQuery(Term(type=Str, field=0, "winter"))), (Should, TermQuery(Term(type=Str, field=1, "winter")))] })"""
         )
 
     def test_query_errors(self, ram_index):
@@ -129,9 +138,11 @@ def test_query_errors(self, ram_index):
             index.parse_query("bod:men", ["title", "body"])
 
     def test_order_by_search(self):
-        schema = (SchemaBuilder()
+        schema = (
+            SchemaBuilder()
             .add_unsigned_field("order", fast="single")
-            .add_text_field("title", stored=True).build()
+            .add_text_field("title", stored=True)
+            .build()
         )
 
         index = Index(schema)
@@ -152,15 +163,13 @@ def test_order_by_search(self):
         doc.add_unsigned("order", 1)
         doc.add_text("title", "Another test title")
 
-
         writer.add_document(doc)
 
         writer.commit()
         index.reload()
 
         query = index.parse_query("test")
 
-
         searcher = index.searcher()
 
         result = searcher.search(query, 10, offset=2, order_by_field="order")
@@ -184,9 +193,11 @@ def test_order_by_search(self):
         assert searched_doc["title"] == ["Test title"]
 
     def test_order_by_search_without_fast_field(self):
-        schema = (SchemaBuilder()
+        schema = (
+            SchemaBuilder()
             .add_unsigned_field("order")
-            .add_text_field("title", stored=True).build()
+            .add_text_field("title", stored=True)
+            .build()
         )
 
         index = Index(schema)
@@ -319,3 +330,66 @@ def test_document_with_facet(self):
     def test_document_error(self):
         with pytest.raises(ValueError):
             tantivy.Document(name={})
+
+
+class TestJsonField:
+    """Tests covering json fields: schema creation, indexing and querying."""
+    def test_query_from_json_field(self):
+        """Index two json documents and query them through the json field."""
+        schema = (
+            SchemaBuilder()
+            .add_json_field(
+                "attributes",
+                stored=True,
+                tokenizer_name="default",
+                index_option="position",
+            )
+            .build()
+        )
+
+        index = Index(schema)
+
+        writer = index.writer()
+
+        doc = Document()
+        doc.add_json(
+            "attributes",
+            """{
+                "target": "submit-button",
+                "cart": {"product_id": 103},
+                "description": "the best vacuum cleaner ever"
+            }""",
+        )
+
+        writer.add_document(doc)
+
+        doc = Document()
+        doc.add_json(
+            "attributes",
+            """{
+                "target": "submit-button",
+                "cart": {"product_id": 133},
+                "description": "das keyboard"
+            }""",
+        )
+
+        writer.add_document(doc)
+
+        writer.commit()
+        index.reload()
+
+        # Both documents carry the same "target" value, so both must match.
+        query = index.parse_query("target:submit-button", ["attributes"])
+        result = index.searcher().search(query, 2)
+        assert len(result.hits) == 2
+
+        # presumably the tokenizer splits "submit-button", so "submit" hits too
+        query = index.parse_query("target:submit", ["attributes"])
+        result = index.searcher().search(query, 2)
+        assert len(result.hits) == 2
+
+        # TODO(review): nested-path queries are still untested. The disabled
+        # checks relied on `parse_query_for_attributes`, which is not visible
+        # in this test module (confirm it exists before re-enabling them):
+        #   "cart.product_id:103"                          -> 1 hit expected
+        #   "target:submit-button AND cart.product_id:133" -> 1 hit expected