Skip to content

Commit 3219743

Browse files
authored
Support fast text fields (#180)
1 parent 8e589c9 commit 3219743

File tree

1 file changed

+35
-0
lines changed

1 file changed

+35
-0
lines changed

src/schemabuilder.rs

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ pub(crate) struct SchemaBuilder {
2929
pub(crate) builder: Arc<RwLock<Option<schema::SchemaBuilder>>>,
3030
}
3131

32+
/// Tokenizer name that `build_text_option` treats specially for fast fields:
/// when `fast` is set and the requested tokenizer equals this name, `None` is
/// passed to `set_fast` instead of the tokenizer name.
const NO_TOKENIZER_NAME: &str = "raw";
3233
const TOKENIZER: &str = "default";
3334
const RECORD: &str = "position";
3435

@@ -53,6 +54,14 @@ impl SchemaBuilder {
5354
/// stored (bool, optional): If true sets the field as stored, the
5455
/// content of the field can be later restored from a Searcher.
5556
/// Defaults to False.
57+
/// fast (bool, optional): Set the text options as a fast field. A
58+
/// fast field is a column-oriented storage format used by tantivy.
59+
/// Text fast fields will have the term ids stored in the fast
60+
/// field. The fast field will be a multivalued fast field.
61+
/// It is recommended to use the "raw" tokenizer, since it will
62+
/// store the original text unchanged. The "default" tokenizer will
63+
/// store the terms as lower case and this will be reflected in the
64+
/// dictionary.
5665
/// tokenizer_name (str, optional): The name of the tokenizer that
5766
/// should be used to process the field. Defaults to 'default'
5867
/// index_option (str, optional): Sets which information should be
@@ -68,19 +77,22 @@ impl SchemaBuilder {
6877
#[pyo3(signature = (
6978
name,
7079
stored = false,
80+
fast = false,
7181
tokenizer_name = TOKENIZER,
7282
index_option = RECORD
7383
))]
7484
fn add_text_field(
7585
&mut self,
7686
name: &str,
7787
stored: bool,
88+
fast: bool,
7889
tokenizer_name: &str,
7990
index_option: &str,
8091
) -> PyResult<Self> {
8192
let builder = &mut self.builder;
8293
let options = SchemaBuilder::build_text_option(
8394
stored,
95+
fast,
8496
tokenizer_name,
8597
index_option,
8698
)?;
@@ -296,6 +308,14 @@ impl SchemaBuilder {
296308
/// stored (bool, optional): If true sets the field as stored, the
297309
/// content of the field can be later restored from a Searcher.
298310
/// Defaults to False.
311+
/// fast (bool, optional): Set the text options as a fast field. A
312+
/// fast field is a column-oriented storage format used by tantivy.
313+
/// Text fast fields will have the term ids stored in the fast
314+
/// field. The fast field will be a multivalued fast field.
315+
/// It is recommended to use the "raw" tokenizer, since it will
316+
/// store the original text unchanged. The "default" tokenizer will
317+
/// store the terms as lower case and this will be reflected in the
318+
/// dictionary.
299319
/// tokenizer_name (str, optional): The name of the tokenizer that
300320
/// should be used to process the field. Defaults to 'default'
301321
/// index_option (str, optional): Sets which information should be
@@ -311,19 +331,22 @@ impl SchemaBuilder {
311331
#[pyo3(signature = (
312332
name,
313333
stored = false,
334+
fast = false,
314335
tokenizer_name = TOKENIZER,
315336
index_option = RECORD
316337
))]
317338
fn add_json_field(
318339
&mut self,
319340
name: &str,
320341
stored: bool,
342+
fast: bool,
321343
tokenizer_name: &str,
322344
index_option: &str,
323345
) -> PyResult<Self> {
324346
let builder = &mut self.builder;
325347
let options = SchemaBuilder::build_text_option(
326348
stored,
349+
fast,
327350
tokenizer_name,
328351
index_option,
329352
)?;
@@ -482,6 +505,7 @@ impl SchemaBuilder {
482505

483506
fn build_text_option(
484507
stored: bool,
508+
fast: bool,
485509
tokenizer_name: &str,
486510
index_option: &str,
487511
) -> PyResult<schema::TextOptions> {
@@ -506,6 +530,17 @@ impl SchemaBuilder {
506530
options
507531
};
508532

533+
let options = if fast {
534+
let text_tokenizer = if tokenizer_name != NO_TOKENIZER_NAME {
535+
Some(tokenizer_name)
536+
} else {
537+
None
538+
};
539+
options.set_fast(text_tokenizer)
540+
} else {
541+
options
542+
};
543+
509544
Ok(options)
510545
}
511546
}

0 commit comments

Comments
 (0)