@@ -29,6 +29,7 @@ pub(crate) struct SchemaBuilder {
29
29
pub ( crate ) builder : Arc < RwLock < Option < schema:: SchemaBuilder > > > ,
30
30
}
31
31
32
// Tokenizer name that `build_text_option` treats specially for fast fields:
// when `tokenizer_name` equals this value, `set_fast` is called with `None`
// instead of `Some(tokenizer_name)`.
+ const NO_TOKENIZER_NAME : & str = "raw" ;
32
33
// Default value of the `tokenizer_name` argument in the `add_text_field` and
// `add_json_field` signatures.
const TOKENIZER : & str = "default" ;
33
34
// Default value of the `index_option` argument in the `add_text_field` and
// `add_json_field` signatures.
const RECORD : & str = "position" ;
34
35
@@ -53,6 +54,14 @@ impl SchemaBuilder {
53
54
/// stored (bool, optional): If true sets the field as stored, the
54
55
/// content of the field can be later restored from a Searcher.
55
56
/// Defaults to False.
57
+ /// fast (bool, optional): Set the text options as a fast field. A
58
+ /// fast field is stored in a column-oriented fashion by tantivy.
59
+ /// Text fast fields will have the term ids stored in the fast
60
+ /// field. The fast field will be a multivalued fast field.
61
+ /// It is recommended to use the "raw" tokenizer, since it will
62
+ /// store the original text unchanged. The "default" tokenizer will
63
+ /// store the terms as lower case and this will be reflected in the
64
+ /// dictionary. Defaults to False.
56
65
/// tokenizer_name (str, optional): The name of the tokenizer that
57
66
/// should be used to process the field. Defaults to 'default'
58
67
/// index_option (str, optional): Sets which information should be
@@ -68,19 +77,22 @@ impl SchemaBuilder {
68
77
#[ pyo3( signature = (
69
78
name,
70
79
stored = false ,
80
+ fast = false ,
71
81
tokenizer_name = TOKENIZER ,
72
82
index_option = RECORD
73
83
) ) ]
74
84
fn add_text_field (
75
85
& mut self ,
76
86
name : & str ,
77
87
stored : bool ,
88
+ fast : bool ,
78
89
tokenizer_name : & str ,
79
90
index_option : & str ,
80
91
) -> PyResult < Self > {
81
92
let builder = & mut self . builder ;
82
93
let options = SchemaBuilder :: build_text_option (
83
94
stored,
95
+ fast,
84
96
tokenizer_name,
85
97
index_option,
86
98
) ?;
@@ -296,6 +308,14 @@ impl SchemaBuilder {
296
308
/// stored (bool, optional): If true sets the field as stored, the
297
309
/// content of the field can be later restored from a Searcher.
298
310
/// Defaults to False.
311
+ /// fast (bool, optional): Set the text options as a fast field. A
312
+ /// fast field is stored in a column-oriented fashion by tantivy.
313
+ /// Text fast fields will have the term ids stored in the fast
314
+ /// field. The fast field will be a multivalued fast field.
315
+ /// It is recommended to use the "raw" tokenizer, since it will
316
+ /// store the original text unchanged. The "default" tokenizer will
317
+ /// store the terms as lower case and this will be reflected in the
318
+ /// dictionary. Defaults to False.
299
319
/// tokenizer_name (str, optional): The name of the tokenizer that
300
320
/// should be used to process the field. Defaults to 'default'
301
321
/// index_option (str, optional): Sets which information should be
@@ -311,19 +331,22 @@ impl SchemaBuilder {
311
331
#[ pyo3( signature = (
312
332
name,
313
333
stored = false ,
334
+ fast = false ,
314
335
tokenizer_name = TOKENIZER ,
315
336
index_option = RECORD
316
337
) ) ]
317
338
fn add_json_field (
318
339
& mut self ,
319
340
name : & str ,
320
341
stored : bool ,
342
+ fast : bool ,
321
343
tokenizer_name : & str ,
322
344
index_option : & str ,
323
345
) -> PyResult < Self > {
324
346
let builder = & mut self . builder ;
325
347
let options = SchemaBuilder :: build_text_option (
326
348
stored,
349
+ fast,
327
350
tokenizer_name,
328
351
index_option,
329
352
) ?;
@@ -482,6 +505,7 @@ impl SchemaBuilder {
482
505
483
506
fn build_text_option (
484
507
stored : bool ,
508
+ fast : bool ,
485
509
tokenizer_name : & str ,
486
510
index_option : & str ,
487
511
) -> PyResult < schema:: TextOptions > {
@@ -506,6 +530,17 @@ impl SchemaBuilder {
506
530
options
507
531
} ;
508
532
533
+ let options = if fast {
534
+ let text_tokenizer = if tokenizer_name != NO_TOKENIZER_NAME {
535
+ Some ( tokenizer_name)
536
+ } else {
537
+ None
538
+ } ;
539
+ options. set_fast ( text_tokenizer)
540
+ } else {
541
+ options
542
+ } ;
543
+
509
544
Ok ( options)
510
545
}
511
546
}
0 commit comments