deps: remove thread_local dependency #749

Merged: 4 commits, merged on Mar 12, 2021. The diff below shows changes from all commits.

14 changes: 6 additions & 8 deletions Cargo.toml
@@ -51,7 +51,10 @@ use_std = ["std"]
# Enables all performance features.
perf = ["perf-cache", "perf-dfa", "perf-inline", "perf-literal"]
# Enables fast caching. (If disabled, caching is still used, but is slower.)
perf-cache = ["thread_local"]
# Currently, this feature has no effect. It used to remove the thread_local
# dependency and use a slower internal cache, but now the default cache has
# been improved and thread_local is no longer a dependency at all.
perf-cache = []
# Enables use of a lazy DFA when possible.
perf-dfa = []
# Enables aggressive use of inlining.
@@ -110,11 +113,6 @@ optional = true
version = "2.2.1"
optional = true

# For managing regex caches quickly across multiple threads.
[dependencies.thread_local]
version = "1"
optional = true

# For parsing regular expressions.
[dependencies.regex-syntax]
path = "regex-syntax"
@@ -125,9 +123,9 @@ default-features = false
# For examples.
lazy_static = "1"
# For property based tests.
quickcheck = { version = "0.8", default-features = false }
quickcheck = { version = "1.0.3", default-features = false }
# For generating random test data.
rand = "0.6.5"
rand = { version = "0.8.3", default-features = false, features = ["getrandom", "small_rng"] }
# To check README's example
# TODO: Re-enable this once the MSRV is 1.43 or greater.
# See: https://github.com/rust-lang/regex/issues/684
6 changes: 3 additions & 3 deletions bench/src/rust_compile.rs
@@ -48,20 +48,20 @@ fn compile_small_full(b: &mut Bencher) {
#[bench]
fn compile_huge(b: &mut Bencher) {
b.iter(|| {
let re = Parser::new().parse(r"\p{L}{100}").unwrap();
let re = Parser::new().parse(r"\p{L}{50}").unwrap();
Compiler::new().size_limit(1 << 30).compile(&[re]).unwrap()
});
}

#[bench]
fn compile_huge_bytes(b: &mut Bencher) {
b.iter(|| {
let re = Parser::new().parse(r"\p{L}{100}").unwrap();
let re = Parser::new().parse(r"\p{L}{50}").unwrap();
Compiler::new().size_limit(1 << 30).bytes(true).compile(&[re]).unwrap()
});
}

#[bench]
fn compile_huge_full(b: &mut Bencher) {
b.iter(|| regex!(r"\p{L}{100}"));
b.iter(|| regex!(r"\p{L}{50}"));
}
4 changes: 2 additions & 2 deletions src/backtrack.rs
@@ -115,8 +115,8 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
// Then we reset all existing allocated space to 0.
// Finally, we request more space if we need it.
//
// This is all a little circuitous, but doing this unsafely
// doesn't seem to have a measurable impact on performance.
// This is all a little circuitous, but doing this using unchecked
// operations doesn't seem to have a measurable impact on performance.
// (Probably because backtracking is limited to such small
// inputs/regexes in the first place.)
let visited_len =
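
For readers skimming this hunk: the comment describes a checked "reset, then grow" strategy for the visited bitmap. A minimal sketch of that pattern, using names borrowed from the surrounding diff (`visited`, `visited_len`) but not the crate's exact implementation:

```rust
// Illustrative sketch of the checked reset-and-grow pattern described in the
// comment above; this is not the crate's actual backtracker code.
fn reset_visited(visited: &mut Vec<u32>, visited_len: usize) {
    // Zero out everything that is already allocated.
    for word in visited.iter_mut() {
        *word = 0;
    }
    // Request more space only if the new bitmap needs it.
    if visited.len() < visited_len {
        visited.resize(visited_len, 0);
    }
}
```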
100 changes: 0 additions & 100 deletions src/cache.rs

This file was deleted.

20 changes: 15 additions & 5 deletions src/dfa.rs
@@ -848,7 +848,7 @@ impl<'a> Fsm<'a> {
/// next_si transitions to the next state, where the transition input
/// corresponds to text[i].
///
/// This elides bounds checks, and is therefore unsafe.
/// This elides bounds checks, and is therefore not safe.
#[cfg_attr(feature = "perf-inline", inline(always))]
unsafe fn next_si(&self, si: StatePtr, text: &[u8], i: usize) -> StatePtr {
// What is the argument for safety here?
@@ -1688,7 +1688,7 @@ impl Transitions {
self.num_byte_classes * mem::size_of::<StatePtr>()
}

/// Like `next`, but uses unchecked access and is therefore unsafe.
/// Like `next`, but uses unchecked access and is therefore not safe.
unsafe fn next_unchecked(&self, si: StatePtr, cls: usize) -> StatePtr {
debug_assert!((si as usize) < self.table.len());
debug_assert!(cls < self.num_byte_classes);
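
The two hunks above only reword the safety comments, but the pattern they describe is worth spelling out: the caller guarantees the index is in bounds, so the bounds check is elided. A hedged sketch of that shape, not the crate's actual transition-table lookup:

```rust
// Sketch of an unchecked lookup: callers must uphold `idx < table.len()`.
// Illustrative only; the real `next_unchecked` indexes the DFA's transition
// table by state pointer and byte class.
unsafe fn load_unchecked(table: &[u32], idx: usize) -> u32 {
    debug_assert!(idx < table.len());
    // SAFETY: the caller guarantees `idx` is in bounds.
    *table.get_unchecked(idx)
}
```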
@@ -1895,12 +1895,22 @@ mod tests {
push_inst_ptr, read_vari32, read_varu32, write_vari32, write_varu32,
State, StateFlags,
};
use quickcheck::{quickcheck, QuickCheck, StdGen};
use quickcheck::{quickcheck, Gen, QuickCheck};
use std::sync::Arc;

#[test]
fn prop_state_encode_decode() {
fn p(ips: Vec<u32>, flags: u8) -> bool {
fn p(mut ips: Vec<u32>, flags: u8) -> bool {
// It looks like our encoding scheme can't handle instruction
// pointers at or above 2**31. We should fix that, but it seems
// unlikely to occur in real code due to the amount of memory
// required for such a state machine. So for now, we just clamp
// our test data.
for ip in &mut ips {
if *ip >= 1 << 31 {
*ip = (1 << 31) - 1;
}
}
let mut data = vec![flags];
let mut prev = 0;
for &ip in ips.iter() {
@@ -1914,7 +1924,7 @@ mod tests {
expected == got && state.flags() == StateFlags(flags)
}
QuickCheck::new()
.gen(StdGen::new(self::rand::thread_rng(), 10_000))
.gen(Gen::new(10_000))
.quickcheck(p as fn(Vec<u32>, u8) -> bool);
}
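
Two things change in this test: quickcheck 1.0's `Gen::new(size)` replaces the old `StdGen::new(rng, size)` constructor, and the generated instruction pointers are clamped below 2^31. A hedged illustration of why the clamp matters, assuming the encoding stores deltas between successive pointers as signed 32-bit varints (which the `write_vari32`/`read_vari32` imports suggest, but the diff does not confirm):

```rust
// Illustrative only: if deltas between successive instruction pointers must
// round-trip through an i32, a pointer at or above 2^31 can overflow the
// delta even when the previous pointer is 0.
fn delta_fits_in_i32(prev: u32, ip: u32) -> bool {
    let delta = i64::from(ip) - i64::from(prev);
    delta >= i64::from(i32::MIN) && delta <= i64::from(i32::MAX)
}

// delta_fits_in_i32(0, (1 << 31) - 1) == true   (largest clamped value)
// delta_fits_in_i32(0, 1 << 31)       == false  (hence the clamp in the test)
```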

34 changes: 24 additions & 10 deletions src/exec.rs
@@ -1,5 +1,6 @@
use std::cell::RefCell;
use std::collections::HashMap;
use std::panic::AssertUnwindSafe;
use std::sync::Arc;

#[cfg(feature = "perf-literal")]
@@ -9,14 +10,14 @@ use syntax::hir::Hir;
use syntax::ParserBuilder;

use backtrack;
use cache::{Cached, CachedGuard};
use compile::Compiler;
#[cfg(feature = "perf-dfa")]
use dfa;
use error::Error;
use input::{ByteInput, CharInput};
use literal::LiteralSearcher;
use pikevm;
use pool::{Pool, PoolGuard};
use prog::Program;
use re_builder::RegexOptions;
use re_bytes;
@@ -34,8 +35,8 @@ use utf8::next_utf8;
pub struct Exec {
/// All read only state.
ro: Arc<ExecReadOnly>,
/// Caches for the various matching engines.
cache: Cached<ProgramCache>,
/// A pool of reusable values for the various matching engines.
pool: Pool<ProgramCache>,
}

/// `ExecNoSync` is like `Exec`, except it embeds a reference to a cache. This
@@ -46,7 +47,7 @@ pub struct ExecNoSync<'c> {
/// All read only state.
ro: &'c Arc<ExecReadOnly>,
/// Caches for the various matching engines.
cache: CachedGuard<'c, ProgramCache>,
cache: PoolGuard<'c, ProgramCache>,
}

/// `ExecNoSyncStr` is like `ExecNoSync`, but matches on &str instead of &[u8].
@@ -302,7 +303,8 @@ impl ExecBuilder {
ac: None,
match_type: MatchType::Nothing,
});
return Ok(Exec { ro: ro, cache: Cached::new() });
let pool = ExecReadOnly::new_pool(&ro);
return Ok(Exec { ro: ro, pool });
}
let parsed = self.parse()?;
let mut nfa = Compiler::new()
@@ -342,7 +344,8 @@ impl ExecBuilder {
ro.match_type = ro.choose_match_type(self.match_type);

let ro = Arc::new(ro);
Ok(Exec { ro: ro, cache: Cached::new() })
let pool = ExecReadOnly::new_pool(&ro);
Ok(Exec { ro, pool })
}

#[cfg(feature = "perf-literal")]
@@ -1254,10 +1257,9 @@ impl Exec {
/// Get a searcher that isn't Sync.
#[cfg_attr(feature = "perf-inline", inline(always))]
pub fn searcher(&self) -> ExecNoSync {
let create = || RefCell::new(ProgramCacheInner::new(&self.ro));
ExecNoSync {
ro: &self.ro, // a clone is too expensive here! (and not needed)
cache: self.cache.get_or(create),
cache: self.pool.get(),
}
}

@@ -1309,7 +1311,8 @@ impl Exec {

impl Clone for Exec {
fn clone(&self) -> Exec {
Exec { ro: self.ro.clone(), cache: Cached::new() }
let pool = ExecReadOnly::new_pool(&self.ro);
Exec { ro: self.ro.clone(), pool }
}
}

@@ -1442,6 +1445,13 @@ impl ExecReadOnly {
let lcs_len = self.suffixes.lcs().char_len();
lcs_len >= 3 && lcs_len > self.dfa.prefixes.lcp().char_len()
}

fn new_pool(ro: &Arc<ExecReadOnly>) -> Pool<ProgramCache> {
let ro = ro.clone();
Pool::new(Box::new(move || {
AssertUnwindSafe(RefCell::new(ProgramCacheInner::new(&ro)))
}))
}
}

#[derive(Clone, Copy, Debug)]
@@ -1500,7 +1510,11 @@ enum MatchNfaType {

/// `ProgramCache` maintains reusable allocations for each matching engine
/// available to a particular program.
pub type ProgramCache = RefCell<ProgramCacheInner>;
///
/// We declare this as unwind safe since it's a cache that's only used for
/// performance purposes. If a panic occurs, it is (or should be) always safe
/// to continue using the same regex object.
pub type ProgramCache = AssertUnwindSafe<RefCell<ProgramCacheInner>>;

#[derive(Debug)]
pub struct ProgramCacheInner {
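
The new `pool` module itself is not shown in this diff. Purely as a sketch of the shape implied by the calls above — `Pool::new` takes a boxed constructor, `pool.get()` returns a guard that derefs to the cached value and hands it back on drop — here is a Mutex-backed stand-in; the crate's real implementation is not necessarily structured this way:

```rust
// A minimal, illustrative Pool/PoolGuard pair. Not the crate's `pool` module.
use std::ops::Deref;
use std::sync::Mutex;

struct Pool<T> {
    create: Box<dyn Fn() -> T + Send + Sync>,
    stack: Mutex<Vec<T>>,
}

struct PoolGuard<'a, T> {
    pool: &'a Pool<T>,
    value: Option<T>,
}

impl<T> Pool<T> {
    fn new(create: Box<dyn Fn() -> T + Send + Sync>) -> Pool<T> {
        Pool { create, stack: Mutex::new(vec![]) }
    }

    fn get(&self) -> PoolGuard<'_, T> {
        // Reuse an idle value if one exists, otherwise build a fresh one.
        let reused = self.stack.lock().unwrap().pop();
        let value = reused.unwrap_or_else(|| (self.create)());
        PoolGuard { pool: self, value: Some(value) }
    }
}

impl<'a, T> Deref for PoolGuard<'a, T> {
    type Target = T;
    fn deref(&self) -> &T {
        self.value.as_ref().unwrap()
    }
}

impl<'a, T> Drop for PoolGuard<'a, T> {
    fn drop(&mut self) {
        // Hand the value back to the pool when the guard goes out of scope.
        if let Some(value) = self.value.take() {
            self.pool.stack.lock().unwrap().push(value);
        }
    }
}
```

In the diff itself, the pooled value is additionally wrapped in `AssertUnwindSafe`, which is what lets `ProgramCache` be treated as unwind safe, as the added doc comment explains.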
3 changes: 2 additions & 1 deletion src/expand.rs
@@ -144,7 +144,8 @@ fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef> {
}
// We just verified that the range 0..cap_end is valid ASCII, so it must
// therefore be valid UTF-8. If we really cared, we could avoid this UTF-8
// check with either unsafe or by parsing the number straight from &[u8].
// check via an unchecked conversion or by parsing the number straight from
// &[u8].
let cap =
str::from_utf8(&rep[i..cap_end]).expect("valid UTF-8 capture name");
Some(CaptureRef {
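
As an aside on the alternative the reworded comment mentions, the capture index could be parsed straight from the ASCII bytes, skipping the UTF-8 conversion entirely. A hedged sketch with a hypothetical helper name, not what the crate actually does:

```rust
// Illustrative only: parse a decimal number directly from ASCII bytes
// (e.g. the hypothetical slice rep[i..cap_end]) without a &str round trip.
fn parse_decimal(digits: &[u8]) -> Option<u32> {
    let mut n: u32 = 0;
    for &b in digits {
        if !b.is_ascii_digit() {
            return None;
        }
        n = n.checked_mul(10)?.checked_add(u32::from(b - b'0'))?;
    }
    Some(n)
}
```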
14 changes: 6 additions & 8 deletions src/lib.rs
@@ -523,11 +523,6 @@ All features below are enabled by default.
Enables all performance related features. This feature is enabled by default
and will always cover all features that improve performance, even if more
are added in the future.
* **perf-cache** -
Enables the use of very fast thread safe caching for internal match state.
When this is disabled, caching is still used, but with a slower and simpler
implementation. Disabling this drops the `thread_local` and `lazy_static`
dependencies.
* **perf-dfa** -
Enables the use of a lazy DFA for matching. The lazy DFA is used to compile
portions of a regex to a very fast DFA on an as-needed basis. This can
@@ -542,6 +537,11 @@ All features below are enabled by default.
Enables the use of literal optimizations for speeding up matches. In some
cases, literal optimizations can result in speedups of _several_ orders of
magnitude. Disabling this drops the `aho-corasick` and `memchr` dependencies.
* **perf-cache** -
This feature used to enable a faster internal cache at the cost of using
additional dependencies, but this is no longer an option. A fast internal
cache is now used unconditionally with no additional dependencies. This may
change in the future.

### Unicode features

@@ -631,8 +631,6 @@ extern crate memchr;
#[cfg_attr(feature = "perf-literal", macro_use)]
extern crate quickcheck;
extern crate regex_syntax as syntax;
#[cfg(feature = "perf-cache")]
extern crate thread_local;

// #[cfg(doctest)]
// doc_comment::doctest!("../README.md");
@@ -749,7 +747,6 @@ pub mod bytes {
}

mod backtrack;
mod cache;
mod compile;
#[cfg(feature = "perf-dfa")]
mod dfa;
@@ -764,6 +761,7 @@ mod literal;
#[cfg(feature = "pattern")]
mod pattern;
mod pikevm;
mod pool;
mod prog;
mod re_builder;
mod re_bytes;