
Commit 5bede68

Merge pull request #22 from mtak-/bloom-filteer
Bloom filter fallback
2 parents 3199de9 + 0411984

7 files changed: 285 additions & 160 deletions

Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -31,6 +31,7 @@ stats = []
 [dependencies]
 cfg-if = "0.1.9"
 crossbeam-utils = "0.6.5"
+fxhash = "0.2.1"
 lazy_static = "1.3.0"
 lock_api = "0.2.0"
 parking_lot = "0.8.0"
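The new fxhash dependency supplies FxHashMap, which the bloom module added below uses as its overflow store, keyed by raw TCell pointers. A minimal standalone sketch of that map shape (the u64 cell here is a stand-in for illustration, not swym's TCell):

use fxhash::FxHashMap;

fn main() {
    // Same shape as bloom.rs's overflow store: raw pointer -> write-log offset.
    let mut overflow: FxHashMap<*const u64, usize> = FxHashMap::default();
    let cell = 42u64; // stand-in for a TCell
    overflow.insert(&cell as *const u64, 7);
    assert_eq!(overflow.get(&(&cell as *const u64)).cloned(), Some(7));
}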

benches/single_threaded_scaling.rs

Lines changed: 5 additions & 4 deletions
@@ -11,7 +11,7 @@ mod single_threaded_scaling {
     use swym::{tcell::TCell, thread_key, tx::Ordering};
     use test::Bencher;

-    /// this demonstrates issues with the writelog
+    /// This should reveal performance cliffs and regressions in the write log.
     macro_rules! write_count {
         ($name:ident, $lock_name:ident, $atomic_name:ident, $amount:expr) => {
             #[bench]
@@ -75,16 +75,17 @@ mod single_threaded_scaling {
         write_008, lock_write_008, atomic_write_008, 8;
         write_016, lock_write_016, atomic_write_016, 16;
         write_032, lock_write_032, atomic_write_032, 32;
-        write_064, lock_write_064, atomic_write_064, 64;
+        write_063, lock_write_063, atomic_write_063, 63;

         // start to hit bloom filter failure here
+        write_064, lock_write_064, atomic_write_064, 64;
         write_065, lock_write_065, atomic_write_065, 65;
         write_066, lock_write_066, atomic_write_066, 66;
         write_067, lock_write_067, atomic_write_067, 67;
         write_068, lock_write_068, atomic_write_068, 68;

         write_128, lock_write_128, atomic_write_128, 128;
-
-        write_256, lock_write_256, atomic_write_256, 256
+        write_256, lock_write_256, atomic_write_256, 256;
+        write_512, lock_write_512, atomic_write_512, 512
     }
 }
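Why the cliff lands at 64: the inline filter is a single usize, so a 64-bit target has exactly 64 buckets, and insert_inline also refuses to set the last remaining bit, because an all-ones word is the OVERFLOWED sentinel. A standalone sketch of that saturation (not swym code; the 16-byte spacing and the shift of 4 are assumptions matching 8-byte-aligned, two-word TCells as discussed in bloom.rs):

fn main() {
    const OVERFLOWED: usize = !0; // same sentinel as bloom.rs
    let mut filter: usize = 0;
    let shift = 4; // what calc_shift::<T>() yields for align_of::<T>() == 8
    for n in 0..64usize {
        let addr = 0x1000 + n * 16; // evenly spaced 16-byte "TCell" slots
        let bit = 1usize << ((addr >> shift) & 63);
        if filter & bit != 0 || filter | bit == OVERFLOWED {
            // mirrors insert_inline returning Contained::Maybe
            println!("fallback to the HashMap at write #{}", n + 1); // #64 here
            return;
        }
        filter |= bit;
    }
}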

src/internal.rs

Lines changed: 5 additions & 3 deletions
@@ -7,10 +7,12 @@ pub mod alloc;
 #[macro_use]
 pub mod phoenix_tls;

-pub mod commit;
+pub mod bloom;
+mod commit;
+mod gc;
+mod parking;
+
 pub mod epoch;
-pub mod gc;
-pub mod parking;
 pub mod read_log;
 pub mod tcell_erased;
 pub mod thread;

src/internal/bloom.rs

Lines changed: 156 additions & 0 deletions
@@ -0,0 +1,156 @@
+//! A simple 64bit bloom filter that falls back to an actual HashMap.
+//!
+//!
+//!
+//! Potentially relevant paper: http://www.eecg.toronto.edu/~steffan/papers/jeffrey_spaa11.pdf
+
+use core::{
+    cell::{Cell, UnsafeCell},
+    marker::PhantomData,
+    mem,
+    num::NonZeroUsize,
+};
+use fxhash::FxHashMap;
+use std::collections::hash_map::Entry;
+
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub enum Contained {
+    No,
+    Maybe,
+}
+
+type Filter = usize;
+
+const OVERFLOWED: Filter = !0;
+
+#[derive(Debug)]
+pub struct Bloom<'tcell, K> {
+    filter: Cell<Filter>,
+    overflow: UnsafeCell<FxHashMap<*const K, usize>>,
+    phantom: PhantomData<&'tcell K>,
+}
+
+impl<'tcell, K> Bloom<'tcell, K> {
+    #[inline]
+    pub fn new() -> Self {
+        Bloom {
+            filter: Cell::new(0),
+            overflow: Default::default(),
+            phantom: PhantomData,
+        }
+    }
+
+    fn overflow(&self) -> &mut FxHashMap<*const K, usize> {
+        unsafe { &mut *self.overflow.get() }
+    }
+
+    #[inline]
+    fn has_overflowed(&self) -> bool {
+        self.filter.get() == OVERFLOWED
+    }
+
+    #[inline]
+    pub fn clear(&mut self) {
+        let filter = *self.filter.get_mut();
+        if filter == OVERFLOWED {
+            self.overflow().clear()
+        }
+        *self.filter.get_mut() = 0;
+        debug_assert!(
+            self.overflow().is_empty(),
+            "`clear` failed to empty the container"
+        );
+        debug_assert!(self.is_empty(), "`clear` failed to empty the container");
+        debug_assert!(
+            !self.has_overflowed(),
+            "`clear` failed to reset to `Inline` storage"
+        );
+    }
+
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.filter.get() == 0
+    }
+
+    #[inline]
+    pub fn to_overflow(&self, offsets: impl Iterator<Item = (&'tcell K, usize)>) {
+        if self.filter.get() != OVERFLOWED {
+            self.run_overflow(offsets)
+        }
+    }
+
+    #[inline(never)]
+    #[cold]
+    fn run_overflow(&self, offsets: impl Iterator<Item = (&'tcell K, usize)>) {
+        self.filter.set(OVERFLOWED);
+        let overflow = self.overflow();
+        overflow.extend(offsets.map(|(k, v)| (k as *const K, v)));
+    }
+
+    #[inline]
+    pub fn contained(&self, key: &K) -> Contained {
+        let bit = bloom_bit(key);
+
+        if unlikely!(self.filter.get() & bit.0.get() != 0) {
+            Contained::Maybe
+        } else {
+            Contained::No
+        }
+    }
+
+    // If this returns Maybe, then there's no guarantee the value was inserted. At that time,
+    // overflowing is required.
+    #[inline]
+    pub fn insert_inline(&self, key: &'tcell K) -> Contained {
+        let filter = self.filter.get();
+        let bit = bloom_bit(key);
+
+        if unlikely!(filter & bit.0.get() != 0) {
+            Contained::Maybe
+        } else {
+            let new_filter = filter | bit.0.get();
+            if new_filter != OVERFLOWED {
+                self.filter.set(new_filter);
+                Contained::No
+            } else {
+                Contained::Maybe
+            }
+        }
+    }
+
+    #[inline]
+    pub fn overflow_get(&self, key: &K) -> Option<usize> {
+        debug_assert!(self.has_overflowed());
+        self.overflow().get(&(key as _)).cloned()
+    }
+
+    #[inline]
+    pub fn overflow_entry(&mut self, key: &K) -> Entry<'_, *const K, usize> {
+        debug_assert!(self.has_overflowed());
+        self.overflow().entry(key as _)
+    }
+}
+
+#[inline]
+const fn calc_shift<T>() -> usize {
+    (mem::align_of::<T>() > 1) as usize
+        + (mem::align_of::<T>() > 2) as usize
+        + (mem::align_of::<T>() > 4) as usize
+        + (mem::align_of::<T>() > 8) as usize
+        + 1 // In practice this +1 results in less failures, however it's not "correct". Any TCell with a
+    // meaningful value happens to have a minimum size of mem::size_of::<usize>() * 2 which might
+    // explain why the +1 is helpful for certain workloads.
+}
+
+#[inline]
+fn bloom_bit<T>(value: *const T) -> BloomBit {
+    let shift = calc_shift::<T>();
+    let raw_hash: usize = value as usize >> shift;
+    let result = 1 << (raw_hash & (mem::size_of::<NonZeroUsize>() * 8 - 1));
+    debug_assert!(result > 0, "bloom_hash should not return 0");
+    let hash = unsafe { NonZeroUsize::new_unchecked(result) };
+    BloomBit(hash)
+}
+
+#[derive(Copy, Clone, PartialEq, Eq)]
+struct BloomBit(NonZeroUsize);
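For intuition about the address-to-bit mapping, the standalone sketch below mirrors the calc_shift/bloom_bit arithmetic outside the crate (the demo_* names and the base address are made up for illustration). With align_of::<T>() == 8 the shift is 3 + 1 = 4, so cells spaced 16 bytes apart, the minimum meaningful TCell size per the comment above, land on adjacent filter bits.

use std::mem;

// Mirrors calc_shift::<T>() for a given alignment: one shift bit per
// doubling of alignment up to 8, plus the extra +1 discussed above.
fn demo_shift(align: usize) -> usize {
    (align > 1) as usize + (align > 2) as usize + (align > 4) as usize + (align > 8) as usize + 1
}

// Mirrors bloom_bit: discard alignment bits, then use the low 6 bits of
// the address (on a 64-bit target) as an index into the filter word.
fn demo_bloom_bit(addr: usize, align: usize) -> usize {
    1 << ((addr >> demo_shift(align)) & (mem::size_of::<usize>() * 8 - 1))
}

fn main() {
    let base = 0x7f00_0000_1000usize; // made-up, well-aligned address
    let b0 = demo_bloom_bit(base, 8);
    let b1 = demo_bloom_bit(base + 16, 8); // 16 bytes = 2 * size_of::<usize>()
    assert_eq!(b1, b0 << 1); // adjacent cells -> adjacent bits
    println!("{:#b} then {:#b}", b0, b1);
}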
