Skip to content

Commit 7812ea0

Browse files
rawlersdroege
authored and committed
interp: RollingBuffer for samples, to enable contiguous array access
Save samples with shadow-buffering to enable a contiguous fixed-length view into the buffer. For any offset, there will be a correct contiguous view of the entire circular buffer. This turns the inner loop of filter application from N*4 + M*4 into a predictable 12*4 operation. This avoids some branching and gives the LLVM optimizer better information to work with (for example, allowing 512-bit operations).
1 parent d092b36 commit 7812ea0

File tree

1 file changed

+64
-18
lines changed

1 file changed

+64
-18
lines changed

src/interp.rs

Lines changed: 64 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,70 @@ use std::f64::consts::PI;
2525
const ALMOST_ZERO: f64 = 0.000001;
2626
const TAPS: usize = 48;

// Workaround for missing const-generics: exposes the length of a fixed-size
// array type as an associated constant so generic code can name it.
//
// Contract relied upon by `RollingBuffer`: the implementing type must be
// exactly `[Item; SIZE]`. `RollingBuffer::as_ref` reinterprets `SIZE`
// consecutive `Item`s as `Self`, so any other layout would be unsound.
trait ArrayBuf<Item>: std::borrow::BorrowMut<[Item]> {
    /// Number of `Item`s held by the implementing array type.
    const SIZE: usize;
}

impl<Item> ArrayBuf<Item> for [Item; 24] {
    const SIZE: usize = 24;
}

impl<Item> ArrayBuf<Item> for [Item; 12] {
    const SIZE: usize = 12;
}

/// A circular buffer offering fixed-length contiguous views into its data.
///
/// Every pushed value is written twice: once into the primary region
/// `buf[..A::SIZE]` and once into the "shadow" region `buf[A::SIZE..2 * A::SIZE]`
/// that follows it. Because of that, for every valid `position` the range
/// `buf[position..position + A::SIZE]` holds the whole logical ring in order,
/// newest element first, without any wrap-around.
///
/// The tradeoff is writing all data twice; the gain is giving the compiler a
/// contiguous view with a predictable length, unlocking some more optimizations.
#[derive(Clone, Debug)]
struct RollingBuffer<A, T> {
    // Primary region followed by the shadow region; only the first
    // `2 * A::SIZE` slots are ever written by `push_front`.
    buf: [T; TAPS],
    // Index of the newest element inside the primary region (or `A::SIZE`
    // right after construction). Invariant: `position <= A::SIZE`.
    position: usize,
    _phantom: std::marker::PhantomData<A>,
}

impl<A: ArrayBuf<T>, T: Default + Copy> RollingBuffer<A, T> {
    /// Creates a buffer whose view consists of `A::SIZE` default values.
    ///
    /// # Panics
    ///
    /// Panics if `A::SIZE` is zero, if two copies of the window do not fit
    /// into `TAPS` slots, or if `A` does not have the size and alignment of
    /// `A::SIZE` consecutive `T`s (which `as_ref` depends on).
    fn new() -> Self {
        // `push_front` computes `A::SIZE - 1`, which would underflow for 0.
        assert!(A::SIZE > 0);
        assert!(A::SIZE * 2 <= TAPS);
        // Guard the reinterpretation done in `as_ref` against an `ArrayBuf`
        // impl whose `SIZE` does not match the actual array type.
        assert!(std::mem::size_of::<A>() == A::SIZE * std::mem::size_of::<T>());
        assert!(std::mem::align_of::<A>() <= std::mem::align_of::<T>());

        let buf: [T; TAPS] = [Default::default(); TAPS];

        Self {
            buf,
            // Start on the (all-default) shadow region; the first push moves
            // the view down to `A::SIZE - 1`.
            position: A::SIZE,
            _phantom: Default::default(),
        }
    }

    /// Pushes `v` as the newest element; it becomes index 0 of the view
    /// returned by `as_ref`, and the oldest element drops out of the view.
    #[inline(always)]
    fn push_front(&mut self, v: T) {
        // Walk backwards through the primary region, wrapping from 0 to the end.
        self.position = if self.position == 0 {
            A::SIZE - 1
        } else {
            self.position - 1
        };
        debug_assert!(self.position < A::SIZE);

        // SAFETY: `position` starts at `A::SIZE` and is only changed above, so
        // here `position < A::SIZE`. `new` asserted `2 * A::SIZE <= TAPS`, hence
        // both `position` and `position + A::SIZE` are in bounds of `buf`.
        unsafe {
            *self.buf.get_unchecked_mut(self.position) = v;
            *self.buf.get_unchecked_mut(self.position + A::SIZE) = v;
        }
    }
}

impl<A: ArrayBuf<T>, T> AsRef<A> for RollingBuffer<A, T> {
    /// Returns the current window of `A::SIZE` elements, newest first.
    #[inline(always)]
    fn as_ref(&self) -> &A {
        debug_assert!(self.position + A::SIZE <= TAPS);

        // SAFETY: `position <= A::SIZE` and `2 * A::SIZE <= TAPS` (both
        // established by `new`/`push_front`), so the `A::SIZE` elements starting
        // at `position` lie inside `buf`. `new` also checked that `A` has the
        // size and alignment of `[T; A::SIZE]`. The pointer is derived from the
        // whole array (not from a reference to a single element) so that it is
        // valid for reading the complete window.
        unsafe { &*(self.buf.as_ptr().add(self.position) as *const A) }
    }
}
85+
2886
macro_rules! interp_impl {
2987
( $name:ident, $factor:expr ) => {
3088
#[derive(Debug, Clone)]
3189
pub struct $name<F: FrameAccumulator> {
3290
filter: [[f32; $factor]; (TAPS / $factor)],
33-
buffer: [F; (TAPS / $factor)],
34-
buffer_pos: usize,
91+
buffer: RollingBuffer<[F; TAPS / $factor], F>,
3592
}
3693

3794
impl<F> Default for $name<F>
@@ -73,40 +130,29 @@ macro_rules! interp_impl {
73130

74131
Self {
75132
filter,
76-
buffer: Default::default(),
77-
buffer_pos: (TAPS / $factor) - 1,
133+
buffer: RollingBuffer::new(),
78134
}
79135
}
80136

81137
pub fn interpolate(&mut self, frame: F) -> [F; $factor] {
82138
// Write in Frames in reverse, to enable forward-scanning with filter
83-
self.buffer_pos = (self.buffer_pos + self.buffer.len() - 1) % self.buffer.len();
84-
self.buffer[self.buffer_pos] = frame;
139+
self.buffer.push_front(frame);
85140

86141
let mut output: [F; $factor] = Default::default();
87142

88-
let mut filterp = 0;
143+
let buf = self.buffer.as_ref();
89144

90-
for input_frame in &self.buffer[self.buffer_pos..] {
91-
let filter_coeffs = &self.filter[filterp];
92-
for (output_frame, coeff) in Iterator::zip(output.iter_mut(), filter_coeffs) {
93-
output_frame.scale_add(input_frame, *coeff);
94-
}
95-
filterp += 1;
96-
}
97-
for input_frame in &self.buffer[..self.buffer_pos] {
98-
let filter_coeffs = &self.filter[filterp];
145+
for (filter_coeffs, input_frame) in Iterator::zip(self.filter.iter(), buf) {
99146
for (output_frame, coeff) in Iterator::zip(output.iter_mut(), filter_coeffs) {
100147
output_frame.scale_add(input_frame, *coeff);
101148
}
102-
filterp += 1;
103149
}
104150

105151
output
106152
}
107153

108154
pub fn reset(&mut self) {
109-
self.buffer = Default::default();
155+
self.buffer = RollingBuffer::new();
110156
}
111157
}
112158
};

0 commit comments

Comments
 (0)