Skip to content

Commit 34bffe2

Browse files
authored
Merge pull request #12 from dtolnay/fast
Write u128 using only two divisions
2 parents 23d280d + 3e47651 commit 34bffe2

File tree

2 files changed

+86
-26
lines changed

2 files changed

+86
-26
lines changed

src/lib.rs

Lines changed: 74 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,7 @@ const MAX_LEN: usize = 40; // i128::MIN (including minus sign)
4040
// Adaptation of the original implementation at
4141
// https://github.com/rust-lang/rust/blob/b8214dc6c6fc20d0a660fb5700dca9ebf51ebe89/src/libcore/fmt/num.rs#L188-L266
4242
macro_rules! impl_Integer {
43-
($($t:ident),* as $conv_fn:ident) =>
44-
(impl_Integer!(
45-
$($t),* as $conv_fn,
46-
|n:$conv_fn| (n / 10000, (n % 10000) as isize)
47-
););
48-
49-
($($t:ident),* as $conv_fn:ident, $divmod_10000:expr) => ($(
43+
($($t:ident),* as $conv_fn:ident) => ($(
5044
impl Integer for $t {
5145
fn write<W: io::Write>(self, mut wr: W) -> io::Result<usize> {
5246
let mut buf = unsafe { mem::uninitialized() };
@@ -74,13 +68,11 @@ macro_rules! impl_Integer {
7468
// eagerly decode 4 characters at a time
7569
if <$t>::max_value() as u64 >= 10000 {
7670
while n >= 10000 {
77-
// division with remainder on u128 is badly optimized by LLVM.
78-
// see “udiv128.rs” for more info.
79-
let (q, r) = $divmod_10000(n);
80-
n = q;
71+
let rem = (n % 10000) as isize;
72+
n /= 10000;
8173

82-
let d1 = (r / 100) << 1;
83-
let d2 = (r % 100) << 1;
74+
let d1 = (rem / 100) << 1;
75+
let d2 = (rem % 100) << 1;
8476
curr -= 4;
8577
ptr::copy_nonoverlapping(lut_ptr.offset(d1), buf_ptr.offset(curr), 2);
8678
ptr::copy_nonoverlapping(lut_ptr.offset(d2), buf_ptr.offset(curr + 2), 2);
@@ -128,5 +120,73 @@ impl_Integer!(isize, usize as u16);
128120
impl_Integer!(isize, usize as u32);
129121
#[cfg(target_pointer_width = "64")]
130122
impl_Integer!(isize, usize as u64);
123+
124+
#[cfg(all(feature = "i128"))]
125+
macro_rules! impl_Integer128 {
126+
($($t:ident),*) => {$(
127+
impl Integer for $t {
128+
fn write<W: io::Write>(self, mut wr: W) -> io::Result<usize> {
129+
let mut buf = unsafe { mem::uninitialized() };
130+
let bytes = self.write_to(&mut buf);
131+
try!(wr.write_all(bytes));
132+
Ok(bytes.len())
133+
}
134+
}
135+
136+
impl IntegerPrivate for $t {
137+
#[allow(unused_comparisons)]
138+
fn write_to(self, buf: &mut [u8; MAX_LEN]) -> &[u8] {
139+
let is_nonnegative = self >= 0;
140+
let n = if is_nonnegative {
141+
self as u128
142+
} else {
143+
// convert the negative number to a positive one by taking its two's complement (bitwise NOT, then add 1)
144+
(!(self as u128)).wrapping_add(1)
145+
};
146+
let mut curr = buf.len() as isize;
147+
let buf_ptr = buf.as_mut_ptr();
148+
149+
unsafe {
150+
// Divide by 10^19, which is the highest power of 10 that is less than 2^64.
151+
let (n, rem) = udiv128::udivmod_1e19(n);
152+
curr -= rem.write_to(buf).len() as isize;
153+
154+
if n != 0 {
155+
// Fill in the leading decimal zeros of rem with b'0' so that it occupies exactly 19 digits.
156+
let target = buf.len() as isize - 19;
157+
ptr::write_bytes(buf_ptr.offset(target), b'0', (curr - target) as usize);
158+
curr = target;
159+
160+
// Divide by 10^19 again.
161+
let (n, rem) = udiv128::udivmod_1e19(n);
162+
let buf2 = buf_ptr.offset(curr - buf.len() as isize) as *mut _;
163+
curr -= rem.write_to(&mut *buf2).len() as isize;
164+
165+
if n != 0 {
166+
// Fill in the leading zeros with b'0' so that the two chunks together occupy exactly 38 digits.
167+
let target = buf.len() as isize - 38;
168+
ptr::write_bytes(buf_ptr.offset(target), b'0', (curr - target) as usize);
169+
curr = target;
170+
171+
// There is at most one digit left
172+
// because u128::max / 10^19 / 10^19 is 3.
173+
curr -= 1;
174+
*buf_ptr.offset(curr) = (n as u8) + b'0';
175+
}
176+
}
177+
178+
if !is_nonnegative {
179+
curr -= 1;
180+
*buf_ptr.offset(curr) = b'-';
181+
}
182+
183+
let len = buf.len() - curr as usize;
184+
slice::from_raw_parts(buf_ptr.offset(curr), len)
185+
}
186+
}
187+
}
188+
)*};
189+
}
190+
131191
#[cfg(all(feature = "i128"))]
132-
impl_Integer!(i128, u128 as u128, udiv128::udivmod_10000);
192+
impl_Integer128!(i128, u128);

src/udiv128.rs

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,25 +21,25 @@
2121
// (https://github.com/rust-lang/rust/issues/44545) and to allow function
2222
// inlining which doesn’t happen with the intrinsic.
2323

24-
pub fn udivmod_10000(n: u128) -> (u128, isize) {
24+
pub fn udivmod_1e19(n: u128) -> (u128, u64) {
25+
let d = 10_000_000_000_000_000_000_u64; // 10^19
26+
2527
let high = (n >> 64) as u64;
2628
if high == 0 {
2729
let low = n as u64;
28-
return ((low / 10000) as u128, (low % 10000) as isize);
30+
return ((low / d) as u128, low % d);
2931
}
3032

31-
let leading_zeros_10000 = 114;
32-
debug_assert_eq!(leading_zeros_10000, 10000u128.leading_zeros());
33-
let sr = 1 + leading_zeros_10000 - high.leading_zeros();
33+
let sr = 65 - high.leading_zeros();
3434

35-
// 52 <= sr <= 115
35+
// 2 <= sr <= 65
3636
let mut q: u128 = n << (128 - sr);
3737
let mut r: u128 = n >> sr;
3838
let mut carry: u64 = 0;
3939

4040
// Don't use a range here because ranges may generate references to memcpy in unoptimized code
4141
//
42-
// Loop invariants: r < 10000; carry is 0 or 1
42+
// Loop invariants: r < d; carry is 0 or 1
4343
let mut i = 0;
4444
while i < sr {
4545
i += 1;
@@ -49,14 +49,14 @@ pub fn udivmod_10000(n: u128) -> (u128, isize) {
4949
q = (q << 1) | carry as u128;
5050

5151
// carry = 0
52-
// if r >= 10000 {
53-
// r -= 10000;
52+
// if r >= d {
53+
// r -= d;
5454
// carry = 1;
5555
// }
56-
let s = 10000u128.wrapping_sub(r).wrapping_sub(1) as i128 >> 127;
56+
let s = (d as u128).wrapping_sub(r).wrapping_sub(1) as i128 >> 127;
5757
carry = (s & 1) as u64;
58-
r -= 10000u128 & s as u128;
58+
r -= (d as u128) & s as u128;
5959
}
6060

61-
((q << 1) | carry as u128, r as isize)
61+
((q << 1) | carry as u128, r as u64)
6262
}

0 commit comments

Comments
 (0)