Skip to content

Commit 32b1d10

Browse files
committed
SIMD-optimized is_ascii based on rust-lang#30740
1 parent 3908913 commit 32b1d10

File tree

1 file changed

+50
-2
lines changed

1 file changed

+50
-2
lines changed

src/libstd/ascii.rs

Lines changed: 50 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ impl AsciiExt for str {
164164

165165
#[inline]
166166
fn is_ascii(&self) -> bool {
167-
self.bytes().all(|b| b.is_ascii())
167+
// Delegate to `is_ascii` on the string's underlying bytes (`as_bytes()`)
// instead of testing each byte through the `bytes()` iterator.
self.as_bytes().is_ascii()
168168
}
169169

170170
#[inline]
@@ -204,7 +204,55 @@ impl AsciiExt for [u8] {
204204
type Owned = Vec<u8>;
205205
#[inline]
206206
fn is_ascii(&self) -> bool {
207-
self.iter().all(|b| b.is_ascii())
207+
// Returns `true` when every byte of the slice is below 128. An empty
// slice yields `true` because the scan loop below never runs.
//
// NOTE: This is largely copied from core::str::run_utf8_validation.
//
// Strategy: examine one byte at a time until the current address is
// word-aligned, then test two `usize` words per step, then finish the
// remaining tail byte by byte.
208+
209+
// 0x80 repeated in every byte lane. The `as usize` cast truncates the
// u64 literal when `usize` is narrower than 64 bits.
210+
const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;
211+
212+
/// Returns `true` if any byte in the word `x` is non-ASCII (>= 128),
/// i.e. has its high bit set.
213+
#[inline]
214+
fn contains_nonascii(x: usize) -> bool {
215+
(x & NONASCII_MASK) != 0
216+
}
217+
218+
// Index of the next byte to examine.
let mut offset = 0;
219+
let len = self.len();
220+
while offset < len {
221+
// Check the current byte directly. This also rejects a non-ASCII byte
// on which the byte-wise tail loop below stopped in a previous pass.
if self[offset] >= 128 {
222+
return false;
223+
}
224+
// When the address of the current byte is word-aligned, read 2 words
225+
// of data per iteration and bail out as soon as either word contains
// a non-ASCII byte.
// NOTE(review): `mem` is not imported in the visible hunk; presumably
// it is already in scope elsewhere in this file. Confirm.
226+
let usize_bytes = mem::size_of::<usize>();
227+
let bytes_per_iteration = 2 * usize_bytes;
228+
let ptr = self.as_ptr();
229+
// Low bits of the address of `self[offset]`; zero means the address is
// a multiple of the word size.
let align = (ptr as usize + offset) & (usize_bytes - 1);
230+
if align == 0 {
231+
// Guard the subtraction in the loop condition against underflow.
if len >= bytes_per_iteration {
232+
while offset <= len - bytes_per_iteration {
233+
// SAFETY: `align == 0` makes `ptr + offset` a multiple of the word
// size, and `offset` only grows by whole words inside this loop, so
// both reads are aligned. The loop condition guarantees that
// `offset + 2 * usize_bytes <= len`, so both words lie inside the slice.
unsafe {
234+
let u = *(ptr.offset(offset as isize) as *const usize);
235+
let v = *(ptr.offset((offset + usize_bytes) as isize) as *const usize);
236+
237+
// return `false` if either word holds a non-ASCII byte
238+
let zu = contains_nonascii(u);
239+
let zv = contains_nonascii(v);
240+
if zu || zv {
241+
return false;
242+
}
243+
}
244+
offset += bytes_per_iteration;
245+
}
246+
}
247+
// Step byte by byte from the point where the wordwise loop stopped.
// This ends at the end of the slice or on the first non-ASCII byte; in
// the latter case the check at the top of the outer loop returns `false`.
248+
while offset < len && self[offset] < 128 {
249+
offset += 1;
250+
}
251+
} else {
252+
// Not word-aligned yet: the byte at `offset` was already checked
// above, so advance to the next one.
offset += 1;
253+
}
254+
}
255+
// Every byte was examined and none was >= 128.
true
208256
}
209257

210258
#[inline]

0 commit comments

Comments
 (0)