Skip to content

Run-time feature detection for AES-NI and TSC #312

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Feb 2, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion coresimd/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#![allow(unused_features)]
#![feature(const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd,
simd_ffi, target_feature, cfg_target_feature, i128_type, asm,
const_atomic_usize_new, stmt_expr_attributes, core_intrinsics,
integer_atomics, stmt_expr_attributes, core_intrinsics,
crate_in_paths)]
#![cfg_attr(test, feature(proc_macro, test, attr_literals))]
#![cfg_attr(feature = "cargo-clippy",
Expand Down
8 changes: 4 additions & 4 deletions coresimd/src/runtime/aarch64.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//! Run-time feature detection on ARM Aarch64.
use runtime::bit;
use runtime::cache;
use runtime::arch::HasFeature;

#[macro_export]
Expand Down Expand Up @@ -32,12 +32,12 @@ pub enum __Feature {
pmull,
}

pub fn detect_features<T: HasFeature>(mut x: T) -> usize {
let mut value: usize = 0;
pub fn detect_features<T: HasFeature>(mut x: T) -> cache::Initializer {
let mut value = cache::Initializer::default();
{
let mut enable_feature = |f| {
if x.has_feature(&f) {
value = bit::set(value, f as u32);
value.set(f as u32);
}
};
enable_feature(__Feature::asimd);
Expand Down
8 changes: 4 additions & 4 deletions coresimd/src/runtime/arm.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//! Run-time feature detection on ARM Aarch32.
use runtime::bit;
use runtime::cache;
use runtime::arch::HasFeature;

#[macro_export]
Expand Down Expand Up @@ -28,12 +28,12 @@ pub enum __Feature {
pmull,
}

pub fn detect_features<T: HasFeature>(mut x: T) -> usize {
let mut value: usize = 0;
pub fn detect_features<T: HasFeature>(mut x: T) -> cache::Initializer {
let mut value = cache::Initializer::default();
{
let mut enable_feature = |f| {
if x.has_feature(&f) {
value = bit::set(value, f as u32);
value.set(f as u32);
}
};
enable_feature(__Feature::neon);
Expand Down
10 changes: 3 additions & 7 deletions coresimd/src/runtime/bit.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
//! Bit manipulation utilities

/// Sets the `bit` of `x`.
pub const fn set(x: usize, bit: u32) -> usize {
x | 1 << bit
}
//! Bit manipulation utilities.

/// Tests the `bit` of `x`.
pub const fn test(x: usize, bit: u32) -> bool {
pub fn test(x: usize, bit: u32) -> bool {
debug_assert!(bit < 32, "bit index out-of-bounds");
x & (1 << bit) != 0
}
142 changes: 129 additions & 13 deletions coresimd/src/runtime/cache.rs
Original file line number Diff line number Diff line change
@@ -1,31 +1,147 @@
//! Cache of run-time feature detection
//! Caches run-time feature detection so that it only needs to be computed
//! once.

use core::sync::atomic::{AtomicUsize, Ordering};
use core::usize;
use core::sync::atomic::Ordering;

use super::bit;
#[cfg(target_pointer_width = "64")]
use core::sync::atomic::AtomicU64;

/// This global variable is a bitset used to cache the features supported by
/// the
/// CPU.
static CACHE: AtomicUsize = AtomicUsize::new(usize::MAX);
#[cfg(target_pointer_width = "32")]
use core::sync::atomic::AtomicU32;

/// Returns a copy of `x` in which the bit at index `bit` is switched on.
///
/// Every other bit of `x` is passed through unchanged.
pub const fn set_bit(x: u64, bit: u32) -> u64 {
    (1_u64 << bit) | x
}

/// Reports whether the bit at index `bit` of `x` is switched on.
pub const fn test_bit(x: u64, bit: u32) -> bool {
    (x & (1_u64 << bit)) != 0
}

/// Maximum number of features that can be cached.
///
/// `Initializer::test` and `Initializer::set` debug-assert that every feature
/// index they are given is strictly below this value.
const CACHE_CAPACITY: u32 = 63;

/// Bitset of detected features, built up before it is handed to the cache.
///
/// The default value has no feature bits set.
#[derive(Default)]
pub struct Initializer(u64);

impl Initializer {
    /// Returns `true` if feature index `bit` has been recorded.
    pub fn test(&self, bit: u32) -> bool {
        Self::check_capacity(bit);
        test_bit(self.0, bit)
    }

    /// Records feature index `bit` as present.
    pub fn set(&mut self, bit: u32) {
        Self::check_capacity(bit);
        self.0 = set_bit(self.0, bit);
    }

    /// Debug-only guard that `bit` is below `CACHE_CAPACITY`.
    fn check_capacity(bit: u32) {
        // FIXME: this way of making sure that the cache is large enough is
        // brittle.
        debug_assert!(
            bit < CACHE_CAPACITY,
            "too many features, time to increase the cache size!"
        );
    }
}

/// This global variable is a cache of the features supported by the CPU.
///
/// It starts out in the state produced by `Cache::uninitialized()`.
static CACHE: Cache = Cache::uninitialized();

/// Feature cache with capacity for `CACHE_CAPACITY` features.
///
/// Note: the last feature bit is used to represent an
/// uninitialized cache. Concretely, the all-ones value `u64::max_value()`
/// is the "uninitialized" sentinel.
#[cfg(target_pointer_width = "64")]
struct Cache(AtomicU64);

#[cfg(target_pointer_width = "64")]
impl Cache {
    /// Creates an uninitialized cache (every bit set).
    const fn uninitialized() -> Self {
        Cache(AtomicU64::new(u64::max_value()))
    }

    /// Is the cache uninitialized?
    ///
    /// Returns `true` while the stored word still equals the all-ones
    /// sentinel written by `uninitialized`.
    pub fn is_uninitialized(&self) -> bool {
        self.0.load(Ordering::Relaxed) == u64::max_value()
    }

    /// Is the `bit` in the cache set?
    pub fn test(&self, bit: u32) -> bool {
        // Read through the receiver, like the sibling methods, rather than
        // through the global `CACHE` static.
        test_bit(self.0.load(Ordering::Relaxed), bit)
    }

    /// Initializes the cache by publishing the bits collected in `value`.
    pub fn initialize(&self, value: Initializer) {
        self.0.store(value.0, Ordering::Relaxed);
    }
}

/// Feature cache with capacity for `CACHE_CAPACITY` features.
///
/// The 64 feature bits are split over two 32-bit words: field `0` holds
/// bits 0..32 and field `1` holds bits 32..64.
///
/// Note: the last feature bit is used to represent an
/// uninitialized cache. Concretely, the high word being all ones
/// (`u32::max_value()`) is the "uninitialized" sentinel.
#[cfg(target_pointer_width = "32")]
struct Cache(AtomicU32, AtomicU32);

#[cfg(target_pointer_width = "32")]
impl Cache {
    /// Creates an uninitialized cache (every bit of both words set).
    const fn uninitialized() -> Self {
        Cache(
            AtomicU32::new(u32::max_value()),
            AtomicU32::new(u32::max_value()),
        )
    }

    /// Is the cache uninitialized?
    ///
    /// Only the high word is inspected, since it carries the sentinel.
    pub fn is_uninitialized(&self) -> bool {
        // `Acquire` pairs with the `Release` store of the high word in
        // `initialize`: once this thread sees the high word initialized,
        // the earlier store to the low word is visible too. With `Relaxed`
        // a reader could still see the all-ones low word and report every
        // feature in 0..32 as present.
        self.1.load(Ordering::Acquire) == u32::max_value()
    }

    /// Is the `bit` in the cache set?
    pub fn test(&self, bit: u32) -> bool {
        // Read through the receiver, like the sibling methods, rather than
        // through the global `CACHE` static.
        if bit < 32 {
            test_bit(u64::from(self.0.load(Ordering::Relaxed)), bit)
        } else {
            test_bit(u64::from(self.1.load(Ordering::Relaxed)), bit - 32)
        }
    }

    /// Initializes the cache by publishing the bits collected in `value`.
    pub fn initialize(&self, value: Initializer) {
        // Intentional truncation: keep the low 32 bits, then the high 32.
        let lo: u32 = value.0 as u32;
        let hi: u32 = (value.0 >> 32) as u32;
        // The low word is written first; the high word (the sentinel
        // carrier) is written last with `Release` so that it publishes the
        // low word as well.
        self.0.store(lo, Ordering::Relaxed);
        self.1.store(hi, Ordering::Release);
    }
}

/// Test the `bit` of the storage. If the storage has not been initialized,
/// initializes it with the result of `f()`.
///
/// On its first invocation, it detects the CPU features and caches them in the
/// `FEATURES` global variable as an `AtomicUsize`.
/// `CACHE` global variable as an `AtomicU64` (or two `AtomicU32`s on 32-bit targets).
///
/// It uses the `__Feature` variant to index into this variable as a bitset. If
/// the bit is set, the feature is enabled, and otherwise it is disabled.
///
/// PLEASE: do not use this, it is an implementation detail subject to change.
pub fn test<F>(bit: u32, f: F) -> bool
where
F: FnOnce() -> usize,
F: FnOnce() -> Initializer,
{
if CACHE.load(Ordering::Relaxed) == usize::MAX {
CACHE.store(f(), Ordering::Relaxed);
if CACHE.is_uninitialized() {
CACHE.initialize(f());
}
bit::test(CACHE.load(Ordering::Relaxed), bit)
CACHE.test(bit)
}
8 changes: 4 additions & 4 deletions coresimd/src/runtime/powerpc64.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//! Run-time feature detection on PowerPC64.
use runtime::bit;
use runtime::cache;
use runtime::arch::HasFeature;

#[macro_export]
Expand Down Expand Up @@ -33,12 +33,12 @@ pub enum __Feature {
power8,
}

pub fn detect_features<T: HasFeature>(mut x: T) -> usize {
let mut value: usize = 0;
pub fn detect_features<T: HasFeature>(mut x: T) -> cache::Initializer {
let mut value = cache::Initializer::default();
{
let mut enable_feature = |f| {
if x.has_feature(&f) {
value = bit::set(value, f as u32);
value.set(f as u32);
}
};
enable_feature(__Feature::altivec);
Expand Down
29 changes: 23 additions & 6 deletions coresimd/src/runtime/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

use core::mem;

use super::bit;
use super::{bit, cache};

/// This macro maps the string-literal feature names to values of the
/// `__Feature` enum at compile-time. The feature names used are the same as
Expand All @@ -29,6 +29,12 @@ use super::bit;
#[macro_export]
#[doc(hidden)]
macro_rules! __unstable_detect_feature {
("aes", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::aes{}) };
("tsc", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::tsc{}) };
("mmx", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::mmx{}) };
Expand Down Expand Up @@ -168,6 +174,10 @@ macro_rules! __unstable_detect_feature {
#[allow(non_camel_case_types)]
#[repr(u8)]
pub enum __Feature {
/// AES (Advanced Encryption Standard New Instructions AES-NI)
aes,
/// TSC (Time Stamp Counter)
tsc,
/// MMX
mmx,
/// SSE (Streaming SIMD Extensions)
Expand Down Expand Up @@ -232,7 +242,8 @@ pub enum __Feature {
xsaves,
/// XSAVEC (Save Processor Extended States Compacted)
xsavec,
#[doc(hidden)] __NonExhaustive,
#[doc(hidden)]
__NonExhaustive,
}

/// Run-time feature detection on x86 works by using the CPUID instruction.
Expand All @@ -250,10 +261,10 @@ pub enum __Feature {
/// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID
/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
pub fn detect_features() -> usize {
pub fn detect_features() -> cache::Initializer {
use vendor::{__cpuid, __cpuid_count, has_cpuid, CpuidResult};
use vendor::_xgetbv;
let mut value: usize = 0;
let mut value = cache::Initializer::default();

// If the x86 CPU does not support the CPUID instruction then it is too
// old to support any of the currently-detectable features.
Expand Down Expand Up @@ -329,7 +340,7 @@ pub fn detect_features() -> usize {
// borrows value till the end of this scope:
let mut enable = |r, rb, f| {
if bit::test(r as usize, rb) {
value = bit::set(value, f as u32);
value.set(f as u32);
}
};

Expand All @@ -339,8 +350,10 @@ pub fn detect_features() -> usize {
enable(proc_info_ecx, 19, __Feature::sse4_1);
enable(proc_info_ecx, 20, __Feature::sse4_2);
enable(proc_info_ecx, 23, __Feature::popcnt);
enable(proc_info_edx, 24, __Feature::fxsr);
enable(proc_info_ecx, 25, __Feature::aes);
enable(proc_info_edx, 4, __Feature::tsc);
enable(proc_info_edx, 23, __Feature::mmx);
enable(proc_info_edx, 24, __Feature::fxsr);
enable(proc_info_edx, 25, __Feature::sse);
enable(proc_info_edx, 26, __Feature::sse2);

Expand Down Expand Up @@ -449,6 +462,8 @@ mod tests {

#[test]
fn dump() {
println!("aes: {:?}", cfg_feature_enabled!("aes"));
println!("tsc: {:?}", cfg_feature_enabled!("tsc"));
println!("sse: {:?}", cfg_feature_enabled!("sse"));
println!("sse2: {:?}", cfg_feature_enabled!("sse2"));
println!("sse3: {:?}", cfg_feature_enabled!("sse3"));
Expand Down Expand Up @@ -488,6 +503,8 @@ mod tests {
#[test]
fn compare_with_cupid() {
let information = cupid::master().unwrap();
assert_eq!(cfg_feature_enabled!("aes"), information.aesni());
assert_eq!(cfg_feature_enabled!("tsc"), information.tsc());
assert_eq!(cfg_feature_enabled!("sse"), information.sse());
assert_eq!(cfg_feature_enabled!("sse2"), information.sse2());
assert_eq!(cfg_feature_enabled!("sse3"), information.sse3());
Expand Down
Loading