diff --git a/coresimd/src/lib.rs b/coresimd/src/lib.rs index a3c02fcaff..dfca161ccc 100644 --- a/coresimd/src/lib.rs +++ b/coresimd/src/lib.rs @@ -13,7 +13,7 @@ #![allow(unused_features)] #![feature(const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd, simd_ffi, target_feature, cfg_target_feature, i128_type, asm, - const_atomic_usize_new, stmt_expr_attributes, core_intrinsics, + integer_atomics, stmt_expr_attributes, core_intrinsics, crate_in_paths)] #![cfg_attr(test, feature(proc_macro, test, attr_literals))] #![cfg_attr(feature = "cargo-clippy", diff --git a/coresimd/src/runtime/aarch64.rs b/coresimd/src/runtime/aarch64.rs index fbbf856855..941a614171 100644 --- a/coresimd/src/runtime/aarch64.rs +++ b/coresimd/src/runtime/aarch64.rs @@ -1,5 +1,5 @@ //! Run-time feature detection on ARM Aarch64. -use runtime::bit; +use runtime::cache; use runtime::arch::HasFeature; #[macro_export] @@ -32,12 +32,12 @@ pub enum __Feature { pmull, } -pub fn detect_features(mut x: T) -> usize { - let mut value: usize = 0; +pub fn detect_features(mut x: T) -> cache::Initializer { + let mut value = cache::Initializer::default(); { let mut enable_feature = |f| { if x.has_feature(&f) { - value = bit::set(value, f as u32); + value.set(f as u32); } }; enable_feature(__Feature::asimd); diff --git a/coresimd/src/runtime/arm.rs b/coresimd/src/runtime/arm.rs index 4c4dbb4cbc..a4e9e89e51 100644 --- a/coresimd/src/runtime/arm.rs +++ b/coresimd/src/runtime/arm.rs @@ -1,5 +1,5 @@ //! Run-time feature detection on ARM Aarch32. -use runtime::bit; +use runtime::cache; use runtime::arch::HasFeature; #[macro_export] @@ -28,12 +28,12 @@ pub enum __Feature { pmull, } -pub fn detect_features(mut x: T) -> usize { - let mut value: usize = 0; +pub fn detect_features(mut x: T) -> cache::Initializer { + let mut value = cache::Initializer::default(); { let mut enable_feature = |f| { if x.has_feature(&f) { - value = bit::set(value, f as u32); + value.set(f as u32); } }; enable_feature(__Feature::neon); diff --git a/coresimd/src/runtime/bit.rs b/coresimd/src/runtime/bit.rs index 42483e5225..e7b0ab61b3 100644 --- a/coresimd/src/runtime/bit.rs +++ b/coresimd/src/runtime/bit.rs @@ -1,11 +1,7 @@ -//! Bit manipulation utilities - -/// Sets the `bit` of `x`. -pub const fn set(x: usize, bit: u32) -> usize { - x | 1 << bit -} +//! Bit manipulation utilities. /// Tests the `bit` of `x`. -pub const fn test(x: usize, bit: u32) -> bool { +pub fn test(x: usize, bit: u32) -> bool { + debug_assert!(bit < 32, "bit index out-of-bounds"); x & (1 << bit) != 0 } diff --git a/coresimd/src/runtime/cache.rs b/coresimd/src/runtime/cache.rs index bb247fb531..ef4acf0a9c 100644 --- a/coresimd/src/runtime/cache.rs +++ b/coresimd/src/runtime/cache.rs @@ -1,20 +1,136 @@ -//! Cache of run-time feature detection +//! Caches run-time feature detection so that it only needs to be computed +//! once. -use core::sync::atomic::{AtomicUsize, Ordering}; -use core::usize; +use core::sync::atomic::Ordering; -use super::bit; +#[cfg(target_pointer_width = "64")] +use core::sync::atomic::AtomicU64; -/// This global variable is a bitset used to cache the features supported by -/// the -/// CPU. -static CACHE: AtomicUsize = AtomicUsize::new(usize::MAX); +#[cfg(target_pointer_width = "32")] +use core::sync::atomic::AtomicU32; + +/// Sets the `bit` of `x`. +pub const fn set_bit(x: u64, bit: u32) -> u64 { + x | 1 << bit +} + +/// Tests the `bit` of `x`. 
+pub const fn test_bit(x: u64, bit: u32) -> bool {
+    x & (1 << bit) != 0
+}
+
+/// Maximum number of features that can be cached.
+const CACHE_CAPACITY: u32 = 63;
+
+/// This type is used to initialize the cache.
+pub struct Initializer(u64);
+
+impl Default for Initializer {
+    fn default() -> Self {
+        Initializer(0)
+    }
+}
+
+impl Initializer {
+    /// Tests the `bit` of the cache.
+    pub fn test(&self, bit: u32) -> bool {
+        // FIXME: this way of making sure that the cache is large enough is
+        // brittle.
+        debug_assert!(
+            bit < CACHE_CAPACITY,
+            "too many features, time to increase the cache size!"
+        );
+        test_bit(self.0, bit)
+    }
+    /// Sets the `bit` of the cache.
+    pub fn set(&mut self, bit: u32) {
+        // FIXME: this way of making sure that the cache is large enough is
+        // brittle.
+        debug_assert!(
+            bit < CACHE_CAPACITY,
+            "too many features, time to increase the cache size!"
+        );
+        let v = self.0;
+        self.0 = set_bit(v, bit);
+    }
+}
+
+/// This global variable is a cache of the features supported by the CPU.
+static CACHE: Cache = Cache::uninitialized();
+
+/// Feature cache with capacity for `CACHE_CAPACITY` features.
+///
+/// Note: the last feature bit is used to represent an
+/// uninitialized cache.
+#[cfg(target_pointer_width = "64")]
+struct Cache(AtomicU64);
+
+#[cfg(target_pointer_width = "64")]
+impl Cache {
+    /// Creates an uninitialized cache.
+    const fn uninitialized() -> Self {
+        Cache(AtomicU64::new(u64::max_value()))
+    }
+    /// Is the cache uninitialized?
+    pub fn is_uninitialized(&self) -> bool {
+        self.0.load(Ordering::Relaxed) == u64::max_value()
+    }
+
+    /// Is the `bit` in the cache set?
+    pub fn test(&self, bit: u32) -> bool {
+        test_bit(self.0.load(Ordering::Relaxed), bit)
+    }
+
+    /// Initializes the cache.
+    pub fn initialize(&self, value: Initializer) {
+        self.0.store(value.0, Ordering::Relaxed);
+    }
+}
+
+/// Feature cache with capacity for `CACHE_CAPACITY` features.
+///
+/// Note: the last feature bit is used to represent an
+/// uninitialized cache.
+#[cfg(target_pointer_width = "32")]
+struct Cache(AtomicU32, AtomicU32);
+
+#[cfg(target_pointer_width = "32")]
+impl Cache {
+    /// Creates an uninitialized cache.
+    const fn uninitialized() -> Self {
+        Cache(
+            AtomicU32::new(u32::max_value()),
+            AtomicU32::new(u32::max_value()),
+        )
+    }
+    /// Is the cache uninitialized?
+    pub fn is_uninitialized(&self) -> bool {
+        self.1.load(Ordering::Relaxed) == u32::max_value()
+    }
+
+    /// Is the `bit` in the cache set?
+    pub fn test(&self, bit: u32) -> bool {
+        if bit < 32 {
+            test_bit(self.0.load(Ordering::Relaxed) as u64, bit)
+        } else {
+            test_bit(self.1.load(Ordering::Relaxed) as u64, bit - 32)
+        }
+    }
+
+    /// Initializes the cache.
+    pub fn initialize(&self, value: Initializer) {
+        let lo: u32 = value.0 as u32;
+        let hi: u32 = (value.0 >> 32) as u32;
+        self.0.store(lo, Ordering::Relaxed);
+        self.1.store(hi, Ordering::Relaxed);
+    }
+}
 
 /// Test the `bit` of the storage. If the storage has not been initialized,
 /// initializes it with the result of `f()`.
 ///
 /// On its first invocation, it detects the CPU features and caches them in the
-/// `FEATURES` global variable as an `AtomicUsize`.
+/// `CACHE` global variable as an `AtomicU64`.
 ///
 /// It uses the `__Feature` variant to index into this variable as a bitset. If
 /// the bit is set, the feature is enabled, and otherwise it is disabled.
@@ -22,10 +138,10 @@ static CACHE: AtomicUsize = AtomicUsize::new(usize::MAX);
 /// PLEASE: do not use this, it is an implementation detail subject to change.
pub fn test(bit: u32, f: F) -> bool where - F: FnOnce() -> usize, + F: FnOnce() -> Initializer, { - if CACHE.load(Ordering::Relaxed) == usize::MAX { - CACHE.store(f(), Ordering::Relaxed); + if CACHE.is_uninitialized() { + CACHE.initialize(f()); } - bit::test(CACHE.load(Ordering::Relaxed), bit) + CACHE.test(bit) } diff --git a/coresimd/src/runtime/powerpc64.rs b/coresimd/src/runtime/powerpc64.rs index 8ee02e4185..16ed59a717 100644 --- a/coresimd/src/runtime/powerpc64.rs +++ b/coresimd/src/runtime/powerpc64.rs @@ -1,5 +1,5 @@ //! Run-time feature detection on PowerPC64. -use runtime::bit; +use runtime::cache; use runtime::arch::HasFeature; #[macro_export] @@ -33,12 +33,12 @@ pub enum __Feature { power8, } -pub fn detect_features(mut x: T) -> usize { - let mut value: usize = 0; +pub fn detect_features(mut x: T) -> cache::Initializer { + let mut value = cache::Initializer::default(); { let mut enable_feature = |f| { if x.has_feature(&f) { - value = bit::set(value, f as u32); + value.set(f as u32); } }; enable_feature(__Feature::altivec); diff --git a/coresimd/src/runtime/x86.rs b/coresimd/src/runtime/x86.rs index a994e61c42..9b97deef29 100644 --- a/coresimd/src/runtime/x86.rs +++ b/coresimd/src/runtime/x86.rs @@ -18,7 +18,7 @@ use core::mem; -use super::bit; +use super::{bit, cache}; /// This macro maps the string-literal feature names to values of the /// `__Feature` enum at compile-time. The feature names used are the same as @@ -29,6 +29,12 @@ use super::bit; #[macro_export] #[doc(hidden)] macro_rules! __unstable_detect_feature { + ("aes", $unstable_detect_feature:path) => { + $unstable_detect_feature( + $crate::__vendor_runtime::__Feature::aes{}) }; + ("tsc", $unstable_detect_feature:path) => { + $unstable_detect_feature( + $crate::__vendor_runtime::__Feature::tsc{}) }; ("mmx", $unstable_detect_feature:path) => { $unstable_detect_feature( $crate::__vendor_runtime::__Feature::mmx{}) }; @@ -168,6 +174,10 @@ macro_rules! __unstable_detect_feature { #[allow(non_camel_case_types)] #[repr(u8)] pub enum __Feature { + /// AES (Advanced Encryption Standard New Instructions AES-NI) + aes, + /// TSC (Time Stamp Counter) + tsc, /// MMX mmx, /// SSE (Streaming SIMD Extensions) @@ -232,7 +242,8 @@ pub enum __Feature { xsaves, /// XSAVEC (Save Processor Extended States Compacted) xsavec, - #[doc(hidden)] __NonExhaustive, + #[doc(hidden)] + __NonExhaustive, } /// Run-time feature detection on x86 works by using the CPUID instruction. @@ -250,10 +261,10 @@ pub enum __Feature { /// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID /// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf /// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf -pub fn detect_features() -> usize { +pub fn detect_features() -> cache::Initializer { use vendor::{__cpuid, __cpuid_count, has_cpuid, CpuidResult}; use vendor::_xgetbv; - let mut value: usize = 0; + let mut value = cache::Initializer::default(); // If the x86 CPU does not support the CPUID instruction then it is too // old to support any of the currently-detectable features. 
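The `cache.rs` rewrite above replaces the old `AtomicUsize` bitset with an `Initializer` value that detection code fills in, plus a fixed-capacity `Cache` backed by one `AtomicU64` on 64-bit targets (or two `AtomicU32`s on 32-bit targets). All bits set marks the cache as uninitialized, which is why only `CACHE_CAPACITY = 63` feature bits are usable. A minimal, self-contained sketch of the scheme, assuming the 64-bit case and using illustrative names rather than the crate's actual API:

```rust
use std::sync::atomic::{AtomicU64, Ordering};

// All bits set means "detection has not run yet"; consequently only
// 63 of the 64 bits can serve as feature flags.
const UNINITIALIZED: u64 = u64::max_value();
static CACHE: AtomicU64 = AtomicU64::new(UNINITIALIZED);

/// Returns whether feature `bit` is cached as available, running
/// `detect` at most once. Racing threads may both run `detect`, but
/// they store the same value, so `Ordering::Relaxed` is sufficient.
fn test_feature<F: FnOnce() -> u64>(bit: u32, detect: F) -> bool {
    debug_assert!(bit < 63, "bit 63 is reserved as the uninitialized sentinel");
    if CACHE.load(Ordering::Relaxed) == UNINITIALIZED {
        CACHE.store(detect(), Ordering::Relaxed);
    }
    CACHE.load(Ordering::Relaxed) & (1 << bit) != 0
}

fn main() {
    // Pretend detection found features 0 and 2.
    assert!(test_feature(0, || 0b101));
    assert!(test_feature(2, || unreachable!("already cached")));
    assert!(!test_feature(1, || unreachable!("already cached")));
}
```

This is also why `detect_features` now returns `cache::Initializer` instead of `usize`: the cache width is fixed at 64 bits regardless of the target's pointer width.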
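The new `aes` and `tsc` variants added to `__Feature` above are populated from CPUID leaf 1 in the hunk that follows: `proc_info_ecx` bit 25 carries AES-NI and `proc_info_edx` bit 4 carries TSC. For orientation, a standalone sketch of reading those two bits; it uses the later-stabilized `std::arch` CPUID wrapper for brevity, not the crate-internal `vendor::__cpuid` this diff relies on:

```rust
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn detect_aes_tsc() -> (bool, bool) {
    #[cfg(target_arch = "x86")]
    use std::arch::x86::__cpuid;
    #[cfg(target_arch = "x86_64")]
    use std::arch::x86_64::__cpuid;

    // CPUID leaf 1 returns the processor-info feature bits in ECX/EDX.
    let info = unsafe { __cpuid(1) };
    let aes = info.ecx & (1 << 25) != 0; // AES-NI: ECX bit 25
    let tsc = info.edx & (1 << 4) != 0; // TSC: EDX bit 4
    (aes, tsc)
}

fn main() {
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        let (aes, tsc) = detect_aes_tsc();
        println!("aes: {}, tsc: {}", aes, tsc);
    }
}
```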
@@ -329,7 +340,7 @@ pub fn detect_features() -> usize { // borrows value till the end of this scope: let mut enable = |r, rb, f| { if bit::test(r as usize, rb) { - value = bit::set(value, f as u32); + value.set(f as u32); } }; @@ -339,8 +350,10 @@ pub fn detect_features() -> usize { enable(proc_info_ecx, 19, __Feature::sse4_1); enable(proc_info_ecx, 20, __Feature::sse4_2); enable(proc_info_ecx, 23, __Feature::popcnt); - enable(proc_info_edx, 24, __Feature::fxsr); + enable(proc_info_ecx, 25, __Feature::aes); + enable(proc_info_edx, 4, __Feature::tsc); enable(proc_info_edx, 23, __Feature::mmx); + enable(proc_info_edx, 24, __Feature::fxsr); enable(proc_info_edx, 25, __Feature::sse); enable(proc_info_edx, 26, __Feature::sse2); @@ -449,6 +462,8 @@ mod tests { #[test] fn dump() { + println!("aes: {:?}", cfg_feature_enabled!("aes")); + println!("tsc: {:?}", cfg_feature_enabled!("tsc")); println!("sse: {:?}", cfg_feature_enabled!("sse")); println!("sse2: {:?}", cfg_feature_enabled!("sse2")); println!("sse3: {:?}", cfg_feature_enabled!("sse3")); @@ -488,6 +503,8 @@ mod tests { #[test] fn compare_with_cupid() { let information = cupid::master().unwrap(); + assert_eq!(cfg_feature_enabled!("aes"), information.aesni()); + assert_eq!(cfg_feature_enabled!("tsc"), information.tsc()); assert_eq!(cfg_feature_enabled!("sse"), information.sse()); assert_eq!(cfg_feature_enabled!("sse2"), information.sse2()); assert_eq!(cfg_feature_enabled!("sse3"), information.sse3()); diff --git a/coresimd/src/v128.rs b/coresimd/src/v128.rs index cc5888f616..cf98340d83 100644 --- a/coresimd/src/v128.rs +++ b/coresimd/src/v128.rs @@ -42,86 +42,14 @@ define_impl! { x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 } -define_from!( - u64x2, - i64x2, - u32x4, - i32x4, - u16x8, - i16x8, - u8x16, - i8x16 -); -define_from!( - i64x2, - u64x2, - u32x4, - i32x4, - u16x8, - i16x8, - u8x16, - i8x16 -); -define_from!( - u32x4, - u64x2, - i64x2, - i32x4, - u16x8, - i16x8, - u8x16, - i8x16 -); -define_from!( - i32x4, - u64x2, - i64x2, - u32x4, - u16x8, - i16x8, - u8x16, - i8x16 -); -define_from!( - u16x8, - u64x2, - i64x2, - u32x4, - i32x4, - i16x8, - u8x16, - i8x16 -); -define_from!( - i16x8, - u64x2, - i64x2, - u32x4, - i32x4, - u16x8, - u8x16, - i8x16 -); -define_from!( - u8x16, - u64x2, - i64x2, - u32x4, - i32x4, - u16x8, - i16x8, - i8x16 -); -define_from!( - i8x16, - u64x2, - i64x2, - u32x4, - i32x4, - u16x8, - i16x8, - u8x16 -); +define_from!(u64x2, i64x2, u32x4, i32x4, u16x8, i16x8, u8x16, i8x16); +define_from!(i64x2, u64x2, u32x4, i32x4, u16x8, i16x8, u8x16, i8x16); +define_from!(u32x4, u64x2, i64x2, i32x4, u16x8, i16x8, u8x16, i8x16); +define_from!(i32x4, u64x2, i64x2, u32x4, u16x8, i16x8, u8x16, i8x16); +define_from!(u16x8, u64x2, i64x2, u32x4, i32x4, i16x8, u8x16, i8x16); +define_from!(i16x8, u64x2, i64x2, u32x4, i32x4, u16x8, u8x16, i8x16); +define_from!(u8x16, u64x2, i64x2, u32x4, i32x4, u16x8, i16x8, i8x16); +define_from!(i8x16, u64x2, i64x2, u32x4, i32x4, u16x8, i16x8, u8x16); define_common_ops!( f64x2, diff --git a/coresimd/src/v256.rs b/coresimd/src/v256.rs index 1cbe7bdf26..1f02bad0f2 100644 --- a/coresimd/src/v256.rs +++ b/coresimd/src/v256.rs @@ -66,86 +66,14 @@ define_impl! 
{ x24, x25, x26, x27, x28, x29, x30, x31 } -define_from!( - u64x4, - i64x4, - u32x8, - i32x8, - u16x16, - i16x16, - u8x32, - i8x32 -); -define_from!( - i64x4, - u64x4, - u32x8, - i32x8, - u16x16, - i16x16, - u8x32, - i8x32 -); -define_from!( - u32x8, - u64x4, - i64x4, - i32x8, - u16x16, - i16x16, - u8x32, - i8x32 -); -define_from!( - i32x8, - u64x4, - i64x4, - u32x8, - u16x16, - i16x16, - u8x32, - i8x32 -); -define_from!( - u16x16, - u64x4, - i64x4, - u32x8, - i32x8, - i16x16, - u8x32, - i8x32 -); -define_from!( - i16x16, - u64x4, - i64x4, - u32x8, - i32x8, - u16x16, - u8x32, - i8x32 -); -define_from!( - u8x32, - u64x4, - i64x4, - u32x8, - i32x8, - u16x16, - i16x16, - i8x32 -); -define_from!( - i8x32, - u64x4, - i64x4, - u32x8, - i32x8, - u16x16, - i16x16, - u8x32 -); +define_from!(u64x4, i64x4, u32x8, i32x8, u16x16, i16x16, u8x32, i8x32); +define_from!(i64x4, u64x4, u32x8, i32x8, u16x16, i16x16, u8x32, i8x32); +define_from!(u32x8, u64x4, i64x4, i32x8, u16x16, i16x16, u8x32, i8x32); +define_from!(i32x8, u64x4, i64x4, u32x8, u16x16, i16x16, u8x32, i8x32); +define_from!(u16x16, u64x4, i64x4, u32x8, i32x8, i16x16, u8x32, i8x32); +define_from!(i16x16, u64x4, i64x4, u32x8, i32x8, u16x16, u8x32, i8x32); +define_from!(u8x32, u64x4, i64x4, u32x8, i32x8, u16x16, i16x16, i8x32); +define_from!(i8x32, u64x4, i64x4, u32x8, i32x8, u16x16, i16x16, u8x32); define_common_ops!( f64x4, diff --git a/coresimd/src/x86/i386/mod.rs b/coresimd/src/x86/i386/mod.rs index 9f32390924..4e0adbae72 100644 --- a/coresimd/src/x86/i386/mod.rs +++ b/coresimd/src/x86/i386/mod.rs @@ -3,7 +3,6 @@ mod eflags; pub use self::eflags::*; - #[cfg(dont_compile_me)] // TODO: need to upstream `fxsr` target feature mod fxsr; #[cfg(dont_compile_me)] // TODO: need to upstream `fxsr` target feature diff --git a/coresimd/src/x86/i586/avx.rs b/coresimd/src/x86/i586/avx.rs index cba133c734..3552fe820b 100644 --- a/coresimd/src/x86/i586/avx.rs +++ b/coresimd/src/x86/i586/avx.rs @@ -956,7 +956,6 @@ pub unsafe fn _mm256_permute_ps(a: __m256, imm8: i32) -> __m256 { #[target_feature(enable = "avx,sse")] #[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))] pub unsafe fn _mm_permute_ps(a: __m128, imm8: i32) -> __m128 { - let imm8 = (imm8 & 0xFF) as u8; macro_rules! shuffle4 { ($a:expr, $b:expr, $c:expr, $d:expr) => { @@ -1094,7 +1093,9 @@ pub unsafe fn _mm_permute_pd(a: __m128d, imm8: i32) -> __m128d { #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vperm2f128, imm8 = 0x5))] -pub unsafe fn _mm256_permute2f128_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { +pub unsafe fn _mm256_permute2f128_ps( + a: __m256, b: __m256, imm8: i32 +) -> __m256 { macro_rules! call { ($imm8:expr) => { vperm2f128ps256(a, b, $imm8) } } @@ -1106,7 +1107,9 @@ pub unsafe fn _mm256_permute2f128_ps(a: __m256, b: __m256, imm8: i32) -> __m256 #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vperm2f128, imm8 = 0x31))] -pub unsafe fn _mm256_permute2f128_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { +pub unsafe fn _mm256_permute2f128_pd( + a: __m256d, b: __m256d, imm8: i32 +) -> __m256d { macro_rules! 
call { ($imm8:expr) => { vperm2f128pd256(a, b, $imm8) } } @@ -1195,7 +1198,9 @@ pub unsafe fn _mm256_insertf128_ps(a: __m256, b: __m128, imm8: i32) -> __m256 { #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))] -pub unsafe fn _mm256_insertf128_pd(a: __m256d, b: __m128d, imm8: i32) -> __m256d { +pub unsafe fn _mm256_insertf128_pd( + a: __m256d, b: __m128d, imm8: i32 +) -> __m256d { match imm8 & 1 { 0 => simd_shuffle4(a, _mm256_castpd128_pd256(b), [4, 5, 2, 3]), _ => simd_shuffle4(a, _mm256_castpd128_pd256(b), [0, 1, 4, 5]), @@ -1391,7 +1396,9 @@ pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) { #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaskmovpd))] -pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d { +pub unsafe fn _mm256_maskload_pd( + mem_addr: *const f64, mask: __m256i +) -> __m256d { maskloadpd256(mem_addr as *const i8, mask.as_i64x4()) } @@ -1400,7 +1407,9 @@ pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaskmovpd))] -pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d) { +pub unsafe fn _mm256_maskstore_pd( + mem_addr: *mut f64, mask: __m256i, a: __m256d +) { maskstorepd256(mem_addr as *mut i8, mask.as_i64x4(), a); } @@ -1429,7 +1438,9 @@ pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) { #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaskmovps))] -pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256 { +pub unsafe fn _mm256_maskload_ps( + mem_addr: *const f32, mask: __m256i +) -> __m256 { maskloadps256(mem_addr as *const i8, mask.as_i32x8()) } @@ -1438,7 +1449,9 @@ pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256 #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaskmovps))] -pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256) { +pub unsafe fn _mm256_maskstore_ps( + mem_addr: *mut f32, mask: __m256i, a: __m256 +) { maskstoreps256(mem_addr as *mut i8, mask.as_i32x8(), a); } @@ -2755,7 +2768,8 @@ mod tests { let result_closest = _mm256_round_ps(a, 0b00000000); let result_down = _mm256_round_ps(a, 0b00000001); let result_up = _mm256_round_ps(a, 0b00000010); - let expected_closest = _mm256_setr_ps(2., 2., 4., -1., 2., 2., 4., -1.); + let expected_closest = + _mm256_setr_ps(2., 2., 4., -1., 2., 2., 4., -1.); let expected_down = _mm256_setr_ps(1., 2., 3., -2., 1., 2., 3., -2.); let expected_up = _mm256_setr_ps(2., 3., 4., -1., 2., 3., 4., -1.); assert_eq_m256(result_closest, expected_closest); @@ -2865,7 +2879,8 @@ mod tests { let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); let r = _mm256_dp_ps(a, b, 0xFF); - let e = _mm256_setr_ps(200., 200., 200., 200., 2387., 2387., 2387., 2387.); + let e = + _mm256_setr_ps(200., 200., 200., 200., 2387., 2387., 2387., 2387.); assert_eq_m256(r, e); } @@ -3282,11 +3297,17 @@ mod tests { #[simd_test = "avx"] unsafe fn test_mm256_insert_epi16() { - let a = - _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[cfg_attr(rustfmt, rustfmt_skip)] + let a = _mm256_setr_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); let r = _mm256_insert_epi16(a, 0, 15); - let e = - _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11, 12, 13, 14, 0); + #[cfg_attr(rustfmt, rustfmt_skip)] + let e = _mm256_setr_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 0, + ); assert_eq_m256i(r, e); } @@ -3865,10 +3886,12 @@ mod tests { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); - assert_eq_m256i( - r, - _mm256_setr_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1) + #[cfg_attr(rustfmt, rustfmt_skip)] + let e = _mm256_setr_epi16( + 16, 15, 14, 13, 12, 11, 10, 9, 8, + 7, 6, 5, 4, 3, 2, 1, ); + assert_eq_m256i(r, e); } #[simd_test = "avx"] @@ -3922,10 +3945,12 @@ mod tests { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); - assert_eq_m256i( - r, - _mm256_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) + #[cfg_attr(rustfmt, rustfmt_skip)] + let e = _mm256_setr_epi16( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, ); + assert_eq_m256i(r, e); } #[simd_test = "avx"] @@ -4196,8 +4221,11 @@ mod tests { 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ); - let lo = - _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + #[cfg_attr(rustfmt, rustfmt_skip)] + let lo = _mm_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + ); let r = _mm256_loadu2_m128i( &hi as *const _ as *const _, &lo as *const _ as *const _, diff --git a/coresimd/src/x86/i586/avx2.rs b/coresimd/src/x86/i586/avx2.rs index 540031009e..aff7e4d16c 100644 --- a/coresimd/src/x86/i586/avx2.rs +++ b/coresimd/src/x86/i586/avx2.rs @@ -201,7 +201,10 @@ pub unsafe fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vandnps))] pub unsafe fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i { let all_ones = _mm256_set1_epi8(-1); - mem::transmute(simd_and(simd_xor(a.as_i64x4(), all_ones.as_i64x4()), b.as_i64x4())) + mem::transmute(simd_and( + simd_xor(a.as_i64x4(), all_ones.as_i64x4()), + b.as_i64x4(), + )) } /// Average packed unsigned 16-bit integers in `a` and `b`. 
@@ -256,7 +259,9 @@ pub unsafe fn _mm_blend_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i { #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpblendd, imm8 = 9))] -pub unsafe fn _mm256_blend_epi32(a: __m256i, b: __m256i, imm8: i32) -> __m256i { +pub unsafe fn _mm256_blend_epi32( + a: __m256i, b: __m256i, imm8: i32 +) -> __m256i { let imm8 = (imm8 & 0xFF) as u8; let a = a.as_i32x8(); let b = b.as_i32x8(); @@ -308,7 +313,9 @@ pub unsafe fn _mm256_blend_epi32(a: __m256i, b: __m256i, imm8: i32) -> __m256i { #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpblendw, imm8 = 9))] -pub unsafe fn _mm256_blend_epi16(a: __m256i, b: __m256i, imm8: i32) -> __m256i { +pub unsafe fn _mm256_blend_epi16( + a: __m256i, b: __m256i, imm8: i32 +) -> __m256i { let imm8 = (imm8 & 0xFF) as u8; let a = a.as_i16x16(); let b = b.as_i16x16(); @@ -362,7 +369,9 @@ pub unsafe fn _mm256_blend_epi16(a: __m256i, b: __m256i, imm8: i32) -> __m256i { #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpblendvb))] -pub unsafe fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i { +pub unsafe fn _mm256_blendv_epi8( + a: __m256i, b: __m256i, mask: __m256i +) -> __m256i { mem::transmute(pblendvb(a.as_i8x32(), b.as_i8x32(), mask.as_i8x32())) } @@ -776,7 +785,8 @@ pub unsafe fn _mm_i32gather_epi32( #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))] pub unsafe fn _mm_mask_i32gather_epi32( - src: __m128i, slice: *const i32, offsets: __m128i, mask: __m128i, scale: i32 + src: __m128i, slice: *const i32, offsets: __m128i, mask: __m128i, + scale: i32, ) -> __m128i { let src = src.as_i32x4(); let mask = mask.as_i32x4(); @@ -817,7 +827,8 @@ pub unsafe fn _mm256_i32gather_epi32( #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))] pub unsafe fn _mm256_mask_i32gather_epi32( - src: __m256i, slice: *const i32, offsets: __m256i, mask: __m256i, scale: i32 + src: __m256i, slice: *const i32, offsets: __m256i, mask: __m256i, + scale: i32, ) -> __m256i { let src = src.as_i32x8(); let mask = mask.as_i32x8(); @@ -932,7 +943,8 @@ pub unsafe fn _mm_i32gather_epi64( #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))] pub unsafe fn _mm_mask_i32gather_epi64( - src: __m128i, slice: *const i64, offsets: __m128i, mask: __m128i, scale: i32 + src: __m128i, slice: *const i64, offsets: __m128i, mask: __m128i, + scale: i32, ) -> __m128i { let src = src.as_i64x2(); let mask = mask.as_i64x2(); @@ -973,7 +985,8 @@ pub unsafe fn _mm256_i32gather_epi64( #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))] pub unsafe fn _mm256_mask_i32gather_epi64( - src: __m256i, slice: *const i64, offsets: __m128i, mask: __m256i, scale: i32 + src: __m256i, slice: *const i64, offsets: __m128i, mask: __m256i, + scale: i32, ) -> __m256i { let src = src.as_i64x4(); let mask = mask.as_i64x4(); @@ -1013,7 +1026,8 @@ pub unsafe fn _mm_i32gather_pd( #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))] pub unsafe fn _mm_mask_i32gather_pd( - src: __m128d, slice: *const f64, offsets: __m128i, mask: __m128d, scale: i32 + src: __m128d, slice: *const f64, offsets: __m128i, mask: __m128d, + scale: i32, ) -> __m128d { let offsets = offsets.as_i32x4(); let slice = slice as *const i8; @@ -1050,7 +1064,8 @@ pub unsafe fn _mm256_i32gather_pd( #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherdpd, 
scale = 1))] pub unsafe fn _mm256_mask_i32gather_pd( - src: __m256d, slice: *const f64, offsets: __m128i, mask: __m256d, scale: i32 + src: __m256d, slice: *const f64, offsets: __m128i, mask: __m256d, + scale: i32, ) -> __m256d { let offsets = offsets.as_i32x4(); let slice = slice as *const i8; @@ -1088,7 +1103,8 @@ pub unsafe fn _mm_i64gather_epi32( #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))] pub unsafe fn _mm_mask_i64gather_epi32( - src: __m128i, slice: *const i32, offsets: __m128i, mask: __m128i, scale: i32 + src: __m128i, slice: *const i32, offsets: __m128i, mask: __m128i, + scale: i32, ) -> __m128i { let src = src.as_i32x4(); let mask = mask.as_i32x4(); @@ -1129,7 +1145,8 @@ pub unsafe fn _mm256_i64gather_epi32( #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))] pub unsafe fn _mm256_mask_i64gather_epi32( - src: __m128i, slice: *const i32, offsets: __m256i, mask: __m128i, scale: i32 + src: __m128i, slice: *const i32, offsets: __m256i, mask: __m128i, + scale: i32, ) -> __m128i { let src = src.as_i32x4(); let mask = mask.as_i32x4(); @@ -1244,7 +1261,8 @@ pub unsafe fn _mm_i64gather_epi64( #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))] pub unsafe fn _mm_mask_i64gather_epi64( - src: __m128i, slice: *const i64, offsets: __m128i, mask: __m128i, scale: i32 + src: __m128i, slice: *const i64, offsets: __m128i, mask: __m128i, + scale: i32, ) -> __m128i { let src = src.as_i64x2(); let mask = mask.as_i64x2(); @@ -1285,7 +1303,8 @@ pub unsafe fn _mm256_i64gather_epi64( #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))] pub unsafe fn _mm256_mask_i64gather_epi64( - src: __m256i, slice: *const i64, offsets: __m256i, mask: __m256i, scale: i32 + src: __m256i, slice: *const i64, offsets: __m256i, mask: __m256i, + scale: i32, ) -> __m256i { let src = src.as_i64x4(); let mask = mask.as_i64x4(); @@ -1325,7 +1344,8 @@ pub unsafe fn _mm_i64gather_pd( #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))] pub unsafe fn _mm_mask_i64gather_pd( - src: __m128d, slice: *const f64, offsets: __m128i, mask: __m128d, scale: i32 + src: __m128d, slice: *const f64, offsets: __m128i, mask: __m128d, + scale: i32, ) -> __m128d { let slice = slice as *const i8; let offsets = offsets.as_i64x2(); @@ -1362,7 +1382,8 @@ pub unsafe fn _mm256_i64gather_pd( #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))] pub unsafe fn _mm256_mask_i64gather_pd( - src: __m256d, slice: *const f64, offsets: __m256i, mask: __m256d, scale: i32 + src: __m256d, slice: *const f64, offsets: __m256i, mask: __m256d, + scale: i32, ) -> __m256d { let slice = slice as *const i8; let offsets = offsets.as_i64x4(); @@ -1416,7 +1437,9 @@ pub unsafe fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i { #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaskmovd))] -pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i { +pub unsafe fn _mm_maskload_epi32( + mem_addr: *const i32, mask: __m128i +) -> __m128i { mem::transmute(maskloadd(mem_addr as *const i8, mask.as_i32x4())) } @@ -1438,7 +1461,9 @@ pub unsafe fn _mm256_maskload_epi32( #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaskmovq))] -pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i { +pub unsafe fn _mm_maskload_epi64( + mem_addr: *const i64, mask: 
__m128i +) -> __m128i { mem::transmute(maskloadq(mem_addr as *const i8, mask.as_i64x2())) } @@ -1460,7 +1485,9 @@ pub unsafe fn _mm256_maskload_epi64( #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaskmovd))] -pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) { +pub unsafe fn _mm_maskstore_epi32( + mem_addr: *mut i32, mask: __m128i, a: __m128i +) { maskstored(mem_addr as *mut i8, mask.as_i32x4(), a.as_i32x4()) } @@ -1482,7 +1509,9 @@ pub unsafe fn _mm256_maskstore_epi32( #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaskmovq))] -pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) { +pub unsafe fn _mm_maskstore_epi64( + mem_addr: *mut i64, mask: __m128i, a: __m128i +) { maskstoreq(mem_addr as *mut i8, mask.as_i64x2(), a.as_i64x2()) } @@ -1625,7 +1654,9 @@ pub unsafe fn _mm256_movemask_epi8(a: __m256i) -> i32 { #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vmpsadbw, imm8 = 0))] -pub unsafe fn _mm256_mpsadbw_epu8(a: __m256i, b: __m256i, imm8: i32) -> __m256i { +pub unsafe fn _mm256_mpsadbw_epu8( + a: __m256i, b: __m256i, imm8: i32 +) -> __m256i { let a = a.as_u8x32(); let b = b.as_u8x32(); macro_rules! call { @@ -2733,7 +2764,11 @@ pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpunpckhdq))] pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i { - let r: i32x8 = simd_shuffle8(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]); + let r: i32x8 = simd_shuffle8( + a.as_i32x8(), + b.as_i32x8(), + [2, 10, 3, 11, 6, 14, 7, 15], + ); mem::transmute(r) } @@ -2769,7 +2804,8 @@ pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpunpckldq))] pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i { - let r: i32x8 = simd_shuffle8(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]); + let r: i32x8 = + simd_shuffle8(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]); mem::transmute(r) } @@ -3113,19 +3149,23 @@ extern "C" { ) -> i64x4; #[link_name = "llvm.x86.avx2.gather.d.pd"] fn pgatherdpd( - src: __m128d, slice: *const i8, offsets: i32x4, mask: __m128d, scale: i8 + src: __m128d, slice: *const i8, offsets: i32x4, mask: __m128d, + scale: i8, ) -> __m128d; #[link_name = "llvm.x86.avx2.gather.d.pd.256"] fn vpgatherdpd( - src: __m256d, slice: *const i8, offsets: i32x4, mask: __m256d, scale: i8 + src: __m256d, slice: *const i8, offsets: i32x4, mask: __m256d, + scale: i8, ) -> __m256d; #[link_name = "llvm.x86.avx2.gather.q.pd"] fn pgatherqpd( - src: __m128d, slice: *const i8, offsets: i64x2, mask: __m128d, scale: i8 + src: __m128d, slice: *const i8, offsets: i64x2, mask: __m128d, + scale: i8, ) -> __m128d; #[link_name = "llvm.x86.avx2.gather.q.pd.256"] fn vpgatherqpd( - src: __m256d, slice: *const i8, offsets: i64x4, mask: __m256d, scale: i8 + src: __m256d, slice: *const i8, offsets: i64x4, mask: __m256d, + scale: i8, ) -> __m256d; #[link_name = "llvm.x86.avx2.gather.d.ps"] fn pgatherdps( @@ -3228,10 +3268,16 @@ mod tests { #[simd_test = "avx2"] unsafe fn test_mm256_add_epi16() { - let a = - _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let b = - _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[cfg_attr(rustfmt, rustfmt_skip)] + let a = _mm256_setr_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, + 
8, 9, 10, 11, 12, 13, 14, 15, + ); + #[cfg_attr(rustfmt, rustfmt_skip)] + let b = _mm256_setr_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); let r = _mm256_add_epi16(a, b); #[cfg_attr(rustfmt, rustfmt_skip)] let e = _mm256_setr_epi16( @@ -3313,8 +3359,11 @@ mod tests { #[simd_test = "avx2"] unsafe fn test_mm256_adds_epi16() { - let a = - _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[cfg_attr(rustfmt, rustfmt_skip)] + let a = _mm256_setr_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); #[cfg_attr(rustfmt, rustfmt_skip)] let b = _mm256_setr_epi16( 32, 33, 34, 35, 36, 37, 38, 39, @@ -3383,8 +3432,11 @@ mod tests { #[simd_test = "avx2"] unsafe fn test_mm256_adds_epu16() { - let a = - _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[cfg_attr(rustfmt, rustfmt_skip)] + let a = _mm256_setr_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); #[cfg_attr(rustfmt, rustfmt_skip)] let b = _mm256_setr_epi16( 32, 33, 34, 35, 36, 37, 38, 39, @@ -3468,7 +3520,8 @@ mod tests { #[simd_test = "avx2"] unsafe fn test_mm256_blend_epi16() { let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9)); - let e = _mm256_setr_epi16(9, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3,3, 3, 3, 3, 3); + let e = + _mm256_setr_epi16(9, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3); let r = _mm256_blend_epi16(a, b, 0x01 as i32); assert_eq_m256i(r, e); @@ -3604,10 +3657,16 @@ mod tests { #[simd_test = "avx2"] unsafe fn test_mm256_cmpeq_epi16() { - let a = - _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let b = - _mm256_setr_epi16(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + #[cfg_attr(rustfmt, rustfmt_skip)] + let a = _mm256_setr_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + #[cfg_attr(rustfmt, rustfmt_skip)] + let b = _mm256_setr_epi16( + 15, 14, 2, 12, 11, 10, 9, 8, + 7, 6, 5, 4, 3, 2, 1, 0, + ); let r = _mm256_cmpeq_epi16(a, b); assert_eq_m256i(r, _mm256_insert_epi16(_mm256_set1_epi16(0), !0, 2)); } @@ -3627,10 +3686,7 @@ mod tests { let a = _mm256_setr_epi64x(0, 1, 2, 3); let b = _mm256_setr_epi64x(3, 2, 2, 0); let r = _mm256_cmpeq_epi64(a, b); - assert_eq_m256i( - r, - _mm256_insert_epi64(_mm256_set1_epi64x(0), !0, 2), - ); + assert_eq_m256i(r, _mm256_insert_epi64(_mm256_set1_epi64x(0), !0, 2)); } #[simd_test = "avx2"] @@ -3662,33 +3718,42 @@ mod tests { let a = _mm256_insert_epi64(_mm256_set1_epi64x(0), 5, 0); let b = _mm256_set1_epi64x(0); let r = _mm256_cmpgt_epi64(a, b); - assert_eq_m256i( - r, - _mm256_insert_epi64(_mm256_set1_epi64x(0), !0, 0), - ); + assert_eq_m256i(r, _mm256_insert_epi64(_mm256_set1_epi64x(0), !0, 0)); } #[simd_test = "avx2"] unsafe fn test_mm256_cvtepi8_epi16() { - let a = - _mm_setr_epi8(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7); - let r = - _mm256_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7); + #[cfg_attr(rustfmt, rustfmt_skip)] + let a = _mm_setr_epi8( + 0, 0, -1, 1, -2, 2, -3, 3, + -4, 4, -5, 5, -6, 6, -7, 7, + ); + #[cfg_attr(rustfmt, rustfmt_skip)] + let r = _mm256_setr_epi16( + 0, 0, -1, 1, -2, 2, -3, 3, + -4, 4, -5, 5, -6, 6, -7, 7, + ); assert_eq_m256i(r, _mm256_cvtepi8_epi16(a)); } #[simd_test = "avx2"] unsafe fn test_mm256_cvtepi8_epi32() { - let a = - _mm_setr_epi8(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7); + #[cfg_attr(rustfmt, rustfmt_skip)] + let a = _mm_setr_epi8( + 0, 0, -1, 1, -2, 2, -3, 3, + -4, 4, -5, 5, -6, 6, -7, 7, + ); let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, 
-3, 3); assert_eq_m256i(r, _mm256_cvtepi8_epi32(a)); } #[simd_test = "avx2"] unsafe fn test_mm256_cvtepi8_epi64() { - let a = - _mm_setr_epi8(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7); + #[cfg_attr(rustfmt, rustfmt_skip)] + let a = _mm_setr_epi8( + 0, 0, -1, 1, -2, 2, -3, 3, + -4, 4, -5, 5, -6, 6, -7, 7, + ); let r = _mm256_setr_epi64x(0, 0, -1, 1); assert_eq_m256i(r, _mm256_cvtepi8_epi64(a)); } @@ -3737,25 +3802,37 @@ mod tests { #[simd_test = "avx2"] unsafe fn test_mm256_cvtepu8_epi16() { - let a = - _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let r = - _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[cfg_attr(rustfmt, rustfmt_skip)] + let a = _mm_setr_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); + #[cfg_attr(rustfmt, rustfmt_skip)] + let r = _mm256_setr_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); assert_eq_m256i(r, _mm256_cvtepu8_epi16(a)); } #[simd_test = "avx2"] unsafe fn test_mm256_cvtepu8_epi32() { - let a = - _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[cfg_attr(rustfmt, rustfmt_skip)] + let a = _mm_setr_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); assert_eq_m256i(r, _mm256_cvtepu8_epi32(a)); } #[simd_test = "avx2"] unsafe fn test_mm256_cvtepu8_epi64() { - let a = - _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[cfg_attr(rustfmt, rustfmt_skip)] + let a = _mm_setr_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); let r = _mm256_setr_epi64x(0, 1, 2, 3); assert_eq_m256i(r, _mm256_cvtepu8_epi64(a)); } @@ -3773,7 +3850,8 @@ mod tests { let a = _mm256_set1_epi16(2); let b = _mm256_set1_epi16(4); let r = _mm256_hadd_epi16(a, b); - let e = _mm256_setr_epi16(4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8); + let e = + _mm256_setr_epi16(4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8); assert_eq_m256i(r, e); } @@ -3793,8 +3871,11 @@ mod tests { let a = _mm256_insert_epi16(a, 1, 1); let b = _mm256_set1_epi16(4); let r = _mm256_hadds_epi16(a, b); - let e = - _mm256_setr_epi16(0x7FFF, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8); + #[cfg_attr(rustfmt, rustfmt_skip)] + let e = _mm256_setr_epi16( + 0x7FFF, 4, 4, 4, 8, 8, 8, 8, + 4, 4, 4, 4, 8, 8, 8, 8, + ); assert_eq_m256i(r, e); } @@ -4139,7 +4220,8 @@ mod tests { let a = _mm256_set1_epi32(2); let b = _mm256_set1_epi32(4); let r = _mm256_packs_epi32(a, b); - let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4); + let e = + _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4); assert_eq_m256i(r, e); } @@ -4165,7 +4247,8 @@ mod tests { let a = _mm256_set1_epi32(2); let b = _mm256_set1_epi32(4); let r = _mm256_packus_epi32(a, b); - let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4); + let e = + _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4); assert_eq_m256i(r, e); } @@ -4266,7 +4349,7 @@ mod tests { unsafe fn test_mm256_slli_epi16() { assert_eq_m256i( _mm256_slli_epi16(_mm256_set1_epi16(0xFF), 4), - _mm256_set1_epi16(0xFF0) + _mm256_set1_epi16(0xFF0), ); } @@ -4274,7 +4357,7 @@ mod tests { unsafe fn test_mm256_slli_epi32() { assert_eq_m256i( _mm256_slli_epi32(_mm256_set1_epi32(0xFFFF), 4), - _mm256_set1_epi32(0xFFFF0) + _mm256_set1_epi32(0xFFFF0), ); } @@ -4282,7 +4365,7 @@ mod tests { unsafe fn test_mm256_slli_epi64() { assert_eq_m256i( _mm256_slli_epi64(_mm256_set1_epi64x(0xFFFFFFFF), 4), - 
_mm256_set1_epi64x(0xFFFFFFFF0) + _mm256_set1_epi64x(0xFFFFFFFF0), ); } @@ -4349,7 +4432,7 @@ mod tests { unsafe fn test_mm256_srai_epi16() { assert_eq_m256i( _mm256_srai_epi16(_mm256_set1_epi16(-1), 1), - _mm256_set1_epi16(-1) + _mm256_set1_epi16(-1), ); } @@ -4357,7 +4440,7 @@ mod tests { unsafe fn test_mm256_srai_epi32() { assert_eq_m256i( _mm256_srai_epi32(_mm256_set1_epi32(-1), 1), - _mm256_set1_epi32(-1) + _mm256_set1_epi32(-1), ); } @@ -4427,7 +4510,7 @@ mod tests { unsafe fn test_mm256_srli_epi16() { assert_eq_m256i( _mm256_srli_epi16(_mm256_set1_epi16(0xFF), 4), - _mm256_set1_epi16(0xF) + _mm256_set1_epi16(0xF), ); } @@ -4435,7 +4518,7 @@ mod tests { unsafe fn test_mm256_srli_epi32() { assert_eq_m256i( _mm256_srli_epi32(_mm256_set1_epi32(0xFFFF), 4), - _mm256_set1_epi32(0xFFF) + _mm256_set1_epi32(0xFFF), ); } @@ -4443,7 +4526,7 @@ mod tests { unsafe fn test_mm256_srli_epi64() { assert_eq_m256i( _mm256_srli_epi64(_mm256_set1_epi64x(0xFFFFFFFF), 4), - _mm256_set1_epi64x(0xFFFFFFF) + _mm256_set1_epi64x(0xFFFFFFF), ); } @@ -4639,7 +4722,8 @@ mod tests { unsafe fn test_mm256_permutevar8x32_epi32() { let a = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800); let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4); - let expected = _mm256_setr_epi32(600, 100, 600, 200, 800, 700, 400, 500); + let expected = + _mm256_setr_epi32(600, 100, 600, 200, 800, 700, 400, 500); let r = _mm256_permutevar8x32_epi32(a, b); assert_eq_m256i(r, expected); } @@ -4739,7 +4823,10 @@ mod tests { _mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0), 4, ); - assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 64, 256, 256, 256, 256, 256)); + assert_eq_m256i( + r, + _mm256_setr_epi32(0, 16, 64, 256, 256, 256, 256, 256), + ); } #[simd_test = "avx2"] @@ -4789,7 +4876,10 @@ mod tests { _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4), 4, ); - assert_eq_m256(r, _mm256_setr_ps(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0)); + assert_eq_m256( + r, + _mm256_setr_ps(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0), + ); } #[simd_test = "avx2"] @@ -4810,7 +4900,7 @@ mod tests { ); assert_eq_m256( r, - _mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0) + _mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0), ); } @@ -4821,11 +4911,8 @@ mod tests { arr[i as usize] = i; } // A multiplier of 8 is word-addressing for i64s - let r = _mm_i32gather_epi64( - arr.as_ptr(), - _mm_setr_epi32(0, 16, 0, 0), - 8, - ); + let r = + _mm_i32gather_epi64(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0), 8); assert_eq_m128i(r, _mm_setr_epi64x(0, 16)); } @@ -4887,8 +4974,7 @@ mod tests { j += 1.0; } // A multiplier of 8 is word-addressing for f64s - let r = - _mm_i32gather_pd(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0), 8); + let r = _mm_i32gather_pd(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0), 8); assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0)); } @@ -5220,8 +5306,11 @@ mod tests { #[simd_test = "avx2"] unsafe fn test_mm256_extract_epi16() { - let a = - _mm256_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[cfg_attr(rustfmt, rustfmt_skip)] + let a = _mm256_setr_epi16( + -1, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); let r1 = _mm256_extract_epi16(a, 0); let r2 = _mm256_extract_epi16(a, 19); assert_eq!(r1, -1); diff --git a/coresimd/src/x86/i586/rdtsc.rs b/coresimd/src/x86/i586/rdtsc.rs index 6c02a2d03b..a385dd9a01 100644 --- a/coresimd/src/x86/i586/rdtsc.rs +++ b/coresimd/src/x86/i586/rdtsc.rs @@ -1,3 +1,5 @@ +//! RDTSC instructions. 
+ #[cfg(test)] use stdsimd_test::assert_instr; @@ -22,7 +24,7 @@ pub unsafe fn _rdtsc() -> i64 { } /// Reads the current value of the processor’s time-stamp counter and -/// the IA32_TSC_AUX MSR. +/// the `IA32_TSC_AUX MSR`. /// /// The processor monotonically increments the time-stamp counter MSR /// every clock cycle and resets it to 0 whenever the processor is diff --git a/coresimd/src/x86/i586/sse.rs b/coresimd/src/x86/i586/sse.rs index 3960360588..5eb92b6a07 100644 --- a/coresimd/src/x86/i586/sse.rs +++ b/coresimd/src/x86/i586/sse.rs @@ -994,9 +994,9 @@ pub unsafe fn _mm_load_ps1(p: *const f32) -> __m128 { _mm_load1_ps(p) } -/// Load four `f32` values from *aligned* memory into a `__m128`. If the pointer -/// is not aligned to a 128-bit boundary (16 bytes) a general protection fault -/// will be triggered (fatal program crash). +/// Load four `f32` values from *aligned* memory into a `__m128`. If the +/// pointer is not aligned to a 128-bit boundary (16 bytes) a general +/// protection fault will be triggered (fatal program crash). /// /// Use [`_mm_loadu_ps`](fn._mm_loadu_ps.html) for potentially unaligned /// memory. @@ -1031,7 +1031,8 @@ pub unsafe fn _mm_loadu_ps(p: *const f32) -> __m128 { dst } -/// Load four `f32` values from aligned memory into a `__m128` in reverse order. +/// Load four `f32` values from aligned memory into a `__m128` in reverse +/// order. /// /// If the pointer is not aligned to a 128-bit boundary (16 bytes) a general /// protection fault will be triggered (fatal program crash). @@ -1307,8 +1308,8 @@ pub unsafe fn _mm_getcsr() -> u32 { /// check if an operation caused some overflow: /// /// ```rust,ignore -/// _MM_SET_EXCEPTION_STATE(0); // clear all exception flags -/// // perform calculations +/// _MM_SET_EXCEPTION_STATE(0); // clear all exception flags +/// // perform calculations /// if _MM_GET_EXCEPTION_STATE() & _MM_EXCEPT_OVERFLOW != 0 { /// // handle overflow /// } @@ -1331,7 +1332,7 @@ pub unsafe fn _mm_getcsr() -> u32 { /// exception, use: /// /// ```rust,ignore -/// _mm_setcsr(_mm_getcsr() & !_MM_MASK_UNDERFLOW); // unmask underflow +/// _mm_setcsr(_mm_getcsr() & !_MM_MASK_UNDERFLOW); // unmask underflow /// exception /// ``` /// @@ -1374,8 +1375,8 @@ pub unsafe fn _mm_getcsr() -> u32 { /// `_MM_GET_FLUSH_ZERO_MODE()` and `_MM_SET_FLUSH_ZERO_MODE()`: /// /// ```rust,ignore -/// _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF); // turn off (default) -/// _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); // turn on +/// _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF); // turn off (default) +/// _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); // turn on /// ``` /// #[inline] @@ -2357,14 +2358,9 @@ mod tests { let r = _mm_comieq_ss(a, b); assert_eq!( - ee[i], - r, + ee[i], r, "_mm_comieq_ss({:?}, {:?}) = {}, expected: {} (i={})", - a, - b, - r, - ee[i], - i + a, b, r, ee[i], i ); } } @@ -2383,14 +2379,9 @@ mod tests { let r = _mm_comilt_ss(a, b); assert_eq!( - ee[i], - r, + ee[i], r, "_mm_comilt_ss({:?}, {:?}) = {}, expected: {} (i={})", - a, - b, - r, - ee[i], - i + a, b, r, ee[i], i ); } } @@ -2409,14 +2400,9 @@ mod tests { let r = _mm_comile_ss(a, b); assert_eq!( - ee[i], - r, + ee[i], r, "_mm_comile_ss({:?}, {:?}) = {}, expected: {} (i={})", - a, - b, - r, - ee[i], - i + a, b, r, ee[i], i ); } } @@ -2435,14 +2421,9 @@ mod tests { let r = _mm_comige_ss(a, b); assert_eq!( - ee[i], - r, + ee[i], r, "_mm_comige_ss({:?}, {:?}) = {}, expected: {} (i={})", - a, - b, - r, - ee[i], - i + a, b, r, ee[i], i ); } } @@ -2461,14 +2442,9 @@ mod tests { let r = _mm_comineq_ss(a, 
b); assert_eq!( - ee[i], - r, + ee[i], r, "_mm_comineq_ss({:?}, {:?}) = {}, expected: {} (i={})", - a, - b, - r, - ee[i], - i + a, b, r, ee[i], i ); } } @@ -2487,14 +2463,9 @@ mod tests { let r = _mm_ucomieq_ss(a, b); assert_eq!( - ee[i], - r, + ee[i], r, "_mm_ucomieq_ss({:?}, {:?}) = {}, expected: {} (i={})", - a, - b, - r, - ee[i], - i + a, b, r, ee[i], i ); } } @@ -2513,14 +2484,9 @@ mod tests { let r = _mm_ucomilt_ss(a, b); assert_eq!( - ee[i], - r, + ee[i], r, "_mm_ucomilt_ss({:?}, {:?}) = {}, expected: {} (i={})", - a, - b, - r, - ee[i], - i + a, b, r, ee[i], i ); } } @@ -2539,14 +2505,9 @@ mod tests { let r = _mm_ucomile_ss(a, b); assert_eq!( - ee[i], - r, + ee[i], r, "_mm_ucomile_ss({:?}, {:?}) = {}, expected: {} (i={})", - a, - b, - r, - ee[i], - i + a, b, r, ee[i], i ); } } @@ -2565,14 +2526,9 @@ mod tests { let r = _mm_ucomigt_ss(a, b); assert_eq!( - ee[i], - r, + ee[i], r, "_mm_ucomigt_ss({:?}, {:?}) = {}, expected: {} (i={})", - a, - b, - r, - ee[i], - i + a, b, r, ee[i], i ); } } @@ -2591,14 +2547,9 @@ mod tests { let r = _mm_ucomige_ss(a, b); assert_eq!( - ee[i], - r, + ee[i], r, "_mm_ucomige_ss({:?}, {:?}) = {}, expected: {} (i={})", - a, - b, - r, - ee[i], - i + a, b, r, ee[i], i ); } } @@ -2617,14 +2568,9 @@ mod tests { let r = _mm_ucomineq_ss(a, b); assert_eq!( - ee[i], - r, + ee[i], r, "_mm_ucomineq_ss({:?}, {:?}) = {}, expected: {} (i={})", - a, - b, - r, - ee[i], - i + a, b, r, ee[i], i ); } } @@ -2652,24 +2598,14 @@ mod tests { let s2 = _MM_GET_EXCEPTION_STATE(); assert_eq!( - ee[i], - r1, + ee[i], r1, "_mm_comeq_ss({:?}, {:?}) = {}, expected: {} (i={})", - a, - b, - r1, - ee[i], - i + a, b, r1, ee[i], i ); assert_eq!( - ee[i], - r2, + ee[i], r2, "_mm_ucomeq_ss({:?}, {:?}) = {}, expected: {} (i={})", - a, - b, - r2, - ee[i], - i + a, b, r2, ee[i], i ); assert_eq!( s1, @@ -2691,19 +2627,16 @@ mod tests { #[simd_test = "sse"] unsafe fn test_mm_cvtss_si32() { let inputs = &[42.0f32, -3.1, 4.0e10, 4.0e-20, NAN, 2147483500.1]; - let result = &[42i32, -3, i32::min_value(), 0, i32::min_value(), 2147483520]; + let result = + &[42i32, -3, i32::min_value(), 0, i32::min_value(), 2147483520]; for i in 0..inputs.len() { let x = _mm_setr_ps(inputs[i], 1.0, 3.0, 4.0); let e = result[i]; let r = _mm_cvtss_si32(x); assert_eq!( - e, - r, + e, r, "TestCase #{} _mm_cvtss_si32({:?}) = {}, expected: {}", - i, - x, - r, - e + i, x, r, e ); } } @@ -2727,13 +2660,9 @@ mod tests { let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0); let r = _mm_cvttss_si32(x); assert_eq!( - e, - r, + e, r, "TestCase #{} _mm_cvttss_si32({:?}) = {}, expected: {}", - i, - x, - r, - e + i, x, r, e ); } } @@ -2904,7 +2833,8 @@ mod tests { } let r = _mm_load_ps(p); - let e = _mm_add_ps(_mm_setr_ps(1.0, 2.0, 3.0, 4.0), _mm_set1_ps(fixup)); + let e = + _mm_add_ps(_mm_setr_ps(1.0, 2.0, 3.0, 4.0), _mm_set1_ps(fixup)); assert_eq_m128(r, e); } @@ -2934,7 +2864,8 @@ mod tests { } let r = _mm_loadr_ps(p); - let e = _mm_add_ps(_mm_setr_ps(4.0, 3.0, 2.0, 1.0), _mm_set1_ps(fixup)); + let e = + _mm_add_ps(_mm_setr_ps(4.0, 3.0, 2.0, 1.0), _mm_set1_ps(fixup)); assert_eq_m128(r, e); } @@ -3146,8 +3077,7 @@ mod tests { let exp = _mm_setr_ps(1.1e-41, 0.0, 0.0, 1.0); assert_eq_m128(r, exp); - let underflow = - _MM_GET_EXCEPTION_STATE() & _MM_EXCEPT_UNDERFLOW != 0; + let underflow = _MM_GET_EXCEPTION_STATE() & _MM_EXCEPT_UNDERFLOW != 0; assert_eq!(underflow, true); } diff --git a/coresimd/src/x86/i586/sse2.rs b/coresimd/src/x86/i586/sse2.rs index ab4a574e2d..da2906e115 100644 --- a/coresimd/src/x86/i586/sse2.rs +++ b/coresimd/src/x86/i586/sse2.rs 
@@ -1,6 +1,5 @@ //! Streaming SIMD Extensions 2 (SSE2) - #[cfg(test)] use stdsimd_test::assert_instr; @@ -878,7 +877,9 @@ pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i { #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(maskmovdqu))] -pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) { +pub unsafe fn _mm_maskmoveu_si128( + a: __m128i, mask: __m128i, mem_addr: *mut i8 +) { maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr) } @@ -1195,7 +1196,11 @@ pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(punpckhwd))] pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i { - let x = simd_shuffle8(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]); + let x = simd_shuffle8( + a.as_i16x8(), + b.as_i16x8(), + [4, 12, 5, 13, 6, 14, 7, 15], + ); mem::transmute::(x) } @@ -1204,7 +1209,11 @@ pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(punpckhdq))] pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i { - mem::transmute::(simd_shuffle4(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) + mem::transmute::(simd_shuffle4( + a.as_i32x4(), + b.as_i32x4(), + [2, 6, 3, 7], + )) } /// Unpack and interleave 64-bit integers from the high half of `a` and `b`. @@ -1212,7 +1221,11 @@ pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(punpckhqdq))] pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i { - mem::transmute::(simd_shuffle2(a.as_i64x2(), b.as_i64x2(), [1, 3])) + mem::transmute::(simd_shuffle2( + a.as_i64x2(), + b.as_i64x2(), + [1, 3], + )) } /// Unpack and interleave 8-bit integers from the low half of `a` and `b`. @@ -1232,7 +1245,8 @@ pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(punpcklwd))] pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i { - let x = simd_shuffle8(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]); + let x = + simd_shuffle8(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]); mem::transmute::(x) } @@ -1241,7 +1255,11 @@ pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(punpckldq))] pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i { - mem::transmute::(simd_shuffle4(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) + mem::transmute::(simd_shuffle4( + a.as_i32x4(), + b.as_i32x4(), + [0, 4, 1, 5], + )) } /// Unpack and interleave 64-bit integers from the low half of `a` and `b`. @@ -1249,7 +1267,11 @@ pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(punpcklqdq))] pub unsafe fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i { - mem::transmute::(simd_shuffle2(a.as_i64x2(), b.as_i64x2(), [0, 2])) + mem::transmute::(simd_shuffle2( + a.as_i64x2(), + b.as_i64x2(), + [0, 2], + )) } /// Return a new vector with the low element of `a` replaced by the sum of the @@ -2332,7 +2354,7 @@ mod tests { use stdsimd_test::simd_test; use test::black_box; // Used to inhibit constant-folding. 
use x86::*; - use v128::*; + use v128::*; #[simd_test = "sse2"] unsafe fn test_mm_pause() { @@ -2357,8 +2379,24 @@ mod tests { #[simd_test = "sse2"] unsafe fn test_mm_add_epi8() { - let a = - _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let a = _mm_setr_epi8( + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + ); #[cfg_attr(rustfmt, rustfmt_skip)] let b = _mm_setr_epi8( 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, @@ -2408,8 +2446,24 @@ mod tests { #[simd_test = "sse2"] unsafe fn test_mm_adds_epi8() { - let a = - _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let a = _mm_setr_epi8( + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + ); #[cfg_attr(rustfmt, rustfmt_skip)] let b = _mm_setr_epi8( 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, @@ -2465,8 +2519,24 @@ mod tests { #[simd_test = "sse2"] unsafe fn test_mm_adds_epu8() { - let a = - _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let a = _mm_setr_epi8( + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + ); #[cfg_attr(rustfmt, rustfmt_skip)] let b = _mm_setr_epi8( 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, @@ -2715,8 +2785,24 @@ mod tests { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); let r = _mm_slli_si128(a, 1); - let e = - _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e = _mm_setr_epi8( + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + ); assert_eq_m128i(r, e); #[cfg_attr(rustfmt, rustfmt_skip)] @@ -2958,8 +3044,24 @@ mod tests { #[simd_test = "sse2"] unsafe fn test_mm_cmpeq_epi8() { - let a = - _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let a = _mm_setr_epi8( + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + ); let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); let r = _mm_cmpeq_epi8(a, b); @@ -3104,8 +3206,11 @@ mod tests { let r = _mm_set_epi8( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); - let e = - _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + #[cfg_attr(rustfmt, rustfmt_skip)] + let e = _mm_setr_epi8( + 15, 14, 13, 12, 11, 10, 9, 8, + 7, 6, 5, 4, 3, 2, 1, 0, + ); assert_eq_m128i(r, e); } @@ -3151,8 +3256,11 @@ mod tests { let r = _mm_setr_epi8( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); - let e = - _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[cfg_attr(rustfmt, rustfmt_skip)] + let e = _mm_setr_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); assert_eq_m128i(r, e); } @@ -3186,7 +3294,11 @@ mod tests { #[simd_test = "sse2"] unsafe fn test_mm_maskmoveu_si128() { let a = _mm_set1_epi8(9); - let mask = _mm_set_epi8(0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + #[cfg_attr(rustfmt, rustfmt_skip)] + let mask = _mm_set_epi8( + 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + ); let mut r = _mm_set1_epi8(0); _mm_maskmoveu_si128(a, mask, &mut r as *mut _ as *mut i8); let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); @@ -3197,10 +3309,7 @@ mod tests { unsafe fn test_mm_store_si128() { let a = _mm_set1_epi8(9); let mut r = _mm_set1_epi8(0); - _mm_store_si128( - &mut r as *mut _ as *mut __m128i, - a, - ); + _mm_store_si128(&mut r as *mut _ as *mut __m128i, a); assert_eq_m128i(r, a); } @@ 
-3208,10 +3317,7 @@ mod tests { unsafe fn test_mm_storeu_si128() { let a = _mm_set1_epi8(9); let mut r = _mm_set1_epi8(0); - _mm_storeu_si128( - &mut r as *mut _ as *mut __m128i, - a, - ); + _mm_storeu_si128(&mut r as *mut _ as *mut __m128i, a); assert_eq_m128i(r, a); } @@ -3219,10 +3325,7 @@ mod tests { unsafe fn test_mm_storel_epi64() { let a = _mm_setr_epi64x(2, 9); let mut r = _mm_set1_epi8(0); - _mm_storel_epi64( - &mut r as *mut _ as *mut __m128i, - a, - ); + _mm_storel_epi64(&mut r as *mut _ as *mut __m128i, a); assert_eq_m128i(r, _mm_setr_epi64x(2, 0)); } @@ -3270,7 +3373,7 @@ mod tests { let r = _mm_packs_epi32(a, b); assert_eq_m128i( r, - _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF) + _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF), ); } @@ -3281,7 +3384,7 @@ mod tests { let r = _mm_packus_epi16(a, b); assert_eq_m128i( r, - _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0) + _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0), ); } @@ -3341,8 +3444,11 @@ mod tests { #[simd_test = "sse2"] unsafe fn test_mm_unpackhi_epi8() { - let a = - _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[cfg_attr(rustfmt, rustfmt_skip)] + let a = _mm_setr_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); #[cfg_attr(rustfmt, rustfmt_skip)] let b = _mm_setr_epi8( 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, @@ -3384,15 +3490,21 @@ mod tests { #[simd_test = "sse2"] unsafe fn test_mm_unpacklo_epi8() { - let a = - _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[cfg_attr(rustfmt, rustfmt_skip)] + let a = _mm_setr_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); #[cfg_attr(rustfmt, rustfmt_skip)] let b = _mm_setr_epi8( 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, ); let r = _mm_unpacklo_epi8(a, b); - let e = - _mm_setr_epi8(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #[cfg_attr(rustfmt, rustfmt_skip)] + let e = _mm_setr_epi8( + 0, 16, 1, 17, 2, 18, 3, 19, + 4, 20, 5, 21, 6, 22, 7, 23, + ); assert_eq_m128i(r, e); } @@ -4043,10 +4155,12 @@ mod tests { assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0)); let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN)); - assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0)); + assert_eq_m128( + r, + _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0), + ); - let r = - _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64)); + let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64)); assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0)); } @@ -4075,10 +4189,7 @@ mod tests { let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN)); assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); - let r = _mm_cvtpd_epi32(_mm_setr_pd( - f64::INFINITY, - f64::NEG_INFINITY, - )); + let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY)); assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN)); @@ -4118,8 +4229,8 @@ mod tests { f32::INFINITY, f32::NEG_INFINITY, f32::MAX, - f32::NEG_INFINITY - ) + f32::NEG_INFINITY, + ), ); } @@ -4175,7 +4286,10 @@ mod tests { let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX); let r = _mm_cvttps_epi32(a); - assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN)); + assert_eq_m128i( + r, + _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN), + ); } #[simd_test = "sse2"] diff --git 
a/coresimd/src/x86/i586/sse41.rs b/coresimd/src/x86/i586/sse41.rs index 4ca8397bd8..ab5e509d0d 100644 --- a/coresimd/src/x86/i586/sse41.rs +++ b/coresimd/src/x86/i586/sse41.rs @@ -50,7 +50,9 @@ pub const _MM_FROUND_NEARBYINT: i32 = #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pblendvb))] -pub unsafe fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i { +pub unsafe fn _mm_blendv_epi8( + a: __m128i, b: __m128i, mask: __m128i +) -> __m128i { mem::transmute(pblendvb(a.as_i8x16(), b.as_i8x16(), mask.as_i8x16())) } @@ -539,13 +541,13 @@ pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 { /// use coresimd::vendor; /// /// // round to nearest, and suppress exceptions: -/// (vendor::_MM_FROUND_TO_NEAREST_INT |vendor::_MM_FROUND_NO_EXC); +/// (vendor::_MM_FROUND_TO_NEAREST_INT | vendor::_MM_FROUND_NO_EXC); /// // round down, and suppress exceptions: -/// (vendor::_MM_FROUND_TO_NEG_INF |vendor::_MM_FROUND_NO_EXC); +/// (vendor::_MM_FROUND_TO_NEG_INF | vendor::_MM_FROUND_NO_EXC); /// // round up, and suppress exceptions: -/// (vendor::_MM_FROUND_TO_POS_INF |vendor::_MM_FROUND_NO_EXC); +/// (vendor::_MM_FROUND_TO_POS_INF | vendor::_MM_FROUND_NO_EXC); /// // truncate, and suppress exceptions: -/// (vendor::_MM_FROUND_TO_ZERO |vendor::_MM_FROUND_NO_EXC); +/// (vendor::_MM_FROUND_TO_ZERO | vendor::_MM_FROUND_NO_EXC); /// // use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`: /// vendor::_MM_FROUND_CUR_DIRECTION; /// ``` @@ -568,13 +570,13 @@ pub unsafe fn _mm_round_pd(a: __m128d, rounding: i32) -> __m128d { /// use coresimd::vendor; /// /// // round to nearest, and suppress exceptions: -/// (vendor::_MM_FROUND_TO_NEAREST_INT |vendor::_MM_FROUND_NO_EXC); +/// (vendor::_MM_FROUND_TO_NEAREST_INT | vendor::_MM_FROUND_NO_EXC); /// // round down, and suppress exceptions: -/// (vendor::_MM_FROUND_TO_NEG_INF |vendor::_MM_FROUND_NO_EXC); +/// (vendor::_MM_FROUND_TO_NEG_INF | vendor::_MM_FROUND_NO_EXC); /// // round up, and suppress exceptions: -/// (vendor::_MM_FROUND_TO_POS_INF |vendor::_MM_FROUND_NO_EXC); +/// (vendor::_MM_FROUND_TO_POS_INF | vendor::_MM_FROUND_NO_EXC); /// // truncate, and suppress exceptions: -/// (vendor::_MM_FROUND_TO_ZERO |vendor::_MM_FROUND_NO_EXC); +/// (vendor::_MM_FROUND_TO_ZERO | vendor::_MM_FROUND_NO_EXC); /// // use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`: /// vendor::_MM_FROUND_CUR_DIRECTION; /// ``` @@ -599,13 +601,13 @@ pub unsafe fn _mm_round_ps(a: __m128, rounding: i32) -> __m128 { /// use coresimd::vendor; /// /// // round to nearest, and suppress exceptions: -/// (vendor::_MM_FROUND_TO_NEAREST_INT |vendor::_MM_FROUND_NO_EXC); +/// (vendor::_MM_FROUND_TO_NEAREST_INT | vendor::_MM_FROUND_NO_EXC); /// // round down, and suppress exceptions: -/// (vendor::_MM_FROUND_TO_NEG_INF |vendor::_MM_FROUND_NO_EXC); +/// (vendor::_MM_FROUND_TO_NEG_INF | vendor::_MM_FROUND_NO_EXC); /// // round up, and suppress exceptions: -/// (vendor::_MM_FROUND_TO_POS_INF |vendor::_MM_FROUND_NO_EXC); +/// (vendor::_MM_FROUND_TO_POS_INF | vendor::_MM_FROUND_NO_EXC); /// // truncate, and suppress exceptions: -/// (vendor::_MM_FROUND_TO_ZERO |vendor::_MM_FROUND_NO_EXC); +/// (vendor::_MM_FROUND_TO_ZERO | vendor::_MM_FROUND_NO_EXC); /// // use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`: /// vendor::_MM_FROUND_CUR_DIRECTION; /// ``` @@ -630,13 +632,13 @@ pub unsafe fn _mm_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d { /// use coresimd::vendor; /// /// // round to nearest, and suppress exceptions: -/// 
(vendor::_MM_FROUND_TO_NEAREST_INT |vendor::_MM_FROUND_NO_EXC); +/// (vendor::_MM_FROUND_TO_NEAREST_INT | vendor::_MM_FROUND_NO_EXC); /// // round down, and suppress exceptions: -/// (vendor::_MM_FROUND_TO_NEG_INF |vendor::_MM_FROUND_NO_EXC); +/// (vendor::_MM_FROUND_TO_NEG_INF | vendor::_MM_FROUND_NO_EXC); /// // round up, and suppress exceptions: -/// (vendor::_MM_FROUND_TO_POS_INF |vendor::_MM_FROUND_NO_EXC); +/// (vendor::_MM_FROUND_TO_POS_INF | vendor::_MM_FROUND_NO_EXC); /// // truncate, and suppress exceptions: -/// (vendor::_MM_FROUND_TO_ZERO |vendor::_MM_FROUND_NO_EXC); +/// (vendor::_MM_FROUND_TO_ZERO | vendor::_MM_FROUND_NO_EXC); /// // use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`: /// vendor::_MM_FROUND_CUR_DIRECTION; /// ``` @@ -689,8 +691,8 @@ pub unsafe fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i { /// 64-bit integers, and returns the lowest 32-bit, whatever they might be, /// reinterpreted as a signed integer. While `pmulld __m128i::splat(2), /// __m128i::splat(2)` returns the obvious `__m128i::splat(4)`, due to wrapping -/// arithmetic `pmulld __m128i::splat(i32::MAX), __m128i::splat(2)` would return a -/// negative number. +/// arithmetic `pmulld __m128i::splat(i32::MAX), __m128i::splat(2)` would +/// return a negative number. #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmulld))] @@ -803,14 +805,20 @@ mod tests { #[simd_test = "sse4.1"] unsafe fn test_mm_blendv_epi8() { - let a = - _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[cfg_attr(rustfmt, rustfmt_skip)] + let a = _mm_setr_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); #[cfg_attr(rustfmt, rustfmt_skip)] let b = _mm_setr_epi8( 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, ); - let mask = - _mm_setr_epi8(0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1); + #[cfg_attr(rustfmt, rustfmt_skip)] + let mask = _mm_setr_epi8( + 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, + ); #[cfg_attr(rustfmt, rustfmt_skip)] let e = _mm_setr_epi8( 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31, @@ -876,8 +884,11 @@ mod tests { #[simd_test = "sse4.1"] unsafe fn test_mm_extract_epi8() { - let a = - _mm_setr_epi8(-1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[cfg_attr(rustfmt, rustfmt_skip)] + let a = _mm_setr_epi8( + -1, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15 + ); let r1 = _mm_extract_epi8(a, 0); let r2 = _mm_extract_epi8(a, 19); assert_eq!(r1, 0xFF); @@ -1385,8 +1396,11 @@ mod tests { #[simd_test = "sse4.1"] unsafe fn test_mm_mpsadbw_epu8() { - let a = - _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[cfg_attr(rustfmt, rustfmt_skip)] + let a = _mm_setr_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); let r = _mm_mpsadbw_epu8(a, a, 0b000); let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28); diff --git a/coresimd/src/x86/i586/sse42.rs b/coresimd/src/x86/i586/sse42.rs index f850306d29..e53836efa2 100644 --- a/coresimd/src/x86/i586/sse42.rs +++ b/coresimd/src/x86/i586/sse42.rs @@ -627,7 +627,7 @@ mod tests { slice.get_unchecked_mut(0) as *mut u8 as *mut u8, s.len(), ); - _mm_loadu_si128(slice.as_ptr() as *const _) + _mm_loadu_si128(slice.as_ptr() as *const _) } #[simd_test = "sse4.2"] @@ -689,11 +689,7 @@ mod tests { ); let a = a_bytes; let b = b_bytes; - let i = _mm_cmpistro( - a, - b, - _SIDD_UWORD_OPS | _SIDD_UNIT_MASK, - ); + let i = _mm_cmpistro(a, b, _SIDD_UWORD_OPS | _SIDD_UNIT_MASK); assert_eq!(0, i); } @@ -722,8 +718,7 @@ mod 
tests { unsafe fn test_mm_cmpestri() { let a = str_to_m128i(b"bar - garbage"); let b = str_to_m128i(b"foobar"); - let i = - _mm_cmpestri(a, 3, b, 6, _SIDD_CMP_EQUAL_ORDERED); + let i = _mm_cmpestri(a, 3, b, 6, _SIDD_CMP_EQUAL_ORDERED); assert_eq!(3, i); } @@ -731,8 +726,7 @@ mod tests { unsafe fn test_mm_cmpestrz() { let a = str_to_m128i(b""); let b = str_to_m128i(b"Hello"); - let i = - _mm_cmpestrz(a, 16, b, 6, _SIDD_CMP_EQUAL_ORDERED); + let i = _mm_cmpestrz(a, 16, b, 6, _SIDD_CMP_EQUAL_ORDERED); assert_eq!(1, i); } @@ -769,13 +763,8 @@ mod tests { unsafe fn test_mm_cmpestra() { let a = str_to_m128i(b"Cannot match a"); let b = str_to_m128i(b"Null after 14"); - let i = _mm_cmpestra( - a, - 14, - b, - 16, - _SIDD_CMP_EQUAL_EACH | _SIDD_UNIT_MASK, - ); + let i = + _mm_cmpestra(a, 14, b, 16, _SIDD_CMP_EQUAL_EACH | _SIDD_UNIT_MASK); assert_eq!(1, i); } diff --git a/coresimd/src/x86/i586/ssse3.rs b/coresimd/src/x86/i586/ssse3.rs index 01e461bd79..4efcee388d 100644 --- a/coresimd/src/x86/i586/ssse3.rs +++ b/coresimd/src/x86/i586/ssse3.rs @@ -313,10 +313,18 @@ mod tests { #[simd_test = "ssse3"] unsafe fn test_mm_shuffle_epi8() { - let a = - _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b = - _mm_setr_epi8(4, 128u8 as i8, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0); + #[cfg_attr(rustfmt, rustfmt_skip)] + let a = _mm_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + ); + #[cfg_attr(rustfmt, rustfmt_skip)] + let b = _mm_setr_epi8( + 4, 128_u8 as i8, 4, 3, + 24, 12, 6, 19, + 12, 5, 5, 10, + 4, 1, 8, 0, + ); let expected = _mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1); let r = _mm_shuffle_epi8(a, b); @@ -325,24 +333,38 @@ mod tests { #[simd_test = "ssse3"] unsafe fn test_mm_alignr_epi8() { - let a = - _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b = - _mm_setr_epi8(4, 63, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0); + #[cfg_attr(rustfmt, rustfmt_skip)] + let a = _mm_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + ); + #[cfg_attr(rustfmt, rustfmt_skip)] + let b = _mm_setr_epi8( + 4, 63, 4, 3, + 24, 12, 6, 19, + 12, 5, 5, 10, + 4, 1, 8, 0, + ); let r = _mm_alignr_epi8(a, b, 33); assert_eq_m128i(r, _mm_set1_epi8(0)); let r = _mm_alignr_epi8(a, b, 17); - let expected = - _mm_setr_epi8(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0); + #[cfg_attr(rustfmt, rustfmt_skip)] + let expected = _mm_setr_epi8( + 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 0, + ); assert_eq_m128i(r, expected); let r = _mm_alignr_epi8(a, b, 16); assert_eq_m128i(r, a); let r = _mm_alignr_epi8(a, b, 15); - let expected = - _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[cfg_attr(rustfmt, rustfmt_skip)] + let expected = _mm_setr_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + ); assert_eq_m128i(r, expected); let r = _mm_alignr_epi8(a, b, 0); @@ -405,10 +427,18 @@ mod tests { #[simd_test = "ssse3"] unsafe fn test_mm_maddubs_epi16() { - let a = - _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b = - _mm_setr_epi8(4, 63, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0); + #[cfg_attr(rustfmt, rustfmt_skip)] + let a = _mm_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + ); + #[cfg_attr(rustfmt, rustfmt_skip)] + let b = _mm_setr_epi8( + 4, 63, 4, 3, + 24, 12, 6, 19, + 12, 5, 5, 10, + 4, 1, 8, 0, + ); let expected = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 66, 120); let r = _mm_maddubs_epi16(a, b); 
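The `expected` vector above is easier to audit against a scalar model of `pmaddubsw`'s assumed semantics: unsigned bytes from `a` times signed bytes from `b`, with adjacent products summed under signed 16-bit saturation. This is an illustrative sketch, not the crate's implementation; the assertion it anticipates follows just below.

fn maddubs_epi16(a: [u8; 16], b: [i8; 16]) -> [i16; 8] {
    let mut out = [0i16; 8];
    for i in 0..8 {
        // Widen to i32 so the pairwise sum cannot overflow before clamping.
        let sum = i32::from(a[2 * i]) * i32::from(b[2 * i])
            + i32::from(a[2 * i + 1]) * i32::from(b[2 * i + 1]);
        out[i] = sum.max(i32::from(i16::MIN)).min(i32::from(i16::MAX)) as i16;
    }
    out
}

fn main() {
    let a: [u8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
    let b: [i8; 16] = [4, 63, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0];
    // e.g. lane 0: 1 * 4 + 2 * 63 = 130, matching the test's expected value.
    assert_eq!(maddubs_epi16(a, b), [130, 24, 192, 194, 158, 175, 66, 120]);
}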
assert_eq_m128i(r, expected); diff --git a/coresimd/src/x86/i686/mmx.rs b/coresimd/src/x86/i686/mmx.rs index c5d69bcec0..7074f6a28b 100644 --- a/coresimd/src/x86/i686/mmx.rs +++ b/coresimd/src/x86/i686/mmx.rs @@ -514,7 +514,8 @@ mod tests { -30001, i16::max_value() - 1, ); - let e = _mm_setr_pi16(i16::min_value(), 30000, -30000, i16::max_value()); + let e = + _mm_setr_pi16(i16::min_value(), 30000, -30000, i16::max_value()); assert_eq_m64(e, _mm_add_pi16(a, b)); assert_eq_m64(e, _m_paddw(a, b)); } diff --git a/coresimd/src/x86/i686/sse.rs b/coresimd/src/x86/i686/sse.rs index 1cb830eff8..e77f1f3ed6 100644 --- a/coresimd/src/x86/i686/sse.rs +++ b/coresimd/src/x86/i686/sse.rs @@ -1,6 +1,5 @@ //! `i686` Streaming SIMD Extensions (SSE) -use core::mem; use x86::*; #[cfg(test)] @@ -204,7 +203,7 @@ pub unsafe fn _m_psadbw(a: __m64, b: __m64) -> __m64 { #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvtpi2ps))] pub unsafe fn _mm_cvtpi32_ps(a: __m128, b: __m64) -> __m128 { - cvtpi2ps(a, mem::transmute(b)) + cvtpi2ps(a, b) } /// Converts two elements of a 64-bit vector of [2 x i32] into two @@ -315,7 +314,7 @@ pub unsafe fn _m_maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8) { #[cfg_attr(test, assert_instr(pextrw, imm2 = 0))] pub unsafe fn _mm_extract_pi16(a: __m64, imm2: i32) -> i16 { macro_rules! call { - ($imm2:expr) => { pextrw(mem::transmute(a), $imm2) as i16 } + ($imm2:expr) => { pextrw(a, $imm2) as i16 } } constify_imm2!(imm2, call) } @@ -359,7 +358,7 @@ pub unsafe fn _m_pinsrw(a: __m64, d: i32, imm2: i32) -> __m64 { #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pmovmskb))] pub unsafe fn _mm_movemask_pi8(a: __m64) -> i32 { - pmovmskb(mem::transmute(a)) + pmovmskb(a) } /// Takes the most significant bit from each 8-bit element in a 64-bit @@ -399,7 +398,7 @@ pub unsafe fn _m_pshufw(a: __m64, imm8: i32) -> __m64 { #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvttps2pi))] pub unsafe fn _mm_cvttps_pi32(a: __m128) -> __m64 { - mem::transmute(cvttps2pi(a)) + cvttps2pi(a) } /// Convert the two lower packed single-precision (32-bit) floating-point @@ -534,8 +533,11 @@ mod tests { #[simd_test = "sse,mmx"] unsafe fn test_mm_sad_pu8() { - let a = _mm_setr_pi8(255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8, - 1, 2, 3, 4); + #[cfg_attr(rustfmt, rustfmt_skip)] + let a = _mm_setr_pi8( + 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8, + 1, 2, 3, 4, + ); let b = _mm_setr_pi8(0, 0, 0, 0, 2, 1, 2, 1); let r = _mm_sad_pu8(a, b); assert_eq_m64(r, _mm_setr_pi16(1020, 0, 0, 0)); @@ -602,11 +604,7 @@ mod tests { let a = _mm_set1_pi8(9); let mask = _mm_setr_pi8(0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0); let mut r = _mm_set1_pi8(0); - _mm_maskmove_si64( - a, - mask, - &mut r as *mut _ as *mut i8, - ); + _mm_maskmove_si64(a, mask, &mut r as *mut _ as *mut i8); let e = _mm_setr_pi8(0, 0, 9, 0, 0, 0, 0, 0); assert_eq_m64(r, e); @@ -643,7 +641,8 @@ mod tests { #[simd_test = "sse,mmx"] unsafe fn test_mm_movemask_pi8() { - let a = _mm_setr_pi16(0b1000_0000, 0b0100_0000, 0b1000_0000, 0b0100_0000); + let a = + _mm_setr_pi16(0b1000_0000, 0b0100_0000, 0b1000_0000, 0b0100_0000); let r = _mm_movemask_pi8(a); assert_eq!(r, 0b10001); diff --git a/coresimd/src/x86/i686/sse2.rs b/coresimd/src/x86/i686/sse2.rs index ba34838c03..41cd8b3578 100644 --- a/coresimd/src/x86/i686/sse2.rs +++ b/coresimd/src/x86/i686/sse2.rs @@ -24,7 +24,7 @@ pub unsafe fn _mm_add_si64(a: __m64, b: __m64) -> __m64 { #[target_feature(enable = "sse2,mmx")] #[cfg_attr(test, assert_instr(pmuludq))] 
pub unsafe fn _mm_mul_su32(a: __m64, b: __m64) -> __m64 { - pmuludq(mem::transmute(a), mem::transmute(b)) + pmuludq(a, b) } /// Subtracts signed or unsigned 64-bit integer values and writes the @@ -176,8 +176,7 @@ mod tests { #[simd_test = "sse2,mmx"] unsafe fn test_mm_set_epi64() { - let r = - _mm_set_epi64(mem::transmute(1i64), mem::transmute(2i64)); + let r = _mm_set_epi64(mem::transmute(1i64), mem::transmute(2i64)); assert_eq_m128i(r, _mm_setr_epi64x(2, 1)); } @@ -189,8 +188,7 @@ mod tests { #[simd_test = "sse2,mmx"] unsafe fn test_mm_setr_epi64() { - let r = - _mm_setr_epi64(mem::transmute(1i64), mem::transmute(2i64)); + let r = _mm_setr_epi64(mem::transmute(1i64), mem::transmute(2i64)); assert_eq_m128i(r, _mm_setr_epi64x(1, 2)); } @@ -202,16 +200,7 @@ mod tests { #[simd_test = "sse2,mmx"] unsafe fn test_mm_movpi64_epi64() { - let r = _mm_movpi64_epi64(_mm_setr_pi8( - 5, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - )); + let r = _mm_movpi64_epi64(_mm_setr_pi8(5, 0, 0, 0, 0, 0, 0, 0)); assert_eq_m128i(r, _mm_setr_epi64x(5, 0)); } diff --git a/coresimd/src/x86/i686/sse42.rs b/coresimd/src/x86/i686/sse42.rs index f092fe412f..ad788861da 100644 --- a/coresimd/src/x86/i686/sse42.rs +++ b/coresimd/src/x86/i686/sse42.rs @@ -27,6 +27,9 @@ mod tests { let a = _mm_setr_epi64x(0, 0x2a); let b = _mm_set1_epi64x(0x00); let i = _mm_cmpgt_epi64(a, b); - assert_eq_m128i(i, _mm_setr_epi64x(0x00, 0xffffffffffffffffu64 as i64)); + assert_eq_m128i( + i, + _mm_setr_epi64x(0x00, 0xffffffffffffffffu64 as i64), + ); } } diff --git a/coresimd/src/x86/mod.rs b/coresimd/src/x86/mod.rs index d1f42f6cc9..27404922ee 100644 --- a/coresimd/src/x86/mod.rs +++ b/coresimd/src/x86/mod.rs @@ -16,6 +16,7 @@ macro_rules! types { #[repr(simd)] pub struct $name($($fields)*); + #[cfg_attr(feature = "cargo-clippy", allow(expl_impl_clone_on_copy))] impl Clone for $name { #[inline] // currently needed for correctness fn clone(&self) -> $name { @@ -350,7 +351,9 @@ trait m128iExt: Sized { impl m128iExt for __m128i { #[inline] - fn as_m128i(self) -> __m128i { self } + fn as_m128i(self) -> Self { + self + } } #[doc(hidden)] @@ -401,7 +404,9 @@ trait m256iExt: Sized { impl m256iExt for __m256i { #[inline] - fn as_m256i(self) -> __m256i { self } + fn as_m256i(self) -> Self { + self + } } mod i386; diff --git a/coresimd/src/x86/test.rs b/coresimd/src/x86/test.rs index 51763380a9..1ebc8d7406 100644 --- a/coresimd/src/x86/test.rs +++ b/coresimd/src/x86/test.rs @@ -4,13 +4,19 @@ use x86::*; #[target_feature(enable = "mmx")] pub unsafe fn assert_eq_m64(a: __m64, b: __m64) { - union A { a: __m64, b: u64 } + union A { + a: __m64, + b: u64, + } assert_eq!(A { a }.b, A { a: b }.b) } #[target_feature(enable = "sse2")] pub unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) { - union A { a: __m128i, b: [u64; 2] } + union A { + a: __m128i, + b: [u64; 2], + } assert_eq!(A { a }.b, A { a: b }.b) } @@ -23,7 +29,10 @@ pub unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) { #[target_feature(enable = "sse2")] pub unsafe fn get_m128d(a: __m128d, idx: usize) -> f64 { - union A { a: __m128d, b: [f64; 2] }; + union A { + a: __m128d, + b: [f64; 2], + }; A { a }.b[idx] } @@ -37,7 +46,10 @@ pub unsafe fn assert_eq_m128(a: __m128, b: __m128) { #[target_feature(enable = "sse")] pub unsafe fn get_m128(a: __m128, idx: usize) -> f32 { - union A { a: __m128, b: [f32; 4] }; + union A { + a: __m128, + b: [f32; 4], + }; A { a }.b[idx] } @@ -50,7 +62,10 @@ pub unsafe fn _mm_setr_epi64x(a: i64, b: i64) -> __m128i { #[target_feature(enable = "avx")] pub unsafe fn assert_eq_m256i(a: 
__m256i, b: __m256i) { - union A { a: __m256i, b: [u64; 4] } + union A { + a: __m256i, + b: [u64; 4], + } assert_eq!(A { a }.b, A { a: b }.b) } @@ -64,7 +79,10 @@ pub unsafe fn assert_eq_m256d(a: __m256d, b: __m256d) { #[target_feature(enable = "avx")] pub unsafe fn get_m256d(a: __m256d, idx: usize) -> f64 { - union A { a: __m256d, b: [f64; 4] }; + union A { + a: __m256d, + b: [f64; 4], + }; A { a }.b[idx] } @@ -78,26 +96,37 @@ pub unsafe fn assert_eq_m256(a: __m256, b: __m256) { #[target_feature(enable = "avx")] pub unsafe fn get_m256(a: __m256, idx: usize) -> f32 { - union A { a: __m256, b: [f32; 8] }; + union A { + a: __m256, + b: [f32; 8], + }; A { a }.b[idx] } -// These intrinsics doesn't exist on x86 b/c it requires a 64-bit registe,r which -// doesn't exist on x86! +// These intrinsics don't exist on x86 b/c they require a 64-bit register, +// which doesn't exist on x86! #[cfg(target_arch = "x86")] mod x86_polyfill { use x86::*; pub unsafe fn _mm_insert_epi64(a: __m128i, val: i64, idx: i32) -> __m128i { - union A { a: __m128i, b: [i64; 2] }; + union A { + a: __m128i, + b: [i64; 2], + }; let mut a = A { a }; a.b[idx as usize] = val; a.a } #[target_feature(enable = "avx2")] - pub unsafe fn _mm256_insert_epi64(a: __m256i, val: i64, idx: i32) -> __m256i { - union A { a: __m256i, b: [i64; 4] }; + pub unsafe fn _mm256_insert_epi64( + a: __m256i, val: i64, idx: i32 + ) -> __m256i { + union A { + a: __m256i, + b: [i64; 4], + }; let mut a = A { a }; a.b[idx as usize] = val; a.a diff --git a/coresimd/src/x86/x86_64/abm.rs b/coresimd/src/x86/x86_64/abm.rs index 235fa8bb17..510ba41c16 100644 --- a/coresimd/src/x86/x86_64/abm.rs +++ b/coresimd/src/x86/x86_64/abm.rs @@ -1,3 +1,22 @@ +//! Advanced Bit Manipulation (ABM) instructions +//! +//! The POPCNT and LZCNT instructions have their own CPUID bits to indicate support. +//! +//! The references are: +//! +//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: +//! Instruction Set Reference, A-Z][intel64_ref]. +//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and +//! System Instructions][amd64_ref]. +//! +//! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions +//! available. +//! +//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf +//! [wikipedia_bmi]: +//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 + #[cfg(test)] use stdsimd_test::assert_instr; diff --git a/coresimd/src/x86/x86_64/avx.rs b/coresimd/src/x86/x86_64/avx.rs index 3f9fda1451..33338bfb26 100644 --- a/coresimd/src/x86/x86_64/avx.rs +++ b/coresimd/src/x86/x86_64/avx.rs @@ -1,3 +1,18 @@ +//! Advanced Vector Extensions (AVX) +//! +//! The references are: +//! +//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: +//! Instruction Set Reference, A-Z][intel64_ref]. +//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose +//! and System Instructions][amd64_ref]. +//! +//! [Wikipedia][wiki] provides a quick overview of the instructions available. +//! +//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf +//! 
[wiki]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions + use core::mem; use simd_llvm::*; diff --git a/coresimd/src/x86/x86_64/avx2.rs b/coresimd/src/x86/x86_64/avx2.rs index 6cdb542bfb..4786ef4d5d 100644 --- a/coresimd/src/x86/x86_64/avx2.rs +++ b/coresimd/src/x86/x86_64/avx2.rs @@ -1,3 +1,23 @@ +//! Advanced Vector Extensions 2 (AVX2) +//! +//! AVX2 expands most AVX commands to 256-bit wide vector registers and +//! adds [FMA](https://en.wikipedia.org/wiki/Fused_multiply-accumulate). +//! +//! The references are: +//! +//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: +//! Instruction Set Reference, A-Z][intel64_ref]. +//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and +//! System Instructions][amd64_ref]. +//! +//! Wikipedia's [AVX][wiki_avx] and [FMA][wiki_fma] pages provide a quick +//! overview of the instructions available. +//! +//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf +//! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions +//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate + use simd_llvm::*; use x86::*; diff --git a/coresimd/src/x86/x86_64/bmi.rs b/coresimd/src/x86/x86_64/bmi.rs index d8d8a74972..a048851424 100644 --- a/coresimd/src/x86/x86_64/bmi.rs +++ b/coresimd/src/x86/x86_64/bmi.rs @@ -1,3 +1,14 @@ +//! Bit Manipulation Instruction (BMI) Set 1.0. +//! +//! The reference is [Intel 64 and IA-32 Architectures Software Developer's +//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. +//! +//! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions +//! available. +//! +//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +//! [wikipedia_bmi]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 + #[cfg(test)] use stdsimd_test::assert_instr; diff --git a/coresimd/src/x86/x86_64/bmi2.rs b/coresimd/src/x86/x86_64/bmi2.rs index b1c74d15c8..aa92133f08 100644 --- a/coresimd/src/x86/x86_64/bmi2.rs +++ b/coresimd/src/x86/x86_64/bmi2.rs @@ -1,3 +1,15 @@ +//! Bit Manipulation Instruction (BMI) Set 2.0. +//! +//! The reference is [Intel 64 and IA-32 Architectures Software Developer's +//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. +//! +//! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions +//! available. +//! +//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +//! [wikipedia_bmi]: +//! 
https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 + #[cfg(test)] use stdsimd_test::assert_instr; diff --git a/coresimd/src/x86/x86_64/sse.rs b/coresimd/src/x86/x86_64/sse.rs index 4763e81b6f..ece2220daf 100644 --- a/coresimd/src/x86/x86_64/sse.rs +++ b/coresimd/src/x86/x86_64/sse.rs @@ -86,13 +86,9 @@ mod tests { let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0); let r = _mm_cvtss_si64(x); assert_eq!( - e, - r, + e, r, "TestCase #{} _mm_cvtss_si64({:?}) = {}, expected: {}", - i, - x, - r, - e + i, x, r, e ); } } @@ -118,13 +114,9 @@ mod tests { let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0); let r = _mm_cvttss_si64(x); assert_eq!( - e, - r, + e, r, "TestCase #{} _mm_cvttss_si64({:?}) = {}, expected: {}", - i, - x, - r, - e + i, x, r, e ); } } diff --git a/coresimd/src/x86/x86_64/sse2.rs b/coresimd/src/x86/x86_64/sse2.rs index ff16d1f957..4136da5677 100644 --- a/coresimd/src/x86/x86_64/sse2.rs +++ b/coresimd/src/x86/x86_64/sse2.rs @@ -120,7 +120,6 @@ mod tests { #[simd_test = "sse2"] unsafe fn test_mm_cvtsd_si64() { - let r = _mm_cvtsd_si64(_mm_setr_pd(-2.0, 5.0)); assert_eq!(r, -2_i64); diff --git a/examples/hex.rs b/examples/hex.rs index 630c6105bd..6da9a6c5e3 100644 --- a/examples/hex.rs +++ b/examples/hex.rs @@ -14,6 +14,10 @@ #![feature(cfg_target_feature, target_feature)] #![cfg_attr(test, feature(test))] +#![cfg_attr(feature = "cargo-clippy", + allow(result_unwrap_used, option_unwrap_used, print_stdout, + missing_docs_in_private_items, shadow_reuse, + cast_possible_wrap, cast_sign_loss))] #[macro_use] extern crate stdsimd; @@ -38,16 +42,16 @@ fn main() { fn hex_encode<'a>(src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> { let len = src.len().checked_mul(2).unwrap(); if dst.len() < len { - return Err(len) + return Err(len); } #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { if cfg_feature_enabled!("avx2") { - return unsafe { hex_encode_avx2(src, dst) } + return unsafe { hex_encode_avx2(src, dst) }; } if cfg_feature_enabled!("sse4.1") { - return unsafe { hex_encode_sse41(src, dst) } + return unsafe { hex_encode_sse41(src, dst) }; } } @@ -56,15 +60,15 @@ fn hex_encode<'a>(src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> { #[target_feature(enable = "avx2")] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -unsafe fn hex_encode_avx2<'a>(mut src: &[u8], dst: &'a mut [u8]) - -> Result<&'a str, usize> -{ +unsafe fn hex_encode_avx2<'a>( + mut src: &[u8], dst: &'a mut [u8] +) -> Result<&'a str, usize> { let ascii_zero = _mm256_set1_epi8(b'0' as i8); let nines = _mm256_set1_epi8(9); let ascii_a = _mm256_set1_epi8((b'a' - 9 - 1) as i8); let and4bits = _mm256_set1_epi8(0xf); - let mut i = 0isize; + let mut i = 0_isize; while src.len() >= 32 { let invec = _mm256_loadu_si256(src.as_ptr() as *const _); @@ -76,8 +80,14 @@ unsafe fn hex_encode_avx2<'a>(mut src: &[u8], dst: &'a mut [u8]) let cmpmask2 = _mm256_cmpgt_epi8(masked2, nines); // add '0' or the offset depending on the masks - let masked1 = _mm256_add_epi8(masked1, _mm256_blendv_epi8(ascii_zero, ascii_a, cmpmask1)); - let masked2 = _mm256_add_epi8(masked2, _mm256_blendv_epi8(ascii_zero, ascii_a, cmpmask2)); + let masked1 = _mm256_add_epi8( + masked1, + _mm256_blendv_epi8(ascii_zero, ascii_a, cmpmask1), + ); + let masked2 = _mm256_add_epi8( + masked2, + _mm256_blendv_epi8(ascii_zero, ascii_a, cmpmask2), + ); // interleave masked1 and masked2 bytes let res1 = _mm256_unpacklo_epi8(masked2, masked1); @@ -96,23 +106,23 @@ unsafe fn hex_encode_avx2<'a>(mut src: &[u8], dst: &'a mut 
[u8]) } let i = i as usize; - drop(hex_encode_sse41(src, &mut dst[i * 2..])); + let _ = hex_encode_sse41(src, &mut dst[i * 2..]); - return Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2])) + Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2])) } // copied from https://github.com/Matherunner/bin2hex-sse/blob/master/base16_sse4.cpp #[target_feature(enable = "sse4.1")] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -unsafe fn hex_encode_sse41<'a>(mut src: &[u8], dst: &'a mut [u8]) - -> Result<&'a str, usize> -{ +unsafe fn hex_encode_sse41<'a>( + mut src: &[u8], dst: &'a mut [u8] +) -> Result<&'a str, usize> { let ascii_zero = _mm_set1_epi8(b'0' as i8); let nines = _mm_set1_epi8(9); let ascii_a = _mm_set1_epi8((b'a' - 9 - 1) as i8); let and4bits = _mm_set1_epi8(0xf); - let mut i = 0isize; + let mut i = 0_isize; while src.len() >= 16 { let invec = _mm_loadu_si128(src.as_ptr() as *const _); @@ -124,8 +134,14 @@ unsafe fn hex_encode_sse41<'a>(mut src: &[u8], dst: &'a mut [u8]) let cmpmask2 = _mm_cmpgt_epi8(masked2, nines); // add '0' or the offset depending on the masks - let masked1 = _mm_add_epi8(masked1, _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask1)); - let masked2 = _mm_add_epi8(masked2, _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask2)); + let masked1 = _mm_add_epi8( + masked1, + _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask1), + ); + let masked2 = _mm_add_epi8( + masked2, + _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask2), + ); // interleave masked1 and masked2 bytes let res1 = _mm_unpacklo_epi8(masked2, masked1); @@ -138,25 +154,25 @@ unsafe fn hex_encode_sse41<'a>(mut src: &[u8], dst: &'a mut [u8]) } let i = i as usize; - drop(hex_encode_fallback(src, &mut dst[i * 2..])); + let _ = hex_encode_fallback(src, &mut dst[i * 2..]); - return Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2])) + Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2])) } -fn hex_encode_fallback<'a>(src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> { - for (byte, slots) in src.iter().zip(dst.chunks_mut(2)) { - slots[0] = hex((*byte >> 4) & 0xf); - slots[1] = hex((*byte >> 0) & 0xf); - } - - unsafe { - return Ok(str::from_utf8_unchecked(&dst[..src.len() * 2])) - } - +fn hex_encode_fallback<'a>( + src: &[u8], dst: &'a mut [u8] +) -> Result<&'a str, usize> { fn hex(byte: u8) -> u8 { static TABLE: &[u8] = b"0123456789abcdef"; TABLE[byte as usize] } + + for (byte, slots) in src.iter().zip(dst.chunks_mut(2)) { + slots[0] = hex((*byte >> 4) & 0xf); + slots[1] = hex(*byte & 0xf); + } + + unsafe { Ok(str::from_utf8_unchecked(&dst[..src.len() * 2])) } } // Run these with `cargo +nightly test --example hex` @@ -175,10 +191,16 @@ mod tests { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] unsafe { if cfg_feature_enabled!("avx2") { - assert_eq!(hex_encode_avx2(input, &mut tmp()).unwrap(), output); + assert_eq!( + hex_encode_avx2(input, &mut tmp()).unwrap(), + output + ); } if cfg_feature_enabled!("sse4.1") { - assert_eq!(hex_encode_sse41(input, &mut tmp()).unwrap(), output); + assert_eq!( + hex_encode_sse41(input, &mut tmp()).unwrap(), + output + ); } } } @@ -190,12 +212,18 @@ mod tests { #[test] fn big() { - test(&[0; 1024], &iter::repeat('0').take(2048).collect::<String>()); + test( + &[0; 1024], + &iter::repeat('0').take(2048).collect::<String>(), + ); } #[test] fn odd() { - test(&[0; 313], &iter::repeat('0').take(313 * 2).collect::<String>()); + test( + &[0; 313], + &iter::repeat('0').take(313 * 2).collect::<String>(), + ); } #[test] @@ -206,13 +234,16 @@ mod tests { input[17] = 0x30; 
input[21] = 1; input[31] = 0x24; - test(&input, "\ - 0000000003000000\ - 0000000000000000\ - 0330000000010000\ - 0000000000000024\ - 00\ - "); + test( + &input, + "\ + 0000000003000000\ + 0000000000000000\ + 0330000000010000\ + 0000000000000024\ + 00\ + ", + ); } quickcheck! { @@ -253,8 +284,8 @@ mod tests { // Run these with `cargo +nightly bench --example hex` #[cfg(test)] mod benches { - extern crate test; extern crate rand; + extern crate rand; + extern crate test; use self::rand::Rng; @@ -263,10 +294,10 @@ mod benches { const SMALL_LEN: usize = 117; const LARGE_LEN: usize = 1 * 1024 * 1024; - fn doit(b: &mut test::Bencher, - len: usize, - f: for<'a> unsafe fn(&[u8], &'a mut [u8]) -> Result<&'a str, usize>) - { + fn doit( + b: &mut test::Bencher, len: usize, + f: for<'a> unsafe fn(&[u8], &'a mut [u8]) -> Result<&'a str, usize>, + ) { let input = rand::thread_rng() .gen_iter::<u8>() .take(len) diff --git a/src/lib.rs b/src/lib.rs index e610631265..b2c521cb41 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -85,7 +85,7 @@ //! unsafe { sum_sse2(x) } //! } //! #[cfg(not(all(any(target_arch = "x86_64", target_arch = "x86"), -//! target_feature = "sse2")))] +//! target_feature = "sse2")))] //! { //! sum_portable(x) //! } diff --git a/src/runtime/linux/mod.rs b/src/runtime/linux/mod.rs index 9ff760b12d..fe685f9916 100644 --- a/src/runtime/linux/mod.rs +++ b/src/runtime/linux/mod.rs @@ -17,7 +17,7 @@ mod auxv; mod cpuinfo; /// Detects CPU features: -pub fn detect_features() -> usize { +pub fn detect_features() -> cache::Initializer { // Try to read the ELF Auxiliary Vector using libc's getauxval: if let Ok(v) = auxv::libc::auxv() { return arch::detect_features(v); @@ -31,7 +31,7 @@ pub fn detect_features() -> usize { return arch::detect_features(v); } // Otherwise all features are disabled - 0 + cache::Initializer::default() } /// Performs run-time feature detection. 
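The `detect_features` hunk above now returns a `cache::Initializer` and works through a chain of probes, falling back to an empty feature set. A self-contained sketch of that control flow, with hypothetical stand-in probes in place of the crate's `auxv`/`cpuinfo` readers and a stand-in `Initializer`:

// Stand-in for `cache::Initializer`: a 64-bit feature bitset.
#[derive(Default, Debug, PartialEq)]
struct Initializer(u64);

// Hypothetical probes; the real ones parse the ELF auxiliary vector
// (via libc's getauxval, then /proc/self/auxv) and finally /proc/cpuinfo.
fn auxv_via_libc() -> Result<u64, ()> { Err(()) }
fn auxv_via_proc() -> Result<u64, ()> { Err(()) }
fn proc_cpuinfo() -> Result<u64, ()> { Ok(0b10) }

fn detect_features() -> Initializer {
    if let Ok(v) = auxv_via_libc() { return Initializer(v); }
    if let Ok(v) = auxv_via_proc() { return Initializer(v); }
    if let Ok(v) = proc_cpuinfo() { return Initializer(v); }
    // Otherwise all features are disabled.
    Initializer::default()
}

fn main() {
    // The first successful probe wins; here that is the cpuinfo stand-in.
    assert_eq!(detect_features(), Initializer(0b10));
}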
diff --git a/stdsimd-test/assert-instr-macro/src/lib.rs b/stdsimd-test/assert-instr-macro/src/lib.rs index c9e2850944..545449c9a7 100644 --- a/stdsimd-test/assert-instr-macro/src/lib.rs +++ b/stdsimd-test/assert-instr-macro/src/lib.rs @@ -10,8 +10,8 @@ #![feature(proc_macro)] -extern crate proc_macro2; extern crate proc_macro; +extern crate proc_macro; +extern crate proc_macro2; #[macro_use] extern crate quote; #[macro_use] diff --git a/stdsimd-test/simd-test-macro/src/lib.rs b/stdsimd-test/simd-test-macro/src/lib.rs index 6f9ddc474d..1044d44858 100644 --- a/stdsimd-test/simd-test-macro/src/lib.rs +++ b/stdsimd-test/simd-test-macro/src/lib.rs @@ -5,8 +5,8 @@ #![feature(proc_macro)] -extern crate proc_macro2; extern crate proc_macro; +extern crate proc_macro; +extern crate proc_macro2; #[macro_use] extern crate quote; diff --git a/stdsimd-test/src/lib.rs b/stdsimd-test/src/lib.rs index 1ada9193e7..60c9e3e073 100644 --- a/stdsimd-test/src/lib.rs +++ b/stdsimd-test/src/lib.rs @@ -279,7 +279,7 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { let function = &functions[0]; let mut instrs = &function.instrs[..]; - while instrs.last().map(|s| s.parts == ["nop"]).unwrap_or(false) { + while instrs.last().map_or(false, |s| s.parts == ["nop"]) { instrs = &instrs[..instrs.len() - 1]; } @@ -346,8 +346,7 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { _ => 20, }; - let probably_only_one_instruction = - instrs.len() < instruction_limit; + let probably_only_one_instruction = instrs.len() < instruction_limit; if found && probably_only_one_instruction && !inlining_failed { return; diff --git a/stdsimd-verify/src/lib.rs b/stdsimd-verify/src/lib.rs index 2682a55adc..5e55e9d296 100644 --- a/stdsimd-verify/src/lib.rs +++ b/stdsimd-verify/src/lib.rs @@ -1,7 +1,7 @@ #![feature(proc_macro)] -extern crate proc_macro2; extern crate proc_macro; +extern crate proc_macro; +extern crate proc_macro2; #[macro_use] extern crate quote; extern crate syn; @@ -158,7 +158,7 @@ fn walk(root: &Path, files: &mut Vec<(syn::File, String)>) { } if path.file_name().and_then(|s| s.to_str()) == Some("test.rs") { - continue + continue; } let mut contents = String::new(); @@ -220,23 +220,19 @@ fn find_target_feature(attrs: &[syn::Attribute]) -> Option<syn::Lit> { _ => None, }) .flat_map(|list| list) - .filter_map(|nested| { - match nested { - syn::NestedMeta::Meta(m) => Some(m), - syn::NestedMeta::Literal(_) => None, - } + .filter_map(|nested| match nested { + syn::NestedMeta::Meta(m) => Some(m), + syn::NestedMeta::Literal(_) => None, }) - .filter_map(|m| { - match m { - syn::Meta::NameValue(i) => { - if i.ident == "enable" { - Some(i.lit) - } else { - None - } + .filter_map(|m| match m { + syn::Meta::NameValue(i) => { + if i.ident == "enable" { + Some(i.lit) + } else { + None } - _ => None, } + _ => None, }) .next() } diff --git a/stdsimd-verify/tests/x86-intel.rs b/stdsimd-verify/tests/x86-intel.rs index 41d66c07c6..0f49ee9854 100644 --- a/stdsimd-verify/tests/x86-intel.rs +++ b/stdsimd-verify/tests/x86-intel.rs @@ -1,14 +1,16 @@ #![feature(proc_macro)] #![allow(bad_style)] #![cfg_attr(feature = "cargo-clippy", - allow(shadow_reuse, cast_lossless, match_same_arms))] + allow(shadow_reuse, cast_lossless, match_same_arms, + nonminimal_bool, print_stdout, use_debug, eq_op, + useless_format))] #[macro_use] extern crate serde_derive; extern crate serde_xml_rs; extern crate stdsimd_verify; -use std::collections::{HashMap, BTreeMap}; +use std::collections::{BTreeMap, HashMap}; use stdsimd_verify::x86_functions; @@ -70,21 +72,26 @@ x86_functions!(static FUNCTIONS); #[derive(Deserialize)] struct Data { - 
#[serde(rename = "intrinsic", default)] intrinsics: Vec<Intrinsic>, + #[serde(rename = "intrinsic", default)] + intrinsics: Vec<Intrinsic>, } #[derive(Deserialize)] struct Intrinsic { rettype: String, name: String, - #[serde(rename = "CPUID", default)] cpuid: Vec<String>, - #[serde(rename = "parameter", default)] parameters: Vec<Parameter>, - #[serde(default)] instruction: Vec<Instruction>, + #[serde(rename = "CPUID", default)] + cpuid: Vec<String>, + #[serde(rename = "parameter", default)] + parameters: Vec<Parameter>, + #[serde(default)] + instruction: Vec<Instruction>, } #[derive(Deserialize)] struct Parameter { - #[serde(rename = "type")] type_: String, + #[serde(rename = "type")] + type_: String, } #[derive(Deserialize, Debug)] @@ -113,22 +120,21 @@ fn verify_all_signatures() { serde_xml_rs::deserialize(xml).expect("failed to deserialize xml"); let mut map = HashMap::new(); for intrinsic in &data.intrinsics { - map.entry(&intrinsic.name[..]).or_insert(Vec::new()).push(intrinsic); + map.entry(&intrinsic.name[..]) + .or_insert_with(Vec::new) + .push(intrinsic); } let mut all_valid = true; - 'outer: - for rust in FUNCTIONS { + 'outer: for rust in FUNCTIONS { match rust.name { - // These aren't defined by Intel but they're defined by what appears - // to be all other compilers. For more information see - // rust-lang-nursery/stdsimd#307, and otherwise these signatures - // have all been manually verified. - "__readeflags" | - "__writeeflags" | - "__cpuid_count" | - "__cpuid" | - "__get_cpuid_max" => continue, + // These aren't defined by Intel but they're defined by what + // appears to be all other compilers. For more + // information see rust-lang-nursery/stdsimd#307, and + // otherwise these signatures have all been manually + // verified. + "__readeflags" | "__writeeflags" | "__cpuid_count" | "__cpuid" | "__get_cpuid_max" => continue, _ => {} } @@ -147,7 +153,7 @@ fn verify_all_signatures() { let mut errors = Vec::new(); for intel in intel { - match matches(rust, &intel) { + match matches(rust, intel) { Ok(()) => continue 'outer, Err(e) => errors.push(e), } @@ -161,11 +167,11 @@ fn verify_all_signatures() { assert!(all_valid); let mut missing = BTreeMap::new(); - for (name, intel) in map.iter() { + for (name, intel) in &map { // currently focused mainly on missing SIMD intrinsics, but there's // definitely some other assorted ones that we're missing. if !name.starts_with("_mm") { - continue + continue; } // we'll get to avx-512 later @@ -179,8 +185,9 @@ fn verify_all_signatures() { // } for intel in intel { - missing.entry(&intel.cpuid) - .or_insert(Vec::new()) + missing + .entry(&intel.cpuid) + .or_insert_with(Vec::new) .push(intel); } } @@ -191,8 +198,11 @@ fn verify_all_signatures() { if PRINT_MISSING_LISTS_MARKDOWN { println!("\n
<details><summary>{:?}</summary>
\n", k); for intel in v { - let url = format!("https://software.intel.com/sites/landingpage\ - /IntrinsicsGuide/#text={}&expand=5236", intel.name); + let url = format!( + "https://software.intel.com/sites/landingpage\ + /IntrinsicsGuide/#text={}&expand=5236", + intel.name + ); println!(" * [ ] [`{}`]({})", intel.name, url); } println!("</details>
\n"); @@ -251,7 +261,7 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> { let rust_feature = rust.target_feature .expect(&format!("no target feature listed for {}", rust.name)); if rust_feature.contains(&cpuid) { - continue + continue; } bail!( "intel cpuid `{}` not in `{}` for {}", @@ -263,9 +273,11 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> { if PRINT_INSTRUCTION_VIOLATIONS { if rust.instrs.is_empty() { - if intel.instruction.len() > 0 { - println!("instruction not listed for `{}`, but intel lists {:?}", - rust.name, intel.instruction); + if !intel.instruction.is_empty() { + println!( + "instruction not listed for `{}`, but intel lists {:?}", + rust.name, intel.instruction + ); } // If intel doesn't list any instructions and we do then don't @@ -280,8 +292,7 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> { if !asserting { println!( "intel failed to list `{}` as an instruction for `{}`", - instr, - rust.name + instr, rust.name ); } } @@ -315,51 +326,37 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> { } } - let any_i64 = rust.arguments.iter() - .cloned() - .chain(rust.ret) - .any(|arg| { - match *arg { - Type::PrimSigned(64) | - Type::PrimUnsigned(64) => true, - _ => false, - } - }); + let any_i64 = rust.arguments.iter().cloned().chain(rust.ret).any(|arg| { + match *arg { + Type::PrimSigned(64) | Type::PrimUnsigned(64) => true, + _ => false, + } + }); let any_i64_exempt = match rust.name { // These intrinsics have all been manually verified against Clang's // headers to be available on x86, and the u64 arguments seem // spurious I guess? - "_xsave" | - "_xrstor" | - "_xsetbv" | - "_xgetbv" | - "_xsaveopt" | - "_xsavec" | - "_xsaves" | - "_xrstors" => true, + "_xsave" | "_xrstor" | "_xsetbv" | "_xgetbv" | "_xsaveopt" + | "_xsavec" | "_xsaves" | "_xrstors" => true, // Apparently all of clang/msvc/gcc accept these intrinsics on // 32-bit, so let's do the same - "_mm_set_epi64x" | - "_mm_set1_epi64x" | - "_mm256_set_epi64x" | - "_mm256_setr_epi64x" | - "_mm256_set1_epi64x" => true, + "_mm_set_epi64x" | "_mm_set1_epi64x" | "_mm256_set_epi64x" + | "_mm256_setr_epi64x" | "_mm256_set1_epi64x" => true, // These return a 64-bit argument but they're assembled from other // 32-bit registers, so these work on 32-bit just fine. See #308 for // more info. 
- "_rdtsc" | - "__rdtscp" => true, + "_rdtsc" | "__rdtscp" => true, _ => false, }; - if any_i64 && !any_i64_exempt { - if !rust.file.contains("x86_64") { - bail!("intrinsic `{}` uses a 64-bit bare type but may be \ - available on 32-bit platforms", - rust.name) - } + if any_i64 && !any_i64_exempt && !rust.file.contains("x86_64") { + bail!( + "intrinsic `{}` uses a 64-bit bare type but may be \ + available on 32-bit platforms", + rust.name + ) } Ok(()) } @@ -394,8 +391,7 @@ fn equate(t: &Type, intel: &str, intrinsic: &str) -> Result<(), String> { (&Type::Ptr(&Type::PrimUnsigned(8)), "const void*") => {} (&Type::Ptr(&Type::PrimUnsigned(8)), "void*") => {} - (&Type::M64, "__m64") - | (&Type::Ptr(&Type::M64), "__m64*") => {} + (&Type::M64, "__m64") | (&Type::Ptr(&Type::M64), "__m64*") => {} (&Type::M128I, "__m128i") | (&Type::Ptr(&Type::M128I), "__m128i*") @@ -435,12 +431,12 @@ fn equate(t: &Type, intel: &str, intrinsic: &str) -> Result<(), String> { if intrinsic.starts_with("_mm_ucomi") && intrinsic.ends_with("_sd") => {} - _ => { - bail!( - "failed to equate: `{}` and {:?} for {}", - intel, t, intrinsic - ) - } + _ => bail!( + "failed to equate: `{}` and {:?} for {}", + intel, + t, + intrinsic + ), } Ok(()) } diff --git a/tests/cpu-detection.rs b/tests/cpu-detection.rs index 3efb61ab74..3af2a320d9 100644 --- a/tests/cpu-detection.rs +++ b/tests/cpu-detection.rs @@ -1,6 +1,7 @@ #![feature(cfg_target_feature)] #![cfg_attr(feature = "strict", deny(warnings))] -#![cfg_attr(feature = "cargo-clippy", allow(option_unwrap_used))] +#![cfg_attr(feature = "cargo-clippy", + allow(option_unwrap_used, use_debug, print_stdout))] #[cfg(any(target_arch = "arm", target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64",