Merge pull request rust-lang#55 from AdamNiederer/addps

alexcrichton · web-flow · commit b316109a5c4e · 2017-09-26T22:10:21.000-05:00
Add vaddsubps
diff --git a/src/x86/avx.rs b/src/x86/avx.rs
@@ -43,6 +43,15 @@ pub fn _mm256_addsub_pd(a: f64x4, b: f64x4) -> f64x4 {
     unsafe { addsubpd256(a, b) }
 }
 
+/// Alternately subtract and add packed single-precision (32-bit)
+/// floating-point elements: even positions yield `a - b`, odd `a + b`.
+#[inline(always)]
+#[target_feature = "+avx"]
+#[cfg_attr(test, assert_instr(vaddsubps))]
+pub fn _mm256_addsub_ps(a: f32x8, b: f32x8) -> f32x8 {
+    unsafe { addsubps256(a, b) }
+}
+
 /// Subtract packed double-precision (64-bit) floating-point elements in `b`
 /// from packed elements in `a`.
 #[inline(always)]
@@ -110,6 +119,8 @@ pub fn _mm256_floor_pd(a: f64x4) -> f64x4 {
 extern "C" {
     #[link_name = "llvm.x86.avx.addsub.pd.256"]
     fn addsubpd256(a: f64x4, b: f64x4) -> f64x4;
+    #[link_name = "llvm.x86.avx.addsub.ps.256"]
+    fn addsubps256(a: f32x8, b: f32x8) -> f32x8; // used by _mm256_addsub_ps
     #[link_name = "llvm.x86.avx.round.pd.256"]
     fn roundpd256(a: f64x4, b: i32) -> f64x4;
 }
@@ -162,7 +173,16 @@ mod tests {
         let a = f64x4::new(1.0, 2.0, 3.0, 4.0);
         let b = f64x4::new(5.0, 6.0, 7.0, 8.0);
         let r = avx::_mm256_addsub_pd(a, b);
-        let e = f64x4::new(-4.0,8.0,-4.0,12.0);
+        let e = f64x4::new(-4.0, 8.0, -4.0, 12.0);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "avx"]
+    fn _mm256_addsub_ps() {
+        let a = f32x8::new(1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0);
+        let b = f32x8::new(5.0, 6.0, 7.0, 8.0, 5.0, 6.0, 7.0, 8.0);
+        let r = avx::_mm256_addsub_ps(a, b); // a - b, a + b, a - b, ...
+        let e = f32x8::new(-4.0, 8.0, -4.0, 12.0, -4.0, 8.0, -4.0, 12.0);
         assert_eq!(r, e);
     }