Skip to content

Commit b316109

Browse files
authored
Merge pull request rust-lang#55 from AdamNiederer/addps
Add vaddsubps
2 parents b408555 + 499a21b commit b316109

File tree

1 file changed

+21
-1
lines changed

1 file changed

+21
-1
lines changed

src/x86/avx.rs

Lines changed: 21 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,15 @@ pub fn _mm256_addsub_pd(a: f64x4, b: f64x4) -> f64x4 {
4343
unsafe { addsubpd256(a, b) }
4444
}
4545

46+
/// Alternately add and subtract packed single-precision (32-bit)
47+
/// floating-point elements in `a` to/from packed elements in `b`.
48+
#[inline(always)]
49+
#[target_feature = "+avx"]
50+
#[cfg_attr(test, assert_instr(vaddsubps))]
51+
pub fn _mm256_addsub_ps(a: f32x8, b: f32x8) -> f32x8 {
52+
unsafe { addsubps256(a, b) }
53+
}
54+
4655
/// Subtract packed double-precision (64-bit) floating-point elements in `b`
4756
/// from packed elements in `a`.
4857
#[inline(always)]
@@ -110,6 +119,8 @@ pub fn _mm256_floor_pd(a: f64x4) -> f64x4 {
110119
extern "C" {
111120
#[link_name = "llvm.x86.avx.addsub.pd.256"]
112121
fn addsubpd256(a: f64x4, b: f64x4) -> f64x4;
122+
#[link_name = "llvm.x86.avx.addsub.ps.256"]
123+
fn addsubps256(a: f32x8, b: f32x8) -> f32x8;
113124
#[link_name = "llvm.x86.avx.round.pd.256"]
114125
fn roundpd256(a: f64x4, b: i32) -> f64x4;
115126
}
@@ -162,7 +173,16 @@ mod tests {
162173
let a = f64x4::new(1.0, 2.0, 3.0, 4.0);
163174
let b = f64x4::new(5.0, 6.0, 7.0, 8.0);
164175
let r = avx::_mm256_addsub_pd(a, b);
165-
let e = f64x4::new(-4.0,8.0,-4.0,12.0);
176+
let e = f64x4::new(-4.0, 8.0, -4.0, 12.0);
177+
assert_eq!(r, e);
178+
}
179+
180+
#[simd_test = "avx"]
181+
fn _mm256_addsub_ps() {
182+
let a = f32x8::new(1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0);
183+
let b = f32x8::new(5.0, 6.0, 7.0, 8.0, 5.0, 6.0, 7.0, 8.0);
184+
let r = avx::_mm256_addsub_ps(a, b);
185+
let e = f32x8::new(-4.0, 8.0, -4.0, 12.0, -4.0, 8.0, -4.0, 12.0);
166186
assert_eq!(r, e);
167187
}
168188

0 commit comments

Comments
 (0)