@@ -43,6 +43,15 @@ pub fn _mm256_addsub_pd(a: f64x4, b: f64x4) -> f64x4 {
43
43
unsafe { addsubpd256 ( a, b) }
44
44
}
45
45
46
+ /// Alternately subtract and add packed single-precision (32-bit) floating-point
47
+ /// elements: even-indexed lanes get `a - b`, odd-indexed lanes get `a + b`.
48
+ #[ inline( always) ]
49
+ #[ target_feature = "+avx" ]
50
+ #[ cfg_attr( test, assert_instr( vaddsubps) ) ]
51
+ pub fn _mm256_addsub_ps ( a : f32x8 , b : f32x8 ) -> f32x8 {
52
+ unsafe { addsubps256 ( a, b) } // forwards to the `llvm.x86.avx.addsub.ps.256` binding
53
+ }
54
+
46
55
/// Subtract packed double-precision (64-bit) floating-point elements in `b`
47
56
/// from packed elements in `a`.
48
57
#[ inline( always) ]
@@ -110,6 +119,8 @@ pub fn _mm256_floor_pd(a: f64x4) -> f64x4 {
110
119
extern "C" { // foreign declarations bound to LLVM intrinsics through `link_name`
111
120
#[ link_name = "llvm.x86.avx.addsub.pd.256" ]
112
121
fn addsubpd256 ( a : f64x4 , b : f64x4 ) -> f64x4 ; // called by `_mm256_addsub_pd`
122
+ #[ link_name = "llvm.x86.avx.addsub.ps.256" ]
123
+ fn addsubps256 ( a : f32x8 , b : f32x8 ) -> f32x8 ; // called by `_mm256_addsub_ps`
113
124
#[ link_name = "llvm.x86.avx.round.pd.256" ]
114
125
fn roundpd256 ( a : f64x4 , b : i32 ) -> f64x4 ; // NOTE(review): `b` is presumably the rounding-mode selector — confirm
115
126
}
@@ -162,7 +173,16 @@ mod tests {
162
173
let a = f64x4:: new ( 1.0 , 2.0 , 3.0 , 4.0 ) ;
163
174
let b = f64x4:: new ( 5.0 , 6.0 , 7.0 , 8.0 ) ;
164
175
let r = avx:: _mm256_addsub_pd ( a, b) ;
165
- let e = f64x4:: new ( -4.0 , 8.0 , -4.0 , 12.0 ) ;
176
+ let e = f64x4:: new ( -4.0 , 8.0 , -4.0 , 12.0 ) ;
177
+ assert_eq ! ( r, e) ;
178
+ }
179
+
180
+ #[ simd_test = "avx" ]
181
+ fn _mm256_addsub_ps ( ) { // compares the f32x8 add/sub wrapper against hand-computed lanes
182
+ let a = f32x8:: new ( 1.0 , 2.0 , 3.0 , 4.0 , 1.0 , 2.0 , 3.0 , 4.0 ) ;
183
+ let b = f32x8:: new ( 5.0 , 6.0 , 7.0 , 8.0 , 5.0 , 6.0 , 7.0 , 8.0 ) ;
184
+ let r = avx:: _mm256_addsub_ps ( a, b) ;
185
+ let e = f32x8:: new ( -4.0 , 8.0 , -4.0 , 12.0 , -4.0 , 8.0 , -4.0 , 12.0 ) ; // `a - b` at even positions, `a + b` at odd positions
166
186
assert_eq ! ( r, e) ;
167
187
}
168
188
0 commit comments