@@ -474,9 +474,6 @@ pub unsafe fn _mm256_broadcastw_epi16(a: i16x8) -> i16x16 {
     simd_shuffle16(a, i16x8::splat(0_i16), [0_u32; 16])
 }
 
-// TODO _mm256_bslli_epi128
-// TODO _mm256_bsrli_epi128
-
 /// Compare packed 64-bit integers in `a` and `b` for equality.
 #[inline(always)]
 #[target_feature = "+avx2"]
@@ -2050,7 +2047,26 @@ pub unsafe fn _mm256_slli_epi64(a: i64x4, imm8: i32) -> i64x4 {
     pslliq(a, imm8)
 }
 
-// TODO _mm256_slli_si256 (__m256i a, const int imm8)
+/// Shift 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
+#[inline(always)]
+#[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))]
+pub unsafe fn _mm256_slli_si256(a: __m256i, imm8: i32) -> __m256i {
+    macro_rules! call {
+        ($imm8:expr) => {
+            vpslldq(a, $imm8)
+        }
+    }
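+    // `imm8` is a byte count, but the underlying LLVM intrinsic takes the
+    // shift amount in bits, hence the `* 8` below.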
+    constify_imm8!(imm8 * 8, call)
+}
+
+/// Shift 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
+#[inline(always)]
+#[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))]
+pub unsafe fn _mm256_bslli_epi128(a: __m256i, imm8: i32) -> __m256i {
+    _mm256_slli_si256(a, imm8)
+}
 
 /// Shift packed 32-bit integers in `a` left by the amount
 /// specified by the corresponding element in `count` while
@@ -2146,6 +2162,27 @@ pub unsafe fn _mm256_srav_epi32(a: i32x8, count: i32x8) -> i32x8 {
     psravd256(a, count)
 }
 
+/// Shift 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
+#[inline(always)]
+#[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))]
+pub unsafe fn _mm256_srli_si256(a: __m256i, imm8: i32) -> __m256i {
+    macro_rules! call {
+        ($imm8:expr) => {
+            vpsrldq(a, $imm8)
+        }
+    }
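+    // As above, convert the byte count to the bit count the intrinsic expects.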
+    constify_imm8!(imm8 * 8, call)
+}
+
+/// Shift 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
+#[inline(always)]
+#[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))]
+pub unsafe fn _mm256_bsrli_epi128(a: __m256i, imm8: i32) -> __m256i {
+    _mm256_srli_si256(a, imm8)
+}
+
 /// Shift packed 16-bit integers in `a` right by `count` while shifting in
 /// zeros.
 #[inline(always)]
@@ -2698,6 +2735,22 @@ pub unsafe fn _mm256_extract_epi64(a: i64x4, imm8: i32) -> i64 {
     a.extract_unchecked(imm8)
 }
 
+/// Returns the first element of the input vector of [4 x double].
+#[inline(always)]
+#[target_feature = "+avx2"]
+//#[cfg_attr(test, assert_instr(movsd))] FIXME
+pub unsafe fn _mm256_cvtsd_f64(a: f64x4) -> f64 {
+    a.extract(0)
+}
+
+/// Returns the first element of the input vector of [8 x i32].
+#[inline(always)]
+#[target_feature = "+avx2"]
+//#[cfg_attr(test, assert_instr(movd))] FIXME
+pub unsafe fn _mm256_cvtsi256_si32(a: i32x8) -> i32 {
+    a.extract(0)
+}
+
 #[allow(improper_ctypes)]
 extern "C" {
     #[link_name = "llvm.x86.avx2.pabs.b"]
@@ -2938,7 +2991,10 @@ extern "C" {
     fn vpgatherqps(
         src: f32x4, slice: *const i8, offsets: i64x4, mask: f32x4, scale: i8
     ) -> f32x4;
-
+    #[link_name = "llvm.x86.avx2.psll.dq"]
+    fn vpslldq(a: __m256i, b: i32) -> __m256i;
+    #[link_name = "llvm.x86.avx2.psrl.dq"]
+    fn vpsrldq(a: __m256i, b: i32) -> __m256i;
 }
 
 #[cfg(test)]
@@ -4075,6 +4131,13 @@ mod tests {
         );
     }
 
+    #[simd_test = "avx2"]
+    unsafe fn _mm256_slli_si256() {
+        let a = i64x4::splat(0xFFFFFFFF);
+        let r = avx2::_mm256_slli_si256(__m256i::from(a), 3);
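+        // Each 128-bit lane shifts left by 3 bytes, so every 0xFFFF_FFFF
+        // word ends up 24 bits higher within its 64-bit element.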
+        assert_eq!(r, __m256i::from(i64x4::splat(0xFFFFFFFF000000)));
+    }
+
     #[simd_test = "avx2"]
     unsafe fn _mm_sllv_epi32() {
         let a = i32x4::splat(2);
@@ -4161,6 +4224,26 @@ mod tests {
         assert_eq!(r, e);
     }
 
+    #[simd_test = "avx2"]
+    unsafe fn _mm256_srli_si256() {
+        #[cfg_attr(rustfmt, rustfmt_skip)]
+        let a = i8x32::new(
+            1, 2, 3, 4, 5, 6, 7, 8,
+            9, 10, 11, 12, 13, 14, 15, 16,
+            17, 18, 19, 20, 21, 22, 23, 24,
+            25, 26, 27, 28, 29, 30, 31, 32,
+        );
+        let r = avx2::_mm256_srli_si256(__m256i::from(a), 3);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
+        let e = i8x32::new(
+            4, 5, 6, 7, 8, 9, 10, 11,
+            12, 13, 14, 15, 16, 0, 0, 0,
+            20, 21, 22, 23, 24, 25, 26, 27,
+            28, 29, 30, 31, 32, 0, 0, 0,
+        );
+        assert_eq!(r, __m256i::from(e));
+    }
+
     #[simd_test = "avx2"]
     unsafe fn _mm256_srl_epi16() {
         let a = i16x16::splat(0xFF);
@@ -5005,4 +5088,18 @@ mod tests {
         let r = avx2::_mm256_extract_epi64(a, 3);
         assert_eq!(r, 3);
     }
+
+    #[simd_test = "avx2"]
+    unsafe fn _mm256_cvtsd_f64() {
+        let a = f64x4::new(1., 2., 3., 4.);
+        let r = avx2::_mm256_cvtsd_f64(a);
+        assert_eq!(r, 1.);
+    }
+
+    #[simd_test = "avx2"]
+    unsafe fn _mm256_cvtsi256_si32() {
+        let a = i32x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let r = avx2::_mm256_cvtsi256_si32(a);
+        assert_eq!(r, 1);
+    }
 }