Skip to content

Commit dda7157

Browse files
gwenn authored and alexcrichton committed
Last missing avx and avx2 intrinsics (#258)
* avx: _mm256_cvtss_f32, avx2: _mm256_cvtsd_f64, _mm256_cvtsi256_si32
* avx2: _mm256_slli_si256, _mm256_srli_si256

And aliases: _mm256_bslli_epi128, _mm256_bsrli_epi128
1 parent fedf60d commit dda7157

File tree

2 files changed

+117
-5
lines changed

2 files changed

+117
-5
lines changed

coresimd/src/x86/i586/avx.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2398,6 +2398,14 @@ pub unsafe fn _mm256_storeu2_m128i(
23982398
_mm_storeu_si128(hiaddr, hi);
23992399
}
24002400

2401+
/// Returns the first element (lane 0) of the input vector `a` of [8 x float].
///
/// The remaining seven lanes of `a` are ignored.
2402+
#[inline(always)]
2403+
#[target_feature = "+avx"]
2404+
// FIXME: the instruction assertion below is currently disabled.
//#[cfg_attr(test, assert_instr(movss))] FIXME
2405+
pub unsafe fn _mm256_cvtss_f32(a: f32x8) -> f32 {
2406+
a.extract(0)
2407+
}
2408+
24012409
/// LLVM intrinsics used in the above functions
24022410
#[allow(improper_ctypes)]
24032411
extern "C" {
@@ -4290,4 +4298,11 @@ mod tests {
42904298
assert_eq!(hi, e_hi);
42914299
assert_eq!(lo, e_lo);
42924300
}
4301+
4302+
// Checks that `_mm256_cvtss_f32` yields the first of eight distinct lane
// values, so returning any other lane would fail the assertion.
#[simd_test = "avx"]
4303+
unsafe fn _mm256_cvtss_f32() {
4304+
let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
4305+
let r = avx::_mm256_cvtss_f32(a);
4306+
assert_eq!(r, 1.);
4307+
}
42934308
}

coresimd/src/x86/i586/avx2.rs

Lines changed: 102 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -474,9 +474,6 @@ pub unsafe fn _mm256_broadcastw_epi16(a: i16x8) -> i16x16 {
474474
simd_shuffle16(a, i16x8::splat(0_i16), [0_u32; 16])
475475
}
476476

477-
// TODO _mm256_bslli_epi128
478-
// TODO _mm256_bsrli_epi128
479-
480477
/// Compare packed 64-bit integers in `a` and `b` for equality.
481478
#[inline(always)]
482479
#[target_feature = "+avx2"]
@@ -2050,7 +2047,26 @@ pub unsafe fn _mm256_slli_epi64(a: i64x4, imm8: i32) -> i64x4 {
20502047
pslliq(a, imm8)
20512048
}
20522049

2053-
// TODO _mm256_slli_si256 (__m256i a, const int imm8)
2050+
/// Shifts each 128-bit lane of `a` left by `imm8` bytes while shifting in
/// zeros.
///
/// The same operation is also exposed as `_mm256_bslli_epi128`.
2051+
#[inline(always)]
2052+
#[target_feature = "+avx2"]
2053+
#[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))]
2054+
pub unsafe fn _mm256_slli_si256(a: __m256i, imm8: i32) -> __m256i {
2055+
// Local adapter handed to `constify_imm8!`: forwards the selected count to
// the `vpslldq` intrinsic together with `a`.
macro_rules! call {
2056+
($imm8:expr) => {
2057+
vpslldq(a, $imm8)
2058+
}
2059+
}
2060+
// The byte count is scaled by 8 before reaching `vpslldq`; presumably the
// LLVM intrinsic expects a bit count -- TODO confirm.
// NOTE(review): `constify_imm8!` is defined elsewhere; how it treats
// `imm8 * 8` for `imm8` outside 0..=15 is not visible here -- verify.
constify_imm8!(imm8 * 8, call)
2061+
}
2062+
2063+
/// Shifts each 128-bit lane of `a` left by `imm8` bytes while shifting in
/// zeros.
///
/// This is an alias: it forwards both arguments unchanged to
/// `_mm256_slli_si256`.
2064+
#[inline(always)]
2065+
#[target_feature = "+avx2"]
2066+
#[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))]
2067+
pub unsafe fn _mm256_bslli_epi128(a: __m256i, imm8: i32) -> __m256i {
2068+
_mm256_slli_si256(a, imm8)
2069+
}
20542070

20552071
/// Shift packed 32-bit integers in `a` left by the amount
20562072
/// specified by the corresponding element in `count` while
@@ -2146,6 +2162,27 @@ pub unsafe fn _mm256_srav_epi32(a: i32x8, count: i32x8) -> i32x8 {
21462162
psravd256(a, count)
21472163
}
21482164

2165+
/// Shifts each 128-bit lane of `a` right by `imm8` bytes while shifting in
/// zeros.
///
/// The same operation is also exposed as `_mm256_bsrli_epi128`.
2166+
#[inline(always)]
2167+
#[target_feature = "+avx2"]
2168+
#[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))]
2169+
pub unsafe fn _mm256_srli_si256(a: __m256i, imm8: i32) -> __m256i {
2170+
// Local adapter handed to `constify_imm8!`: forwards the selected count to
// the `vpsrldq` intrinsic together with `a`.
macro_rules! call {
2171+
($imm8:expr) => {
2172+
vpsrldq(a, $imm8)
2173+
}
2174+
}
2175+
// The byte count is scaled by 8 before reaching `vpsrldq`; presumably the
// LLVM intrinsic expects a bit count -- TODO confirm.
// NOTE(review): `constify_imm8!` is defined elsewhere; how it treats
// `imm8 * 8` for `imm8` outside 0..=15 is not visible here -- verify.
constify_imm8!(imm8 * 8, call)
2176+
}
2177+
2178+
/// Shifts each 128-bit lane of `a` right by `imm8` bytes while shifting in
/// zeros.
///
/// This is an alias: it forwards both arguments unchanged to
/// `_mm256_srli_si256`.
2179+
#[inline(always)]
2180+
#[target_feature = "+avx2"]
2181+
#[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))]
2182+
pub unsafe fn _mm256_bsrli_epi128(a: __m256i, imm8: i32) -> __m256i {
2183+
_mm256_srli_si256(a, imm8)
2184+
}
2185+
21492186
/// Shift packed 16-bit integers in `a` right by `count` while shifting in
21502187
/// zeros.
21512188
#[inline(always)]
@@ -2698,6 +2735,22 @@ pub unsafe fn _mm256_extract_epi64(a: i64x4, imm8: i32) -> i64 {
26982735
a.extract_unchecked(imm8)
26992736
}
27002737

2738+
/// Returns the first element (lane 0) of the input vector `a` of
/// [4 x double].
///
/// The remaining three lanes of `a` are ignored.
2739+
#[inline(always)]
2740+
#[target_feature = "+avx2"]
2741+
// FIXME: the instruction assertion below is currently disabled.
//#[cfg_attr(test, assert_instr(movsd))] FIXME
2742+
pub unsafe fn _mm256_cvtsd_f64(a: f64x4) -> f64 {
2743+
a.extract(0)
2744+
}
2745+
2746+
/// Returns the first element (lane 0) of the input vector `a` of [8 x i32].
///
/// The remaining seven lanes of `a` are ignored.
2747+
#[inline(always)]
2748+
#[target_feature = "+avx2"]
2749+
// FIXME: the instruction assertion below is currently disabled.
//#[cfg_attr(test, assert_instr(movd))] FIXME
2750+
pub unsafe fn _mm256_cvtsi256_si32(a: i32x8) -> i32 {
2751+
a.extract(0)
2752+
}
2753+
27012754
#[allow(improper_ctypes)]
27022755
extern "C" {
27032756
#[link_name = "llvm.x86.avx2.pabs.b"]
@@ -2938,7 +2991,10 @@ extern "C" {
29382991
fn vpgatherqps(
29392992
src: f32x4, slice: *const i8, offsets: i64x4, mask: f32x4, scale: i8
29402993
) -> f32x4;
2941-
2994+
#[link_name = "llvm.x86.avx2.psll.dq"]
2995+
fn vpslldq(a: __m256i, b: i32) -> __m256i;
2996+
#[link_name = "llvm.x86.avx2.psrl.dq"]
2997+
fn vpsrldq(a: __m256i, b: i32) -> __m256i;
29422998
}
29432999

29443000
#[cfg(test)]
@@ -4075,6 +4131,13 @@ mod tests {
40754131
);
40764132
}
40774133

4134+
// Shifts a vector whose 64-bit elements are all 0xFFFFFFFF left by 3 bytes
// and expects every element to become 0xFFFFFFFF000000, i.e. the original
// value moved up by 24 bits.
#[simd_test = "avx2"]
4135+
unsafe fn _mm256_slli_si256() {
4136+
let a = i64x4::splat(0xFFFFFFFF);
4137+
let r = avx2::_mm256_slli_si256(__m256i::from(a), 3);
4138+
assert_eq!(r, __m256i::from(i64x4::splat(0xFFFFFFFF000000)));
4139+
}
4140+
40784141
#[simd_test = "avx2"]
40794142
unsafe fn _mm_sllv_epi32() {
40804143
let a = i32x4::splat(2);
@@ -4161,6 +4224,26 @@ mod tests {
41614224
assert_eq!(r, e);
41624225
}
41634226

4227+
// Shifts the bytes 1..=32 right by 3 bytes and expects each 16-byte half to
// be handled on its own: the first three bytes of each half are dropped and
// three zero bytes fill its end, with nothing carried across the halves.
#[simd_test = "avx2"]
4228+
unsafe fn _mm256_srli_si256() {
4229+
#[cfg_attr(rustfmt, rustfmt_skip)]
4230+
let a = i8x32::new(
4231+
1, 2, 3, 4, 5, 6, 7, 8,
4232+
9, 10, 11, 12, 13, 14, 15, 16,
4233+
17, 18, 19, 20, 21, 22, 23, 24,
4234+
25, 26, 27, 28, 29, 30, 31, 32,
4235+
);
4236+
let r = avx2::_mm256_srli_si256(__m256i::from(a), 3);
4237+
#[cfg_attr(rustfmt, rustfmt_skip)]
4238+
let e = i8x32::new(
4239+
4, 5, 6, 7, 8, 9, 10, 11,
4240+
12, 13, 14, 15, 16, 0, 0, 0,
4241+
20, 21, 22, 23, 24, 25, 26, 27,
4242+
28, 29, 30, 31, 32, 0, 0, 0,
4243+
);
4244+
assert_eq!(r, __m256i::from(e));
4245+
}
4246+
41644247
#[simd_test = "avx2"]
41654248
unsafe fn _mm256_srl_epi16() {
41664249
let a = i16x16::splat(0xFF);
@@ -5005,4 +5088,18 @@ mod tests {
50055088
let r = avx2::_mm256_extract_epi64(a, 3);
50065089
assert_eq!(r, 3);
50075090
}
5091+
5092+
// Checks that `_mm256_cvtsd_f64` yields the first of four distinct lane
// values, so returning any other lane would fail the assertion.
#[simd_test = "avx2"]
5093+
unsafe fn _mm256_cvtsd_f64() {
5094+
let a = f64x4::new(1., 2., 3., 4.);
5095+
let r = avx2::_mm256_cvtsd_f64(a);
5096+
assert_eq!(r, 1.);
5097+
}
5098+
5099+
// Checks that `_mm256_cvtsi256_si32` yields the first of eight distinct lane
// values, so returning any other lane would fail the assertion.
#[simd_test = "avx2"]
5100+
unsafe fn _mm256_cvtsi256_si32() {
5101+
let a = i32x8::new(1, 2, 3, 4, 5, 6, 7, 8);
5102+
let r = avx2::_mm256_cvtsi256_si32(a);
5103+
assert_eq!(r, 1);
5104+
}
50085105
}

0 commit comments

Comments
 (0)