Skip to content
This repository was archived by the owner on May 28, 2025. It is now read-only.

Commit 17c1c32

Browse files
committedFeb 19, 2025·
Auto merge of rust-lang#135408 - RalfJung:x86-sse2, r=workingjubilee
x86: use SSE2 to pass float and SIMD types This builds on the new X86Sse2 ABI landed in rust-lang#137037 to actually make it a separate ABI from the default x86 ABI, and use SSE2 registers. Specifically, we use it in two ways: to return `f64` values in a register rather than by-ptr, and to pass vectors of size up to 128bit in a register (or, well, whatever LLVM does when passing `<4 x float>` by-val, I don't actually know if this ends up in a register). Cc `@workingjubilee` Fixes rust-lang#133611 try-job: aarch64-apple try-job: aarch64-gnu try-job: aarch64-gnu-debug try-job: test-various try-job: x86_64-gnu-nopt try-job: dist-i586-gnu-i586-i686-musl try-job: x86_64-msvc-1
2 parents f44efbf + 803feb5 commit 17c1c32

File tree

14 files changed

+273
-151
lines changed

14 files changed

+273
-151
lines changed
 

‎compiler/rustc_target/src/callconv/mod.rs

Lines changed: 74 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use rustc_abi::{
77
};
88
use rustc_macros::HashStable_Generic;
99

10-
use crate::spec::{HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, WasmCAbi};
10+
use crate::spec::{HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, RustcAbi, WasmCAbi};
1111

1212
mod aarch64;
1313
mod amdgpu;
@@ -386,6 +386,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
386386
/// Pass this argument directly instead. Should NOT be used!
387387
/// Only exists because of past ABI mistakes that will take time to fix
388388
/// (see <https://github.com/rust-lang/rust/issues/115666>).
389+
#[track_caller]
389390
pub fn make_direct_deprecated(&mut self) {
390391
match self.mode {
391392
PassMode::Indirect { .. } => {
@@ -398,6 +399,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
398399

399400
/// Pass this argument indirectly, by passing a (thin or wide) pointer to the argument instead.
400401
/// This is valid for both sized and unsized arguments.
402+
#[track_caller]
401403
pub fn make_indirect(&mut self) {
402404
match self.mode {
403405
PassMode::Direct(_) | PassMode::Pair(_, _) => {
@@ -412,6 +414,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
412414

413415
/// Same as `make_indirect`, but for arguments that are ignored. Only needed for ABIs that pass
414416
/// ZSTs indirectly.
417+
#[track_caller]
415418
pub fn make_indirect_from_ignore(&mut self) {
416419
match self.mode {
417420
PassMode::Ignore => {
@@ -716,27 +719,46 @@ impl<'a, Ty> FnAbi<'a, Ty> {
716719
C: HasDataLayout + HasTargetSpec,
717720
{
718721
let spec = cx.target_spec();
719-
match &spec.arch[..] {
722+
match &*spec.arch {
720723
"x86" => x86::compute_rust_abi_info(cx, self, abi),
721724
"riscv32" | "riscv64" => riscv::compute_rust_abi_info(cx, self, abi),
722725
"loongarch64" => loongarch::compute_rust_abi_info(cx, self, abi),
723726
"aarch64" => aarch64::compute_rust_abi_info(cx, self),
724727
_ => {}
725728
};
726729

730+
// Decides whether we can pass the given SIMD argument via `PassMode::Direct`.
731+
// May only return `true` if the target will always pass those arguments the same way,
732+
// no matter what the user does with `-Ctarget-feature`! In other words, whatever
733+
// target features are required to pass a SIMD value in registers must be listed in
734+
// the `abi_required_features` for the current target and ABI.
735+
let can_pass_simd_directly = |arg: &ArgAbi<'_, Ty>| match &*spec.arch {
736+
// On x86, if we have SSE2 (which we have by default for x86_64), we can always pass up
737+
// to 128-bit-sized vectors.
738+
"x86" if spec.rustc_abi == Some(RustcAbi::X86Sse2) => arg.layout.size.bits() <= 128,
739+
"x86_64" if spec.rustc_abi != Some(RustcAbi::X86Softfloat) => {
740+
arg.layout.size.bits() <= 128
741+
}
742+
// So far, we haven't implemented this logic for any other target.
743+
_ => false,
744+
};
745+
727746
for (arg_idx, arg) in self
728747
.args
729748
.iter_mut()
730749
.enumerate()
731750
.map(|(idx, arg)| (Some(idx), arg))
732751
.chain(iter::once((None, &mut self.ret)))
733752
{
734-
if arg.is_ignore() {
753+
// If the logic above already picked a specific type to cast the argument to, leave that
754+
// in place.
755+
if matches!(arg.mode, PassMode::Ignore | PassMode::Cast { .. }) {
735756
continue;
736757
}
737758

738759
if arg_idx.is_none()
739760
&& arg.layout.size > Primitive::Pointer(AddressSpace::DATA).size(cx) * 2
761+
&& !matches!(arg.layout.backend_repr, BackendRepr::Vector { .. })
740762
{
741763
// Return values larger than 2 registers using a return area
742764
// pointer. LLVM and Cranelift disagree about how to return
@@ -746,7 +768,8 @@ impl<'a, Ty> FnAbi<'a, Ty> {
746768
// return value independently and decide to pass it in a
747769
// register or not, which would result in the return value
748770
// being passed partially in registers and partially through a
749-
// return area pointer.
771+
// return area pointer. For large IR-level values such as `i128`,
772+
// cranelift will even split up the value into smaller chunks.
750773
//
751774
// While Cranelift may need to be fixed as the LLVM behavior is
752775
// generally more correct with respect to the surface language,
@@ -776,53 +799,60 @@ impl<'a, Ty> FnAbi<'a, Ty> {
776799
// rustc_target already ensure any return value which doesn't
777800
// fit in the available amount of return registers is passed in
778801
// the right way for the current target.
802+
//
803+
// The adjustment is not necessary nor desired for types with a vector
804+
// representation; those are handled below.
779805
arg.make_indirect();
780806
continue;
781807
}
782808

783809
match arg.layout.backend_repr {
784-
BackendRepr::Memory { .. } => {}
785-
786-
// This is a fun case! The gist of what this is doing is
787-
// that we want callers and callees to always agree on the
788-
// ABI of how they pass SIMD arguments. If we were to *not*
789-
// make these arguments indirect then they'd be immediates
790-
// in LLVM, which means that they'd used whatever the
791-
// appropriate ABI is for the callee and the caller. That
792-
// means, for example, if the caller doesn't have AVX
793-
// enabled but the callee does, then passing an AVX argument
794-
// across this boundary would cause corrupt data to show up.
795-
//
796-
// This problem is fixed by unconditionally passing SIMD
797-
// arguments through memory between callers and callees
798-
// which should get them all to agree on ABI regardless of
799-
// target feature sets. Some more information about this
800-
// issue can be found in #44367.
801-
//
802-
// Note that the intrinsic ABI is exempt here as
803-
// that's how we connect up to LLVM and it's unstable
804-
// anyway, we control all calls to it in libstd.
805-
BackendRepr::Vector { .. }
806-
if abi != ExternAbi::RustIntrinsic && spec.simd_types_indirect =>
807-
{
808-
arg.make_indirect();
809-
continue;
810+
BackendRepr::Memory { .. } => {
811+
// Compute `Aggregate` ABI.
812+
813+
let is_indirect_not_on_stack =
814+
matches!(arg.mode, PassMode::Indirect { on_stack: false, .. });
815+
assert!(is_indirect_not_on_stack);
816+
817+
let size = arg.layout.size;
818+
if arg.layout.is_sized()
819+
&& size <= Primitive::Pointer(AddressSpace::DATA).size(cx)
820+
{
821+
// We want to pass small aggregates as immediates, but using
822+
// an LLVM aggregate type for this leads to bad optimizations,
823+
// so we pick an appropriately sized integer type instead.
824+
arg.cast_to(Reg { kind: RegKind::Integer, size });
825+
}
810826
}
811827

812-
_ => continue,
813-
}
814-
// Compute `Aggregate` ABI.
815-
816-
let is_indirect_not_on_stack =
817-
matches!(arg.mode, PassMode::Indirect { on_stack: false, .. });
818-
assert!(is_indirect_not_on_stack);
819-
820-
let size = arg.layout.size;
821-
if !arg.layout.is_unsized() && size <= Primitive::Pointer(AddressSpace::DATA).size(cx) {
822-
// We want to pass small aggregates as immediates, but using
823-
// an LLVM aggregate type for this leads to bad optimizations,
824-
// so we pick an appropriately sized integer type instead.
825-
arg.cast_to(Reg { kind: RegKind::Integer, size });
828+
BackendRepr::Vector { .. } => {
829+
// This is a fun case! The gist of what this is doing is
830+
// that we want callers and callees to always agree on the
831+
// ABI of how they pass SIMD arguments. If we were to *not*
832+
// make these arguments indirect then they'd be immediates
833+
// in LLVM, which means that they'd used whatever the
834+
// appropriate ABI is for the callee and the caller. That
835+
// means, for example, if the caller doesn't have AVX
836+
// enabled but the callee does, then passing an AVX argument
837+
// across this boundary would cause corrupt data to show up.
838+
//
839+
// This problem is fixed by unconditionally passing SIMD
840+
// arguments through memory between callers and callees
841+
// which should get them all to agree on ABI regardless of
842+
// target feature sets. Some more information about this
843+
// issue can be found in #44367.
844+
//
845+
// Note that the intrinsic ABI is exempt here as those are not
846+
// real functions anyway, and the backend expects very specific types.
847+
if abi != ExternAbi::RustIntrinsic
848+
&& spec.simd_types_indirect
849+
&& !can_pass_simd_directly(arg)
850+
{
851+
arg.make_indirect();
852+
}
853+
}
854+
855+
_ => {}
826856
}
827857
}
828858
}

‎compiler/rustc_target/src/callconv/x86.rs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use rustc_abi::{
44
};
55

66
use crate::callconv::{ArgAttribute, FnAbi, PassMode};
7-
use crate::spec::HasTargetSpec;
7+
use crate::spec::{HasTargetSpec, RustcAbi};
88

99
#[derive(PartialEq)]
1010
pub(crate) enum Flavor {
@@ -236,8 +236,16 @@ where
236236
_ => false, // anyway not passed via registers on x86
237237
};
238238
if has_float {
239-
if fn_abi.ret.layout.size <= Primitive::Pointer(AddressSpace::DATA).size(cx) {
240-
// Same size or smaller than pointer, return in a register.
239+
if cx.target_spec().rustc_abi == Some(RustcAbi::X86Sse2)
240+
&& fn_abi.ret.layout.backend_repr.is_scalar()
241+
&& fn_abi.ret.layout.size.bits() <= 128
242+
{
243+
// This is a single scalar that fits into an SSE register, and the target uses the
244+
// SSE ABI. We prefer this over integer registers as float scalars need to be in SSE
245+
// registers for float operations, so that's the best place to pass them around.
246+
fn_abi.ret.cast_to(Reg { kind: RegKind::Vector, size: fn_abi.ret.layout.size });
247+
} else if fn_abi.ret.layout.size <= Primitive::Pointer(AddressSpace::DATA).size(cx) {
248+
// Same size or smaller than pointer, return in an integer register.
241249
fn_abi.ret.cast_to(Reg { kind: RegKind::Integer, size: fn_abi.ret.layout.size });
242250
} else {
243251
// Larger than a pointer, return indirectly.

‎tests/assembly/closure-inherit-target-feature.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@
88

99
use std::arch::x86_64::{__m128, _mm_blend_ps};
1010

11+
// Use an explicit return pointer to prevent tail call optimization.
1112
#[no_mangle]
12-
pub unsafe fn sse41_blend_nofeature(x: __m128, y: __m128) -> __m128 {
13+
pub unsafe fn sse41_blend_nofeature(x: __m128, y: __m128, ret: *mut __m128) {
1314
let f = {
1415
// check that _mm_blend_ps is not being inlined into the closure
1516
// CHECK-LABEL: {{sse41_blend_nofeature.*closure.*:}}
@@ -18,9 +19,9 @@ pub unsafe fn sse41_blend_nofeature(x: __m128, y: __m128) -> __m128 {
1819
// CHECK-NOT: blendps
1920
// CHECK: ret
2021
#[inline(never)]
21-
|x, y| _mm_blend_ps(x, y, 0b0101)
22+
|x, y, ret: *mut __m128| unsafe { *ret = _mm_blend_ps(x, y, 0b0101) }
2223
};
23-
f(x, y)
24+
f(x, y, ret);
2425
}
2526

2627
#[no_mangle]

‎tests/assembly/x86-return-float.rs

Lines changed: 18 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -33,19 +33,18 @@ use minicore::*;
3333
// CHECK-LABEL: return_f32:
3434
#[no_mangle]
3535
pub fn return_f32(x: f32) -> f32 {
36-
// CHECK: movl {{.*}}(%ebp), %eax
37-
// CHECK-NOT: ax
38-
// CHECK: retl
36+
// CHECK: movss {{.*}}(%ebp), %xmm0
37+
// CHECK-NEXT: popl %ebp
38+
// CHECK-NEXT: retl
3939
x
4040
}
4141

4242
// CHECK-LABEL: return_f64:
4343
#[no_mangle]
4444
pub fn return_f64(x: f64) -> f64 {
45-
// CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
46-
// CHECK-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL:.*]]
47-
// CHECK-NEXT: movsd %[[VAL]], (%[[PTR]])
48-
// CHECK: retl
45+
// CHECK: movsd {{.*}}(%ebp), %xmm0
46+
// CHECK-NEXT: popl %ebp
47+
// CHECK-NEXT: retl
4948
x
5049
}
5150

@@ -157,7 +156,7 @@ pub unsafe fn call_f32(x: &mut f32) {
157156
}
158157
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
159158
// CHECK: calll {{()|_}}get_f32
160-
// CHECK-NEXT: movl %eax, (%[[PTR]])
159+
// CHECK-NEXT: movss %xmm0, (%[[PTR]])
161160
*x = get_f32();
162161
}
163162

@@ -169,8 +168,7 @@ pub unsafe fn call_f64(x: &mut f64) {
169168
}
170169
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
171170
// CHECK: calll {{()|_}}get_f64
172-
// CHECK: movsd {{.*}}(%{{ebp|esp}}), %[[VAL:.*]]
173-
// CHECK-NEXT: movsd %[[VAL:.*]], (%[[PTR]])
171+
// CHECK-NEXT: movlps %xmm0, (%[[PTR]])
174172
*x = get_f64();
175173
}
176174

@@ -315,25 +313,21 @@ pub unsafe fn call_other_f64(x: &mut (usize, f64)) {
315313
#[no_mangle]
316314
pub fn return_f16(x: f16) -> f16 {
317315
// CHECK: pushl %ebp
318-
// CHECK: movl %esp, %ebp
319-
// CHECK: movzwl 8(%ebp), %eax
320-
// CHECK: popl %ebp
321-
// CHECK: retl
316+
// CHECK-NEXT: movl %esp, %ebp
317+
// CHECK-NEXT: pinsrw $0, 8(%ebp), %xmm0
318+
// CHECK-NEXT: popl %ebp
319+
// CHECK-NEXT: retl
322320
x
323321
}
324322

325323
// CHECK-LABEL: return_f128:
326324
#[no_mangle]
327325
pub fn return_f128(x: f128) -> f128 {
328-
// CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
329-
// CHECK-NEXT: movl [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
330-
// CHECK-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
331-
// CHECK-NEXT: movl [[#%d,OFFSET+12]](%ebp), %[[VAL3:.*]]
332-
// CHECK-NEXT: movl [[#%d,OFFSET+16]](%ebp), %[[VAL4:.*]]
333-
// CHECK-NEXT: movl %[[VAL4:.*]] 12(%[[PTR]])
334-
// CHECK-NEXT: movl %[[VAL3:.*]] 8(%[[PTR]])
335-
// CHECK-NEXT: movl %[[VAL2:.*]] 4(%[[PTR]])
336-
// CHECK-NEXT: movl %[[VAL1:.*]] (%[[PTR]])
337-
// CHECK: retl
326+
// CHECK: pushl %ebp
327+
// CHECK-NEXT: movl %esp, %ebp
328+
// linux-NEXT: movaps 8(%ebp), %xmm0
329+
// win-NEXT: movups 8(%ebp), %xmm0
330+
// CHECK-NEXT: popl %ebp
331+
// CHECK-NEXT: retl
338332
x
339333
}

‎tests/codegen/abi-x86-sse.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
//@ compile-flags: -Z merge-functions=disabled
2+
3+
//@ revisions: x86-64
4+
//@[x86-64] compile-flags: --target x86_64-unknown-linux-gnu
5+
//@[x86-64] needs-llvm-components: x86
6+
7+
//@ revisions: x86-32
8+
//@[x86-32] compile-flags: --target i686-unknown-linux-gnu
9+
//@[x86-32] needs-llvm-components: x86
10+
11+
//@ revisions: x86-32-nosse
12+
//@[x86-32-nosse] compile-flags: --target i586-unknown-linux-gnu
13+
//@[x86-32-nosse] needs-llvm-components: x86
14+
15+
#![feature(no_core, lang_items, rustc_attrs, repr_simd)]
16+
#![no_core]
17+
#![crate_type = "lib"]
18+
19+
#[lang = "sized"]
20+
trait Sized {}
21+
22+
#[lang = "copy"]
23+
trait Copy {}
24+
25+
// Ensure this type is passed without ptr indirection on targets that
26+
// require SSE2.
27+
#[repr(simd)]
28+
pub struct Sse([f32; 4]);
29+
30+
// x86-64: <4 x float> @sse_id(<4 x float> {{[^,]*}})
31+
// x86-32: <4 x float> @sse_id(<4 x float> {{[^,]*}})
32+
// x86-32-nosse: void @sse_id(ptr{{( [^,]*)?}} sret([16 x i8]){{( .*)?}}, ptr{{( [^,]*)?}})
33+
#[no_mangle]
34+
pub fn sse_id(x: Sse) -> Sse {
35+
x
36+
}

‎tests/codegen/float/f128.rs

Lines changed: 55 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
// 32-bit x86 returns float types differently to avoid the x87 stack.
22
// 32-bit systems will return 128bit values using a return area pointer.
33
// Emscripten aligns f128 to 8 bytes, not 16.
4-
//@ revisions: x86 bit32 bit64 emscripten
5-
//@[x86] only-x86
4+
//@ revisions: x86-sse x86-nosse bit32 bit64 emscripten
5+
//@[x86-sse] only-x86
6+
//@[x86-sse] only-rustc_abi-x86-sse2
7+
//@[x86-nosse] only-x86
8+
//@[x86-nosse] ignore-rustc_abi-x86-sse2
69
//@[bit32] ignore-x86
710
//@[bit32] ignore-emscripten
811
//@[bit32] only-32bit
@@ -60,7 +63,8 @@ pub fn f128_le(a: f128, b: f128) -> bool {
6063
a <= b
6164
}
6265

63-
// x86-LABEL: void @f128_neg({{.*}}sret([16 x i8])
66+
// x86-nosse-LABEL: void @f128_neg({{.*}}sret([16 x i8])
67+
// x86-sse-LABEL: <16 x i8> @f128_neg(fp128
6468
// bit32-LABEL: void @f128_neg({{.*}}sret([16 x i8])
6569
// bit64-LABEL: fp128 @f128_neg(
6670
// emscripten-LABEL: void @f128_neg({{.*}}sret([16 x i8])
@@ -70,7 +74,8 @@ pub fn f128_neg(a: f128) -> f128 {
7074
-a
7175
}
7276

73-
// x86-LABEL: void @f128_add({{.*}}sret([16 x i8])
77+
// x86-nosse-LABEL: void @f128_add({{.*}}sret([16 x i8])
78+
// x86-sse-LABEL: <16 x i8> @f128_add(fp128
7479
// bit32-LABEL: void @f128_add({{.*}}sret([16 x i8])
7580
// bit64-LABEL: fp128 @f128_add(
7681
// emscripten-LABEL: void @f128_add({{.*}}sret([16 x i8])
@@ -80,7 +85,8 @@ pub fn f128_add(a: f128, b: f128) -> f128 {
8085
a + b
8186
}
8287

83-
// x86-LABEL: void @f128_sub({{.*}}sret([16 x i8])
88+
// x86-nosse-LABEL: void @f128_sub({{.*}}sret([16 x i8])
89+
// x86-sse-LABEL: <16 x i8> @f128_sub(fp128
8490
// bit32-LABEL: void @f128_sub({{.*}}sret([16 x i8])
8591
// bit64-LABEL: fp128 @f128_sub(
8692
// emscripten-LABEL: void @f128_sub({{.*}}sret([16 x i8])
@@ -90,7 +96,8 @@ pub fn f128_sub(a: f128, b: f128) -> f128 {
9096
a - b
9197
}
9298

93-
// x86-LABEL: void @f128_mul({{.*}}sret([16 x i8])
99+
// x86-nosse-LABEL: void @f128_mul({{.*}}sret([16 x i8])
100+
// x86-sse-LABEL: <16 x i8> @f128_mul(fp128
94101
// bit32-LABEL: void @f128_mul({{.*}}sret([16 x i8])
95102
// bit64-LABEL: fp128 @f128_mul(
96103
// emscripten-LABEL: void @f128_mul({{.*}}sret([16 x i8])
@@ -100,7 +107,8 @@ pub fn f128_mul(a: f128, b: f128) -> f128 {
100107
a * b
101108
}
102109

103-
// x86-LABEL: void @f128_div({{.*}}sret([16 x i8])
110+
// x86-nosse-LABEL: void @f128_div({{.*}}sret([16 x i8])
111+
// x86-sse-LABEL: <16 x i8> @f128_div(fp128
104112
// bit32-LABEL: void @f128_div({{.*}}sret([16 x i8])
105113
// bit64-LABEL: fp128 @f128_div(
106114
// emscripten-LABEL: void @f128_div({{.*}}sret([16 x i8])
@@ -110,7 +118,8 @@ pub fn f128_div(a: f128, b: f128) -> f128 {
110118
a / b
111119
}
112120

113-
// x86-LABEL: void @f128_rem({{.*}}sret([16 x i8])
121+
// x86-nosse-LABEL: void @f128_rem({{.*}}sret([16 x i8])
122+
// x86-sse-LABEL: <16 x i8> @f128_rem(fp128
114123
// bit32-LABEL: void @f128_rem({{.*}}sret([16 x i8])
115124
// bit64-LABEL: fp128 @f128_rem(
116125
// emscripten-LABEL: void @f128_rem({{.*}}sret([16 x i8])
@@ -162,7 +171,8 @@ pub fn f128_rem_assign(a: &mut f128, b: f128) {
162171

163172
/* float to float conversions */
164173

165-
// x86-LABEL: i16 @f128_as_f16(
174+
// x86-sse-LABEL: <2 x i8> @f128_as_f16(
175+
// x86-nosse-LABEL: i16 @f128_as_f16(
166176
// bits32-LABEL: half @f128_as_f16(
167177
// bits64-LABEL: half @f128_as_f16(
168178
#[no_mangle]
@@ -171,7 +181,8 @@ pub fn f128_as_f16(a: f128) -> f16 {
171181
a as f16
172182
}
173183

174-
// x86-LABEL: i32 @f128_as_f32(
184+
// x86-sse-LABEL: <4 x i8> @f128_as_f32(
185+
// x86-nosse-LABEL: i32 @f128_as_f32(
175186
// bit32-LABEL: float @f128_as_f32(
176187
// bit64-LABEL: float @f128_as_f32(
177188
// emscripten-LABEL: float @f128_as_f32(
@@ -181,7 +192,8 @@ pub fn f128_as_f32(a: f128) -> f32 {
181192
a as f32
182193
}
183194

184-
// x86-LABEL: void @f128_as_f64(
195+
// x86-sse-LABEL: <8 x i8> @f128_as_f64(
196+
// x86-nosse-LABEL: void @f128_as_f64({{.*}}sret([8 x i8])
185197
// bit32-LABEL: double @f128_as_f64(
186198
// bit64-LABEL: double @f128_as_f64(
187199
// emscripten-LABEL: double @f128_as_f64(
@@ -191,7 +203,8 @@ pub fn f128_as_f64(a: f128) -> f64 {
191203
a as f64
192204
}
193205

194-
// x86-LABEL: void @f128_as_self({{.*}}sret([16 x i8])
206+
// x86-sse-LABEL: <16 x i8> @f128_as_self(
207+
// x86-nosse-LABEL: void @f128_as_self({{.*}}sret([16 x i8])
195208
// bit32-LABEL: void @f128_as_self({{.*}}sret([16 x i8])
196209
// bit64-LABEL: fp128 @f128_as_self(
197210
// emscripten-LABEL: void @f128_as_self({{.*}}sret([16 x i8])
@@ -204,7 +217,8 @@ pub fn f128_as_self(a: f128) -> f128 {
204217
a as f128
205218
}
206219

207-
// x86-LABEL: void @f16_as_f128({{.*}}sret([16 x i8])
220+
// x86-sse-LABEL: <16 x i8> @f16_as_f128(
221+
// x86-nosse-LABEL: void @f16_as_f128({{.*}}sret([16 x i8])
208222
// bit32-LABEL: void @f16_as_f128({{.*}}sret([16 x i8])
209223
// bit64-LABEL: fp128 @f16_as_f128(
210224
// emscripten-LABEL: void @f16_as_f128({{.*}}sret([16 x i8])
@@ -214,7 +228,8 @@ pub fn f16_as_f128(a: f16) -> f128 {
214228
a as f128
215229
}
216230

217-
// x86-LABEL: void @f32_as_f128({{.*}}sret([16 x i8])
231+
// x86-sse-LABEL: <16 x i8> @f32_as_f128(
232+
// x86-nosse-LABEL: void @f32_as_f128({{.*}}sret([16 x i8])
218233
// bit32-LABEL: void @f32_as_f128({{.*}}sret([16 x i8])
219234
// bit64-LABEL: fp128 @f32_as_f128(
220235
// emscripten-LABEL: void @f32_as_f128({{.*}}sret([16 x i8])
@@ -224,7 +239,8 @@ pub fn f32_as_f128(a: f32) -> f128 {
224239
a as f128
225240
}
226241

227-
// x86-LABEL: void @f64_as_f128({{.*}}sret([16 x i8])
242+
// x86-sse-LABEL: <16 x i8> @f64_as_f128(
243+
// x86-nosse-LABEL: void @f64_as_f128({{.*}}sret([16 x i8])
228244
// bit32-LABEL: void @f64_as_f128({{.*}}sret([16 x i8])
229245
// bit64-LABEL: fp128 @f64_as_f128(
230246
// emscripten-LABEL: void @f64_as_f128({{.*}}sret([16 x i8])
@@ -263,7 +279,8 @@ pub fn f128_as_u64(a: f128) -> u64 {
263279
a as u64
264280
}
265281

266-
// x86-LABEL: void @f128_as_u128({{.*}}sret([16 x i8])
282+
// x86-sse-LABEL: void @f128_as_u128({{.*}}sret([16 x i8])
283+
// x86-nosse-LABEL: void @f128_as_u128({{.*}}sret([16 x i8])
267284
// bit32-LABEL: void @f128_as_u128({{.*}}sret([16 x i8])
268285
// bit64-LABEL: i128 @f128_as_u128(
269286
// emscripten-LABEL: void @f128_as_u128({{.*}}sret([16 x i8])
@@ -300,7 +317,8 @@ pub fn f128_as_i64(a: f128) -> i64 {
300317
a as i64
301318
}
302319

303-
// x86-LABEL: void @f128_as_i128({{.*}}sret([16 x i8])
320+
// x86-sse-LABEL: void @f128_as_i128({{.*}}sret([16 x i8])
321+
// x86-nosse-LABEL: void @f128_as_i128({{.*}}sret([16 x i8])
304322
// bit32-LABEL: void @f128_as_i128({{.*}}sret([16 x i8])
305323
// bit64-LABEL: i128 @f128_as_i128(
306324
// emscripten-LABEL: void @f128_as_i128({{.*}}sret([16 x i8])
@@ -312,7 +330,8 @@ pub fn f128_as_i128(a: f128) -> i128 {
312330

313331
/* int to float conversions */
314332

315-
// x86-LABEL: void @u8_as_f128({{.*}}sret([16 x i8])
333+
// x86-sse-LABEL: <16 x i8> @u8_as_f128(
334+
// x86-nosse-LABEL: void @u8_as_f128({{.*}}sret([16 x i8])
316335
// bit32-LABEL: void @u8_as_f128({{.*}}sret([16 x i8])
317336
// bit64-LABEL: fp128 @u8_as_f128(
318337
// emscripten-LABEL: void @u8_as_f128({{.*}}sret([16 x i8])
@@ -322,7 +341,8 @@ pub fn u8_as_f128(a: u8) -> f128 {
322341
a as f128
323342
}
324343

325-
// x86-LABEL: void @u16_as_f128({{.*}}sret([16 x i8])
344+
// x86-sse-LABEL: <16 x i8> @u16_as_f128(
345+
// x86-nosse-LABEL: void @u16_as_f128({{.*}}sret([16 x i8])
326346
// bit32-LABEL: void @u16_as_f128({{.*}}sret([16 x i8])
327347
// bit64-LABEL: fp128 @u16_as_f128(
328348
// emscripten-LABEL: void @u16_as_f128({{.*}}sret([16 x i8])
@@ -332,7 +352,8 @@ pub fn u16_as_f128(a: u16) -> f128 {
332352
a as f128
333353
}
334354

335-
// x86-LABEL: void @u32_as_f128({{.*}}sret([16 x i8])
355+
// x86-sse-LABEL: <16 x i8> @u32_as_f128(
356+
// x86-nosse-LABEL: void @u32_as_f128({{.*}}sret([16 x i8])
336357
// bit32-LABEL: void @u32_as_f128({{.*}}sret([16 x i8])
337358
// bit64-LABEL: fp128 @u32_as_f128(
338359
// emscripten-LABEL: void @u32_as_f128({{.*}}sret([16 x i8])
@@ -342,7 +363,8 @@ pub fn u32_as_f128(a: u32) -> f128 {
342363
a as f128
343364
}
344365

345-
// x86-LABEL: void @u64_as_f128({{.*}}sret([16 x i8])
366+
// x86-sse-LABEL: <16 x i8> @u64_as_f128(
367+
// x86-nosse-LABEL: void @u64_as_f128({{.*}}sret([16 x i8])
346368
// bit32-LABEL: void @u64_as_f128({{.*}}sret([16 x i8])
347369
// bit64-LABEL: fp128 @u64_as_f128(
348370
// emscripten-LABEL: void @u64_as_f128({{.*}}sret([16 x i8])
@@ -352,7 +374,8 @@ pub fn u64_as_f128(a: u64) -> f128 {
352374
a as f128
353375
}
354376

355-
// x86-LABEL: void @u128_as_f128({{.*}}sret([16 x i8])
377+
// x86-sse-LABEL: <16 x i8> @u128_as_f128(
378+
// x86-nosse-LABEL: void @u128_as_f128({{.*}}sret([16 x i8])
356379
// bit32-LABEL: void @u128_as_f128({{.*}}sret([16 x i8])
357380
// bit64-LABEL: fp128 @u128_as_f128(
358381
// emscripten-LABEL: void @u128_as_f128({{.*}}sret([16 x i8])
@@ -362,7 +385,8 @@ pub fn u128_as_f128(a: u128) -> f128 {
362385
a as f128
363386
}
364387

365-
// x86-LABEL: void @i8_as_f128({{.*}}sret([16 x i8])
388+
// x86-sse-LABEL: <16 x i8> @i8_as_f128(
389+
// x86-nosse-LABEL: void @i8_as_f128({{.*}}sret([16 x i8])
366390
// bit32-LABEL: void @i8_as_f128({{.*}}sret([16 x i8])
367391
// bit64-LABEL: fp128 @i8_as_f128(
368392
// emscripten-LABEL: void @i8_as_f128({{.*}}sret([16 x i8])
@@ -372,7 +396,8 @@ pub fn i8_as_f128(a: i8) -> f128 {
372396
a as f128
373397
}
374398

375-
// x86-LABEL: void @i16_as_f128({{.*}}sret([16 x i8])
399+
// x86-sse-LABEL: <16 x i8> @i16_as_f128(
400+
// x86-nosse-LABEL: void @i16_as_f128({{.*}}sret([16 x i8])
376401
// bit32-LABEL: void @i16_as_f128({{.*}}sret([16 x i8])
377402
// bit64-LABEL: fp128 @i16_as_f128(
378403
// emscripten-LABEL: void @i16_as_f128({{.*}}sret([16 x i8])
@@ -382,7 +407,8 @@ pub fn i16_as_f128(a: i16) -> f128 {
382407
a as f128
383408
}
384409

385-
// x86-LABEL: void @i32_as_f128({{.*}}sret([16 x i8])
410+
// x86-sse-LABEL: <16 x i8> @i32_as_f128(
411+
// x86-nosse-LABEL: void @i32_as_f128({{.*}}sret([16 x i8])
386412
// bit32-LABEL: void @i32_as_f128({{.*}}sret([16 x i8])
387413
// bit64-LABEL: fp128 @i32_as_f128(
388414
// emscripten-LABEL: void @i32_as_f128({{.*}}sret([16 x i8])
@@ -392,7 +418,8 @@ pub fn i32_as_f128(a: i32) -> f128 {
392418
a as f128
393419
}
394420

395-
// x86-LABEL: void @i64_as_f128({{.*}}sret([16 x i8])
421+
// x86-sse-LABEL: <16 x i8> @i64_as_f128(
422+
// x86-nosse-LABEL: void @i64_as_f128({{.*}}sret([16 x i8])
396423
// bit32-LABEL: void @i64_as_f128({{.*}}sret([16 x i8])
397424
// bit64-LABEL: fp128 @i64_as_f128(
398425
// emscripten-LABEL: void @i64_as_f128({{.*}}sret([16 x i8])
@@ -402,7 +429,8 @@ pub fn i64_as_f128(a: i64) -> f128 {
402429
a as f128
403430
}
404431

405-
// x86-LABEL: void @i128_as_f128({{.*}}sret([16 x i8])
432+
// x86-sse-LABEL: <16 x i8> @i128_as_f128(
433+
// x86-nosse-LABEL: void @i128_as_f128({{.*}}sret([16 x i8])
406434
// bit32-LABEL: void @i128_as_f128({{.*}}sret([16 x i8])
407435
// bit64-LABEL: fp128 @i128_as_f128(
408436
// emscripten-LABEL: void @i128_as_f128({{.*}}sret([16 x i8])

‎tests/codegen/float/f16.rs

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
// 32-bit x86 returns float types differently to avoid the x87 stack.
22
// 32-bit systems will return 128bit values using a return area pointer.
3-
//@ revisions: x86 bit32 bit64
4-
//@[x86] only-x86
3+
//@ revisions: x86-sse x86-nosse bit32 bit64
4+
//@[x86-sse] only-x86
5+
//@[x86-sse] only-rustc_abi-x86-sse2
6+
//@[x86-nosse] only-x86
7+
//@[x86-nosse] ignore-rustc_abi-x86-sse2
58
//@[bit32] ignore-x86
69
//@[bit32] only-32bit
710
//@[bit64] ignore-x86
@@ -59,8 +62,10 @@ pub fn f16_le(a: f16, b: f16) -> bool {
5962
}
6063

6164
// This is where we check the argument and return ABI for f16.
62-
// other-LABEL: half @f16_neg(half
63-
// x86-LABEL: i16 @f16_neg(half
65+
// bit32-LABEL: half @f16_neg(half
66+
// bit64-LABEL: half @f16_neg(half
67+
// x86-sse-LABEL: <2 x i8> @f16_neg(half
68+
// x86-nosse-LABEL: i16 @f16_neg(half
6469
#[no_mangle]
6570
pub fn f16_neg(a: f16) -> f16 {
6671
// CHECK: fneg half %{{.+}}
@@ -144,17 +149,23 @@ pub fn f16_rem_assign(a: &mut f16, b: f16) {
144149

145150
/* float to float conversions */
146151

147-
// other-LABEL: half @f16_as_self(
148-
// x86-LABEL: i16 @f16_as_self(
152+
// bit32-LABEL: half @f16_as_self(
153+
// bit64-LABEL: half @f16_as_self(
154+
// x86-sse-LABEL: <2 x i8> @f16_as_self(
155+
// x86-nosse-LABEL: i16 @f16_as_self(
149156
#[no_mangle]
150157
pub fn f16_as_self(a: f16) -> f16 {
151-
// other-CHECK: ret half %{{.+}}
152-
// x86-CHECK: bitcast half
153-
// x86-CHECK: ret i16
158+
// bit32-CHECK: ret half %{{.+}}
159+
// bit64-CHECK: ret half %{{.+}}
160+
// x86-sse-CHECK: bitcast half
161+
// x86-nosse-CHECK: bitcast half
162+
// x86-sse-CHECK: ret i16
163+
// x86-nosse-CHECK: ret i16
154164
a as f16
155165
}
156166

157-
// x86-LABEL: i32 @f16_as_f32(
167+
// x86-sse-LABEL: <4 x i8> @f16_as_f32(
168+
// x86-nosse-LABEL: i32 @f16_as_f32(
158169
// bit32-LABEL: float @f16_as_f32(
159170
// bit64-LABEL: float @f16_as_f32(
160171
#[no_mangle]
@@ -163,7 +174,8 @@ pub fn f16_as_f32(a: f16) -> f32 {
163174
a as f32
164175
}
165176

166-
// x86-LABEL: void @f16_as_f64(
177+
// x86-sse-LABEL: <8 x i8> @f16_as_f64(
178+
// x86-nosse-LABEL: void @f16_as_f64({{.*}}sret([8 x i8])
167179
// bit32-LABEL: double @f16_as_f64(
168180
// bit64-LABEL: double @f16_as_f64(
169181
#[no_mangle]
@@ -172,7 +184,8 @@ pub fn f16_as_f64(a: f16) -> f64 {
172184
a as f64
173185
}
174186

175-
// x86-LABEL: void @f16_as_f128({{.*}}sret([16 x i8])
187+
// x86-sse-LABEL: <16 x i8> @f16_as_f128(
188+
// x86-nosse-LABEL: void @f16_as_f128({{.*}}sret([16 x i8])
176189
// bit32-LABEL: void @f16_as_f128({{.*}}sret([16 x i8])
177190
// bit64-LABEL: fp128 @f16_as_f128(
178191
#[no_mangle]
@@ -231,7 +244,8 @@ pub fn f16_as_u64(a: f16) -> u64 {
231244
a as u64
232245
}
233246

234-
// x86-LABEL: void @f16_as_u128({{.*}}sret([16 x i8])
247+
// x86-sse-LABEL: void @f16_as_u128({{.*}}sret([16 x i8])
248+
// x86-nosse-LABEL: void @f16_as_u128({{.*}}sret([16 x i8])
235249
// bit32-LABEL: void @f16_as_u128({{.*}}sret([16 x i8])
236250
// bit64-LABEL: i128 @f16_as_u128(
237251
#[no_mangle]
@@ -267,7 +281,8 @@ pub fn f16_as_i64(a: f16) -> i64 {
267281
a as i64
268282
}
269283

270-
// x86-LABEL: void @f16_as_i128({{.*}}sret([16 x i8])
284+
// x86-sse-LABEL: void @f16_as_i128({{.*}}sret([16 x i8])
285+
// x86-nosse-LABEL: void @f16_as_i128({{.*}}sret([16 x i8])
271286
// bit32-LABEL: void @f16_as_i128({{.*}}sret([16 x i8])
272287
// bit64-LABEL: i128 @f16_as_i128(
273288
#[no_mangle]

‎tests/codegen/intrinsics/transmute-x64.rs

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,6 @@
66
use std::arch::x86_64::{__m128, __m128i, __m256i};
77
use std::mem::transmute;
88

9-
// CHECK-LABEL: @check_sse_float_to_int(
10-
#[no_mangle]
11-
pub unsafe fn check_sse_float_to_int(x: __m128) -> __m128i {
12-
// CHECK-NOT: alloca
13-
// CHECK: %0 = load <4 x float>, ptr %x, align 16
14-
// CHECK: store <4 x float> %0, ptr %_0, align 16
15-
transmute(x)
16-
}
17-
189
// CHECK-LABEL: @check_sse_pair_to_avx(
1910
#[no_mangle]
2011
pub unsafe fn check_sse_pair_to_avx(x: (__m128i, __m128i)) -> __m256i {

‎tests/codegen/issues/issue-32031.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//@ compile-flags: -C no-prepopulate-passes -Copt-level=0
22
// 32-bit x86 returns `f32` and `f64` differently to avoid the x87 stack.
33
//@ revisions: x86 other
4-
//@[x86] only-x86
4+
//@[x86] only-rustc_abi-x86-sse2
55
//@[other] ignore-x86
66

77
#![crate_type = "lib"]
@@ -10,7 +10,7 @@
1010
pub struct F32(f32);
1111

1212
// other: define{{.*}}float @add_newtype_f32(float %a, float %b)
13-
// x86: define{{.*}}i32 @add_newtype_f32(float %a, float %b)
13+
// x86: define{{.*}}<4 x i8> @add_newtype_f32(float %a, float %b)
1414
#[inline(never)]
1515
#[no_mangle]
1616
pub fn add_newtype_f32(a: F32, b: F32) -> F32 {
@@ -21,7 +21,7 @@ pub fn add_newtype_f32(a: F32, b: F32) -> F32 {
2121
pub struct F64(f64);
2222

2323
// other: define{{.*}}double @add_newtype_f64(double %a, double %b)
24-
// x86: define{{.*}}void @add_newtype_f64(ptr{{.*}}sret([8 x i8]){{.*}}%_0, double %a, double %b)
24+
// x86: define{{.*}}<8 x i8> @add_newtype_f64(double %a, double %b)
2525
#[inline(never)]
2626
#[no_mangle]
2727
pub fn add_newtype_f64(a: F64, b: F64) -> F64 {

‎tests/codegen/simd-intrinsic/simd-intrinsic-transmute-array.rs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
//
22
//@ compile-flags: -C no-prepopulate-passes
3+
// LLVM IR isn't very portable and the one tested here depends on the ABI
4+
// which is different between x86 (where we use SSE registers) and others.
5+
// `x86-64` and `x86-32-sse2` are identical, but compiletest does not support
6+
// taking the union of multiple `only` annotations.
7+
//@ revisions: x86-64 x86-32-sse2 other
8+
//@[x86-64] only-x86_64
9+
//@[x86-32-sse2] only-rustc_abi-x86-sse2
10+
//@[other] ignore-rustc_abi-x86-sse2
11+
//@[other] ignore-x86_64
312

413
#![crate_type = "lib"]
514
#![allow(non_camel_case_types)]
@@ -38,7 +47,9 @@ pub fn build_array_s(x: [f32; 4]) -> S<4> {
3847
#[no_mangle]
3948
pub fn build_array_transmute_s(x: [f32; 4]) -> S<4> {
4049
// CHECK: %[[VAL:.+]] = load <4 x float>, ptr %x, align [[ARRAY_ALIGN]]
41-
// CHECK: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]]
50+
// x86-32: ret <4 x float> %[[VAL:.+]]
51+
// x86-64: ret <4 x float> %[[VAL:.+]]
52+
// other: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]]
4253
unsafe { std::mem::transmute(x) }
4354
}
4455

@@ -53,6 +64,8 @@ pub fn build_array_t(x: [f32; 4]) -> T {
5364
#[no_mangle]
5465
pub fn build_array_transmute_t(x: [f32; 4]) -> T {
5566
// CHECK: %[[VAL:.+]] = load <4 x float>, ptr %x, align [[ARRAY_ALIGN]]
56-
// CHECK: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]]
67+
// x86-32: ret <4 x float> %[[VAL:.+]]
68+
// x86-64: ret <4 x float> %[[VAL:.+]]
69+
// other: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]]
5770
unsafe { std::mem::transmute(x) }
5871
}

‎tests/codegen/simd/packed-simd.rs

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
//@ revisions:opt3 noopt
2+
//@ only-x86_64
23
//@[opt3] compile-flags: -Copt-level=3
34
//@[noopt] compile-flags: -Cno-prepopulate-passes
45

@@ -14,41 +15,39 @@ use core::{mem, ptr};
1415

1516
#[repr(simd, packed)]
1617
#[derive(Copy, Clone)]
17-
pub struct Simd<T, const N: usize>([T; N]);
18+
pub struct PackedSimd<T, const N: usize>([T; N]);
1819

1920
#[repr(simd)]
2021
#[derive(Copy, Clone)]
2122
pub struct FullSimd<T, const N: usize>([T; N]);
2223

2324
// non-powers-of-two have padding and need to be expanded to full vectors
24-
fn load<T, const N: usize>(v: Simd<T, N>) -> FullSimd<T, N> {
25+
fn load<T, const N: usize>(v: PackedSimd<T, N>) -> FullSimd<T, N> {
2526
unsafe {
2627
let mut tmp = mem::MaybeUninit::<FullSimd<T, N>>::uninit();
2728
ptr::copy_nonoverlapping(&v as *const _, tmp.as_mut_ptr().cast(), 1);
2829
tmp.assume_init()
2930
}
3031
}
3132

32-
// CHECK-LABEL: square_packed_full
33-
// CHECK-SAME: ptr{{[a-z_ ]*}} sret([[RET_TYPE:[^)]+]]) [[RET_ALIGN:align (8|16)]]{{[^%]*}} [[RET_VREG:%[_0-9]*]]
34-
// CHECK-SAME: ptr{{[a-z_ ]*}} align 4
33+
// CHECK-LABEL: define <3 x float> @square_packed_full(ptr{{[a-z_ ]*}} align 4 {{[^,]*}})
3534
#[no_mangle]
36-
pub fn square_packed_full(x: Simd<f32, 3>) -> FullSimd<f32, 3> {
37-
// CHECK-NEXT: start
38-
// noopt: alloca [[RET_TYPE]], [[RET_ALIGN]]
39-
// CHECK: load <3 x float>
35+
pub fn square_packed_full(x: PackedSimd<f32, 3>) -> FullSimd<f32, 3> {
36+
// The unoptimized version of this is not very interesting to check
37+
// since `load` does not get inlined.
38+
// opt3-NEXT: start:
39+
// opt3-NEXT: load <3 x float>
4040
let x = load(x);
41-
// CHECK: [[VREG:%[a-z0-9_]+]] = fmul <3 x float>
42-
// CHECK-NEXT: store <3 x float> [[VREG]], ptr [[RET_VREG]], [[RET_ALIGN]]
43-
// CHECK-NEXT: ret void
41+
// opt3-NEXT: [[VREG:%[a-z0-9_]+]] = fmul <3 x float>
42+
// opt3-NEXT: ret <3 x float> [[VREG:%[a-z0-9_]+]]
4443
unsafe { intrinsics::simd_mul(x, x) }
4544
}
4645

4746
// CHECK-LABEL: square_packed
4847
// CHECK-SAME: ptr{{[a-z_ ]*}} sret([[RET_TYPE:[^)]+]]) [[RET_ALIGN:align 4]]{{[^%]*}} [[RET_VREG:%[_0-9]*]]
4948
// CHECK-SAME: ptr{{[a-z_ ]*}} align 4
5049
#[no_mangle]
51-
pub fn square_packed(x: Simd<f32, 3>) -> Simd<f32, 3> {
50+
pub fn square_packed(x: PackedSimd<f32, 3>) -> PackedSimd<f32, 3> {
5251
// CHECK-NEXT: start
5352
// CHECK-NEXT: load <3 x float>
5453
// noopt-NEXT: load <3 x float>

‎tests/codegen/union-abi.rs

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,11 @@
22
//@ compile-flags: -Copt-level=3 -C no-prepopulate-passes
33
// 32-bit x86 returns `f32` differently to avoid the x87 stack.
44
// 32-bit systems will return 128bit values using a return area pointer.
5-
//@ revisions: x86 bit32 bit64
6-
//@[x86] only-x86
5+
//@ revisions: x86-sse x86-nosse bit32 bit64
6+
//@[x86-sse] only-x86
7+
//@[x86-sse] only-rustc_abi-x86-sse2
8+
//@[x86-nosse] only-x86
9+
//@[x86-nosse] ignore-rustc_abi-x86-sse2
710
//@[bit32] ignore-x86
811
//@[bit32] only-32bit
912
//@[bit64] ignore-x86
@@ -75,7 +78,8 @@ pub union UnionF32 {
7578
a: f32,
7679
}
7780

78-
// x86: define {{(dso_local )?}}i32 @test_UnionF32(float %_1)
81+
// x86-sse: define {{(dso_local )?}}<4 x i8> @test_UnionF32(float %_1)
82+
// x86-nosse: define {{(dso_local )?}}i32 @test_UnionF32(float %_1)
7983
// bit32: define {{(dso_local )?}}float @test_UnionF32(float %_1)
8084
// bit64: define {{(dso_local )?}}float @test_UnionF32(float %_1)
8185
#[no_mangle]
@@ -88,7 +92,8 @@ pub union UnionF32F32 {
8892
b: f32,
8993
}
9094

91-
// x86: define {{(dso_local )?}}i32 @test_UnionF32F32(float %_1)
95+
// x86-sse: define {{(dso_local )?}}<4 x i8> @test_UnionF32F32(float %_1)
96+
// x86-nosse: define {{(dso_local )?}}i32 @test_UnionF32F32(float %_1)
9297
// bit32: define {{(dso_local )?}}float @test_UnionF32F32(float %_1)
9398
// bit64: define {{(dso_local )?}}float @test_UnionF32F32(float %_1)
9499
#[no_mangle]
@@ -110,7 +115,8 @@ pub fn test_UnionF32U32(_: UnionF32U32) -> UnionF32U32 {
110115
pub union UnionU128 {
111116
a: u128,
112117
}
113-
// x86: define {{(dso_local )?}}void @test_UnionU128({{.*}}sret([16 x i8]){{.*}}, i128 %_1)
118+
// x86-sse: define {{(dso_local )?}}void @test_UnionU128({{.*}}sret([16 x i8]){{.*}}, i128 %_1)
119+
// x86-nosse: define {{(dso_local )?}}void @test_UnionU128({{.*}}sret([16 x i8]){{.*}}, i128 %_1)
114120
// bit32: define {{(dso_local )?}}void @test_UnionU128({{.*}}sret([16 x i8]){{.*}}, i128 %_1)
115121
// bit64: define {{(dso_local )?}}i128 @test_UnionU128(i128 %_1)
116122
#[no_mangle]

‎tests/ui/sse-abi-checks.rs renamed to ‎tests/ui/sse-simd-abi-checks.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
//! Ensure we trigger abi_unsupported_vector_types for target features that are usually enabled
2-
//! on a target, but disabled in this file via a `-C` flag.
2+
//! on a target via the base CPU, but disabled in this file via a `-C` flag.
3+
//@ compile-flags: --crate-type=rlib --target=i586-unknown-linux-gnu
4+
//@ compile-flags: -Ctarget-cpu=pentium4 -C target-feature=-sse,-sse2
35
//@ add-core-stubs
4-
//@ compile-flags: --crate-type=rlib --target=i586-unknown-linux-gnu -C target-feature=-sse,-sse2
56
//@ build-pass
67
//@ ignore-pass (test emits codegen-time warnings)
78
//@ needs-llvm-components: x86

‎tests/ui/sse-abi-checks.stderr renamed to ‎tests/ui/sse-simd-abi-checks.stderr

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
warning: this function definition uses SIMD vector type `SseVector` which (with the chosen ABI) requires the `sse` target feature, which is not enabled
2-
--> $DIR/sse-abi-checks.rs:19:1
2+
--> $DIR/sse-simd-abi-checks.rs:20:1
33
|
44
LL | pub unsafe extern "C" fn f(_: SseVector) {
55
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ function defined here
@@ -13,7 +13,7 @@ warning: 1 warning emitted
1313

1414
Future incompatibility report: Future breakage diagnostic:
1515
warning: this function definition uses SIMD vector type `SseVector` which (with the chosen ABI) requires the `sse` target feature, which is not enabled
16-
--> $DIR/sse-abi-checks.rs:19:1
16+
--> $DIR/sse-simd-abi-checks.rs:20:1
1717
|
1818
LL | pub unsafe extern "C" fn f(_: SseVector) {
1919
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ function defined here

0 commit comments

Comments
 (0)
This repository has been archived.