Skip to content

Commit adc5509

Browse files
committed
[AArch64] Add LRINT/LLRINT/LROUND/LLROUND FP16 lowering without fullfp16 (#66174)
Lowering existed for the STRICT nodes, but there was no handling for the corresponding non-strict operations. This change adds custom lowering so that the LRINT, LLRINT, LROUND and LLROUND intrinsics are supported for fp16 when +fullfp16 is not present.
1 parent a9d0f5e commit adc5509

File tree

5 files changed

+172
-47
lines changed

5 files changed

+172
-47
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -707,7 +707,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
707707

708708
// Round-to-integer operations need custom lowering for fp16, as Promote doesn't work
709709
// because the result type is integer.
710-
for (auto Op : {ISD::STRICT_LROUND, ISD::STRICT_LLROUND, ISD::STRICT_LRINT,
710+
for (auto Op : {ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
711+
ISD::STRICT_LROUND, ISD::STRICT_LLROUND, ISD::STRICT_LRINT,
711712
ISD::STRICT_LLRINT})
712713
setOperationAction(Op, MVT::f16, Custom);
713714

@@ -6185,6 +6186,16 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
61856186
return LowerVECTOR_DEINTERLEAVE(Op, DAG);
61866187
case ISD::VECTOR_INTERLEAVE:
61876188
return LowerVECTOR_INTERLEAVE(Op, DAG);
6189+
case ISD::LROUND:
6190+
case ISD::LLROUND:
6191+
case ISD::LRINT:
6192+
case ISD::LLRINT: {
6193+
assert(Op.getOperand(0).getValueType() == MVT::f16 &&
6194+
"Expected custom lowering of rounding operations only for f16");
6195+
SDLoc DL(Op);
6196+
SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6197+
return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
6198+
}
61886199
case ISD::STRICT_LROUND:
61896200
case ISD::STRICT_LLROUND:
61906201
case ISD::STRICT_LRINT:

llvm/test/CodeGen/AArch64/llrint-conv-fp16.ll

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,62 @@
1-
; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-NOFP16
3+
; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-FP16
24

3-
; CHECK-LABEL: testmhhs:
4-
; CHECK: frintx h0, h0
5-
; CHECK-NEXT: fcvtzs x0, h0
6-
; CHECK: ret
75
define i16 @testmhhs(half %x) {
6+
; CHECK-NOFP16-LABEL: testmhhs:
7+
; CHECK-NOFP16: // %bb.0: // %entry
8+
; CHECK-NOFP16-NEXT: fcvt s0, h0
9+
; CHECK-NOFP16-NEXT: frintx s0, s0
10+
; CHECK-NOFP16-NEXT: fcvtzs x0, s0
11+
; CHECK-NOFP16-NEXT: // kill: def $w0 killed $w0 killed $x0
12+
; CHECK-NOFP16-NEXT: ret
13+
;
14+
; CHECK-FP16-LABEL: testmhhs:
15+
; CHECK-FP16: // %bb.0: // %entry
16+
; CHECK-FP16-NEXT: frintx h0, h0
17+
; CHECK-FP16-NEXT: fcvtzs x0, h0
18+
; CHECK-FP16-NEXT: // kill: def $w0 killed $w0 killed $x0
19+
; CHECK-FP16-NEXT: ret
820
entry:
921
%0 = tail call i64 @llvm.llrint.i64.f16(half %x)
1022
%conv = trunc i64 %0 to i16
1123
ret i16 %conv
1224
}
1325

14-
; CHECK-LABEL: testmhws:
15-
; CHECK: frintx h0, h0
16-
; CHECK-NEXT: fcvtzs x0, h0
17-
; CHECK: ret
1826
define i32 @testmhws(half %x) {
27+
; CHECK-NOFP16-LABEL: testmhws:
28+
; CHECK-NOFP16: // %bb.0: // %entry
29+
; CHECK-NOFP16-NEXT: fcvt s0, h0
30+
; CHECK-NOFP16-NEXT: frintx s0, s0
31+
; CHECK-NOFP16-NEXT: fcvtzs x0, s0
32+
; CHECK-NOFP16-NEXT: // kill: def $w0 killed $w0 killed $x0
33+
; CHECK-NOFP16-NEXT: ret
34+
;
35+
; CHECK-FP16-LABEL: testmhws:
36+
; CHECK-FP16: // %bb.0: // %entry
37+
; CHECK-FP16-NEXT: frintx h0, h0
38+
; CHECK-FP16-NEXT: fcvtzs x0, h0
39+
; CHECK-FP16-NEXT: // kill: def $w0 killed $w0 killed $x0
40+
; CHECK-FP16-NEXT: ret
1941
entry:
2042
%0 = tail call i64 @llvm.llrint.i64.f16(half %x)
2143
%conv = trunc i64 %0 to i32
2244
ret i32 %conv
2345
}
2446

25-
; CHECK-LABEL: testmhxs:
26-
; CHECK: frintx h0, h0
27-
; CHECK-NEXT: fcvtzs x0, h0
28-
; CHECK: ret
2947
define i64 @testmhxs(half %x) {
48+
; CHECK-NOFP16-LABEL: testmhxs:
49+
; CHECK-NOFP16: // %bb.0: // %entry
50+
; CHECK-NOFP16-NEXT: fcvt s0, h0
51+
; CHECK-NOFP16-NEXT: frintx s0, s0
52+
; CHECK-NOFP16-NEXT: fcvtzs x0, s0
53+
; CHECK-NOFP16-NEXT: ret
54+
;
55+
; CHECK-FP16-LABEL: testmhxs:
56+
; CHECK-FP16: // %bb.0: // %entry
57+
; CHECK-FP16-NEXT: frintx h0, h0
58+
; CHECK-FP16-NEXT: fcvtzs x0, h0
59+
; CHECK-FP16-NEXT: ret
3060
entry:
3161
%0 = tail call i64 @llvm.llrint.i64.f16(half %x)
3262
ret i64 %0

llvm/test/CodeGen/AArch64/llround-conv-fp16.ll

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,56 @@
1-
; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-NOFP16
3+
; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-FP16
24

3-
; CHECK-LABEL: testmhhs:
4-
; CHECK: fcvtas x0, h0
5-
; CHECK: ret
65
define i16 @testmhhs(half %x) {
6+
; CHECK-NOFP16-LABEL: testmhhs:
7+
; CHECK-NOFP16: // %bb.0: // %entry
8+
; CHECK-NOFP16-NEXT: fcvt s0, h0
9+
; CHECK-NOFP16-NEXT: fcvtas x0, s0
10+
; CHECK-NOFP16-NEXT: // kill: def $w0 killed $w0 killed $x0
11+
; CHECK-NOFP16-NEXT: ret
12+
;
13+
; CHECK-FP16-LABEL: testmhhs:
14+
; CHECK-FP16: // %bb.0: // %entry
15+
; CHECK-FP16-NEXT: fcvtas x0, h0
16+
; CHECK-FP16-NEXT: // kill: def $w0 killed $w0 killed $x0
17+
; CHECK-FP16-NEXT: ret
718
entry:
819
%0 = tail call i64 @llvm.llround.i64.f16(half %x)
920
%conv = trunc i64 %0 to i16
1021
ret i16 %conv
1122
}
1223

13-
; CHECK-LABEL: testmhws:
14-
; CHECK: fcvtas x0, h0
15-
; CHECK: ret
1624
define i32 @testmhws(half %x) {
25+
; CHECK-NOFP16-LABEL: testmhws:
26+
; CHECK-NOFP16: // %bb.0: // %entry
27+
; CHECK-NOFP16-NEXT: fcvt s0, h0
28+
; CHECK-NOFP16-NEXT: fcvtas x0, s0
29+
; CHECK-NOFP16-NEXT: // kill: def $w0 killed $w0 killed $x0
30+
; CHECK-NOFP16-NEXT: ret
31+
;
32+
; CHECK-FP16-LABEL: testmhws:
33+
; CHECK-FP16: // %bb.0: // %entry
34+
; CHECK-FP16-NEXT: fcvtas x0, h0
35+
; CHECK-FP16-NEXT: // kill: def $w0 killed $w0 killed $x0
36+
; CHECK-FP16-NEXT: ret
1737
entry:
1838
%0 = tail call i64 @llvm.llround.i64.f16(half %x)
1939
%conv = trunc i64 %0 to i32
2040
ret i32 %conv
2141
}
2242

23-
; CHECK-LABEL: testmhxs:
24-
; CHECK: fcvtas x0, h0
25-
; CHECK-NEXT: ret
2643
define i64 @testmhxs(half %x) {
44+
; CHECK-NOFP16-LABEL: testmhxs:
45+
; CHECK-NOFP16: // %bb.0: // %entry
46+
; CHECK-NOFP16-NEXT: fcvt s0, h0
47+
; CHECK-NOFP16-NEXT: fcvtas x0, s0
48+
; CHECK-NOFP16-NEXT: ret
49+
;
50+
; CHECK-FP16-LABEL: testmhxs:
51+
; CHECK-FP16: // %bb.0: // %entry
52+
; CHECK-FP16-NEXT: fcvtas x0, h0
53+
; CHECK-FP16-NEXT: ret
2754
entry:
2855
%0 = tail call i64 @llvm.llround.i64.f16(half %x)
2956
ret i64 %0

llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,62 @@
1-
; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-NOFP16
3+
; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-FP16
24

3-
; CHECK-LABEL: testmhhs:
4-
; CHECK: frintx h0, h0
5-
; CHECK-NEXT: fcvtzs x0, h0
6-
; CHECK: ret
75
define i16 @testmhhs(half %x) {
6+
; CHECK-NOFP16-LABEL: testmhhs:
7+
; CHECK-NOFP16: // %bb.0: // %entry
8+
; CHECK-NOFP16-NEXT: fcvt s0, h0
9+
; CHECK-NOFP16-NEXT: frintx s0, s0
10+
; CHECK-NOFP16-NEXT: fcvtzs x0, s0
11+
; CHECK-NOFP16-NEXT: // kill: def $w0 killed $w0 killed $x0
12+
; CHECK-NOFP16-NEXT: ret
13+
;
14+
; CHECK-FP16-LABEL: testmhhs:
15+
; CHECK-FP16: // %bb.0: // %entry
16+
; CHECK-FP16-NEXT: frintx h0, h0
17+
; CHECK-FP16-NEXT: fcvtzs x0, h0
18+
; CHECK-FP16-NEXT: // kill: def $w0 killed $w0 killed $x0
19+
; CHECK-FP16-NEXT: ret
820
entry:
921
%0 = tail call i64 @llvm.lrint.i64.f16(half %x)
1022
%conv = trunc i64 %0 to i16
1123
ret i16 %conv
1224
}
1325

14-
; CHECK-LABEL: testmhws:
15-
; CHECK: frintx h0, h0
16-
; CHECK-NEXT: fcvtzs x0, h0
17-
; CHECK: ret
1826
define i32 @testmhws(half %x) {
27+
; CHECK-NOFP16-LABEL: testmhws:
28+
; CHECK-NOFP16: // %bb.0: // %entry
29+
; CHECK-NOFP16-NEXT: fcvt s0, h0
30+
; CHECK-NOFP16-NEXT: frintx s0, s0
31+
; CHECK-NOFP16-NEXT: fcvtzs x0, s0
32+
; CHECK-NOFP16-NEXT: // kill: def $w0 killed $w0 killed $x0
33+
; CHECK-NOFP16-NEXT: ret
34+
;
35+
; CHECK-FP16-LABEL: testmhws:
36+
; CHECK-FP16: // %bb.0: // %entry
37+
; CHECK-FP16-NEXT: frintx h0, h0
38+
; CHECK-FP16-NEXT: fcvtzs x0, h0
39+
; CHECK-FP16-NEXT: // kill: def $w0 killed $w0 killed $x0
40+
; CHECK-FP16-NEXT: ret
1941
entry:
2042
%0 = tail call i64 @llvm.lrint.i64.f16(half %x)
2143
%conv = trunc i64 %0 to i32
2244
ret i32 %conv
2345
}
2446

25-
; CHECK-LABEL: testmhxs:
26-
; CHECK: frintx h0, h0
27-
; CHECK-NEXT: fcvtzs x0, h0
28-
; CHECK: ret
2947
define i64 @testmhxs(half %x) {
48+
; CHECK-NOFP16-LABEL: testmhxs:
49+
; CHECK-NOFP16: // %bb.0: // %entry
50+
; CHECK-NOFP16-NEXT: fcvt s0, h0
51+
; CHECK-NOFP16-NEXT: frintx s0, s0
52+
; CHECK-NOFP16-NEXT: fcvtzs x0, s0
53+
; CHECK-NOFP16-NEXT: ret
54+
;
55+
; CHECK-FP16-LABEL: testmhxs:
56+
; CHECK-FP16: // %bb.0: // %entry
57+
; CHECK-FP16-NEXT: frintx h0, h0
58+
; CHECK-FP16-NEXT: fcvtzs x0, h0
59+
; CHECK-FP16-NEXT: ret
3060
entry:
3161
%0 = tail call i64 @llvm.lrint.i64.f16(half %x)
3262
ret i64 %0

llvm/test/CodeGen/AArch64/lround-conv-fp16.ll

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,56 @@
1-
; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-NOFP16
3+
; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-FP16
24

3-
; CHECK-LABEL: testmhhs:
4-
; CHECK: fcvtas x0, h0
5-
; CHECK: ret
65
define i16 @testmhhs(half %x) {
6+
; CHECK-NOFP16-LABEL: testmhhs:
7+
; CHECK-NOFP16: // %bb.0: // %entry
8+
; CHECK-NOFP16-NEXT: fcvt s0, h0
9+
; CHECK-NOFP16-NEXT: fcvtas x0, s0
10+
; CHECK-NOFP16-NEXT: // kill: def $w0 killed $w0 killed $x0
11+
; CHECK-NOFP16-NEXT: ret
12+
;
13+
; CHECK-FP16-LABEL: testmhhs:
14+
; CHECK-FP16: // %bb.0: // %entry
15+
; CHECK-FP16-NEXT: fcvtas x0, h0
16+
; CHECK-FP16-NEXT: // kill: def $w0 killed $w0 killed $x0
17+
; CHECK-FP16-NEXT: ret
718
entry:
819
%0 = tail call i64 @llvm.lround.i64.f16(half %x)
920
%conv = trunc i64 %0 to i16
1021
ret i16 %conv
1122
}
1223

13-
; CHECK-LABEL: testmhws:
14-
; CHECK: fcvtas x0, h0
15-
; CHECK: ret
1624
define i32 @testmhws(half %x) {
25+
; CHECK-NOFP16-LABEL: testmhws:
26+
; CHECK-NOFP16: // %bb.0: // %entry
27+
; CHECK-NOFP16-NEXT: fcvt s0, h0
28+
; CHECK-NOFP16-NEXT: fcvtas x0, s0
29+
; CHECK-NOFP16-NEXT: // kill: def $w0 killed $w0 killed $x0
30+
; CHECK-NOFP16-NEXT: ret
31+
;
32+
; CHECK-FP16-LABEL: testmhws:
33+
; CHECK-FP16: // %bb.0: // %entry
34+
; CHECK-FP16-NEXT: fcvtas x0, h0
35+
; CHECK-FP16-NEXT: // kill: def $w0 killed $w0 killed $x0
36+
; CHECK-FP16-NEXT: ret
1737
entry:
1838
%0 = tail call i64 @llvm.lround.i64.f16(half %x)
1939
%conv = trunc i64 %0 to i32
2040
ret i32 %conv
2141
}
2242

23-
; CHECK-LABEL: testmhxs:
24-
; CHECK: fcvtas x0, h0
25-
; CHECK-NEXT: ret
2643
define i64 @testmhxs(half %x) {
44+
; CHECK-NOFP16-LABEL: testmhxs:
45+
; CHECK-NOFP16: // %bb.0: // %entry
46+
; CHECK-NOFP16-NEXT: fcvt s0, h0
47+
; CHECK-NOFP16-NEXT: fcvtas x0, s0
48+
; CHECK-NOFP16-NEXT: ret
49+
;
50+
; CHECK-FP16-LABEL: testmhxs:
51+
; CHECK-FP16: // %bb.0: // %entry
52+
; CHECK-FP16-NEXT: fcvtas x0, h0
53+
; CHECK-FP16-NEXT: ret
2754
entry:
2855
%0 = tail call i64 @llvm.lround.i64.f16(half %x)
2956
ret i64 %0

0 commit comments

Comments
 (0)