Skip to content

Commit df97673

Browse files
committed
[X86] Add some initial test coverage for half libcall expansion/promotion
We can add additional tests in the future, but this is an initial placeholder. Inspired by #105775.
1 parent aec3ec0 commit df97673

File tree

1 file changed

+375
-0
lines changed

1 file changed

+375
-0
lines changed
Lines changed: 375 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,375 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -enable-legalize-types-checking -mtriple=x86_64-linux-gnu -mattr=+f16c | FileCheck %s --check-prefix=F16C
3+
; RUN: llc < %s -enable-legalize-types-checking -mtriple=x86_64-linux-gnu -mattr=+avx512fp16 | FileCheck %s --check-prefix=FP16
4+
; RUN: llc < %s -enable-legalize-types-checking -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefix=X64
5+
; RUN: llc < %s -enable-legalize-types-checking -mtriple=i686-linux-gnu -mattr=sse2 | FileCheck %s --check-prefix=X86
6+
7+
; Check all soft floating point library function calls.
8+
9+
; ceil needs no libcall on capable targets: F16C extends half->float,
; uses vroundss (imm $10 selects round-toward-+inf, suppress exceptions)
; and converts back; FP16 has a native vrndscalesh. SSE2-only targets
; promote through the __extendhfsf2/__truncsfhf2 soft-float helpers
; around a ceilf libcall.
define void @test_half_ceil(half %a0, ptr %p0) nounwind {
; F16C-LABEL: test_half_ceil:
; F16C: # %bb.0:
; F16C-NEXT: vpextrw $0, %xmm0, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rdi)
; F16C-NEXT: retq
;
; FP16-LABEL: test_half_ceil:
; FP16: # %bb.0:
; FP16-NEXT: vrndscalesh $10, %xmm0, %xmm0, %xmm0
; FP16-NEXT: vmovsh %xmm0, (%rdi)
; FP16-NEXT: retq
;
; X64-LABEL: test_half_ceil:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: movq %rdi, %rbx
; X64-NEXT: callq __extendhfsf2@PLT
; X64-NEXT: callq ceilf@PLT
; X64-NEXT: callq __truncsfhf2@PLT
; X64-NEXT: pextrw $0, %xmm0, %eax
; X64-NEXT: movw %ax, (%rbx)
; X64-NEXT: popq %rbx
; X64-NEXT: retq
;
; X86-LABEL: test_half_ceil:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: subl $8, %esp
; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: pextrw $0, %xmm0, %eax
; X86-NEXT: movw %ax, (%esp)
; X86-NEXT: calll __extendhfsf2
; X86-NEXT: fstps (%esp)
; X86-NEXT: calll ceilf
; X86-NEXT: fstps (%esp)
; X86-NEXT: calll __truncsfhf2
; X86-NEXT: pextrw $0, %xmm0, %eax
; X86-NEXT: movw %ax, (%esi)
; X86-NEXT: addl $8, %esp
; X86-NEXT: popl %esi
; X86-NEXT: retl
  %res = call half @llvm.ceil.half(half %a0)
  store half %res, ptr %p0, align 2
  ret void
}
61+
62+
; cos always goes through the float cosf libcall; only the half<->float
; conversions differ per target (inline vcvtph2ps/vcvtps2ph on F16C,
; vcvtsh2ss/vcvtss2sh on FP16, __extendhfsf2/__truncsfhf2 libcalls on
; plain SSE2 targets).
define void @test_half_cos(half %a0, ptr %p0) nounwind {
; F16C-LABEL: test_half_cos:
; F16C: # %bb.0:
; F16C-NEXT: pushq %rbx
; F16C-NEXT: movq %rdi, %rbx
; F16C-NEXT: vpextrw $0, %xmm0, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: callq cosf@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rbx)
; F16C-NEXT: popq %rbx
; F16C-NEXT: retq
;
; FP16-LABEL: test_half_cos:
; FP16: # %bb.0:
; FP16-NEXT: pushq %rbx
; FP16-NEXT: movq %rdi, %rbx
; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
; FP16-NEXT: callq cosf@PLT
; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
; FP16-NEXT: vmovsh %xmm0, (%rbx)
; FP16-NEXT: popq %rbx
; FP16-NEXT: retq
;
; X64-LABEL: test_half_cos:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: movq %rdi, %rbx
; X64-NEXT: callq __extendhfsf2@PLT
; X64-NEXT: callq cosf@PLT
; X64-NEXT: callq __truncsfhf2@PLT
; X64-NEXT: pextrw $0, %xmm0, %eax
; X64-NEXT: movw %ax, (%rbx)
; X64-NEXT: popq %rbx
; X64-NEXT: retq
;
; X86-LABEL: test_half_cos:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: subl $8, %esp
; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: pextrw $0, %xmm0, %eax
; X86-NEXT: movw %ax, (%esp)
; X86-NEXT: calll __extendhfsf2
; X86-NEXT: fstps (%esp)
; X86-NEXT: calll cosf
; X86-NEXT: fstps (%esp)
; X86-NEXT: calll __truncsfhf2
; X86-NEXT: pextrw $0, %xmm0, %eax
; X86-NEXT: movw %ax, (%esi)
; X86-NEXT: addl $8, %esp
; X86-NEXT: popl %esi
; X86-NEXT: retl
  %res = call half @llvm.cos.half(half %a0)
  store half %res, ptr %p0, align 2
  ret void
}
122+
123+
; fabs never needs a math libcall - it is expanded to an AND that clears
; the sign bit (the FP16 0x7FFF broadcast mask prints as NaN). Non-F16C
; targets still round-trip through __extendhfsf2/__truncsfhf2 because
; the operation is promoted to float.
define void @test_half_fabs(half %a0, ptr %p0) nounwind {
; F16C-LABEL: test_half_fabs:
; F16C: # %bb.0:
; F16C-NEXT: vpextrw $0, %xmm0, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rdi)
; F16C-NEXT: retq
;
; FP16-LABEL: test_half_fabs:
; FP16: # %bb.0:
; FP16-NEXT: vpbroadcastw {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; FP16-NEXT: vpand %xmm1, %xmm0, %xmm0
; FP16-NEXT: vmovsh %xmm0, (%rdi)
; FP16-NEXT: retq
;
; X64-LABEL: test_half_fabs:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: movq %rdi, %rbx
; X64-NEXT: callq __extendhfsf2@PLT
; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: callq __truncsfhf2@PLT
; X64-NEXT: pextrw $0, %xmm0, %eax
; X64-NEXT: movw %ax, (%rbx)
; X64-NEXT: popq %rbx
; X64-NEXT: retq
;
; X86-LABEL: test_half_fabs:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: subl $8, %esp
; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: pextrw $0, %xmm0, %eax
; X86-NEXT: movw %ax, (%esp)
; X86-NEXT: calll __extendhfsf2
; X86-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: movd %xmm0, (%esp)
; X86-NEXT: calll __truncsfhf2
; X86-NEXT: pextrw $0, %xmm0, %eax
; X86-NEXT: movw %ax, (%esi)
; X86-NEXT: addl $8, %esp
; X86-NEXT: popl %esi
; X86-NEXT: retl
  %res = call half @llvm.fabs.half(half %a0)
  store half %res, ptr %p0, align 2
  ret void
}
177+
178+
; Two-operand case: both half args are extended to float before the powf
; libcall. On soft-float targets the second operand must be spilled
; across the first __extendhfsf2 call (xmm spill on X64, x87 80-bit
; fstpt/fldt spill on X86).
define void @test_half_pow(half %a0, half %a1, ptr %p0) nounwind {
; F16C-LABEL: test_half_pow:
; F16C: # %bb.0:
; F16C-NEXT: pushq %rbx
; F16C-NEXT: movq %rdi, %rbx
; F16C-NEXT: vpextrw $0, %xmm1, %eax
; F16C-NEXT: vpextrw $0, %xmm0, %ecx
; F16C-NEXT: vmovd %ecx, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vmovd %eax, %xmm1
; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; F16C-NEXT: callq powf@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rbx)
; F16C-NEXT: popq %rbx
; F16C-NEXT: retq
;
; FP16-LABEL: test_half_pow:
; FP16: # %bb.0:
; FP16-NEXT: pushq %rbx
; FP16-NEXT: movq %rdi, %rbx
; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
; FP16-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1
; FP16-NEXT: callq powf@PLT
; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
; FP16-NEXT: vmovsh %xmm0, (%rbx)
; FP16-NEXT: popq %rbx
; FP16-NEXT: retq
;
; X64-LABEL: test_half_pow:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: subq $16, %rsp
; X64-NEXT: movq %rdi, %rbx
; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: callq __extendhfsf2@PLT
; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
; X64-NEXT: callq __extendhfsf2@PLT
; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
; X64-NEXT: # xmm1 = mem[0],zero,zero,zero
; X64-NEXT: callq powf@PLT
; X64-NEXT: callq __truncsfhf2@PLT
; X64-NEXT: pextrw $0, %xmm0, %eax
; X64-NEXT: movw %ax, (%rbx)
; X64-NEXT: addq $16, %rsp
; X64-NEXT: popq %rbx
; X64-NEXT: retq
;
; X86-LABEL: test_half_pow:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: subl $56, %esp
; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: pextrw $0, %xmm0, %eax
; X86-NEXT: movw %ax, (%esp)
; X86-NEXT: calll __extendhfsf2
; X86-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; X86-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; X86-NEXT: pextrw $0, %xmm0, %eax
; X86-NEXT: movw %ax, (%esp)
; X86-NEXT: calll __extendhfsf2
; X86-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; X86-NEXT: fstps (%esp)
; X86-NEXT: calll powf
; X86-NEXT: fstps (%esp)
; X86-NEXT: calll __truncsfhf2
; X86-NEXT: pextrw $0, %xmm0, %eax
; X86-NEXT: movw %ax, (%esi)
; X86-NEXT: addl $56, %esp
; X86-NEXT: popl %esi
; X86-NEXT: retl
  %res = call half @llvm.pow.half(half %a0, half %a1)
  store half %res, ptr %p0, align 2
  ret void
}
261+
262+
; Identical pattern to test_half_cos: promote half to float, call the
; sinf libcall, truncate back; only the conversion strategy differs per
; target feature set.
define void @test_half_sin(half %a0, ptr %p0) nounwind {
; F16C-LABEL: test_half_sin:
; F16C: # %bb.0:
; F16C-NEXT: pushq %rbx
; F16C-NEXT: movq %rdi, %rbx
; F16C-NEXT: vpextrw $0, %xmm0, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: callq sinf@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rbx)
; F16C-NEXT: popq %rbx
; F16C-NEXT: retq
;
; FP16-LABEL: test_half_sin:
; FP16: # %bb.0:
; FP16-NEXT: pushq %rbx
; FP16-NEXT: movq %rdi, %rbx
; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
; FP16-NEXT: callq sinf@PLT
; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
; FP16-NEXT: vmovsh %xmm0, (%rbx)
; FP16-NEXT: popq %rbx
; FP16-NEXT: retq
;
; X64-LABEL: test_half_sin:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: movq %rdi, %rbx
; X64-NEXT: callq __extendhfsf2@PLT
; X64-NEXT: callq sinf@PLT
; X64-NEXT: callq __truncsfhf2@PLT
; X64-NEXT: pextrw $0, %xmm0, %eax
; X64-NEXT: movw %ax, (%rbx)
; X64-NEXT: popq %rbx
; X64-NEXT: retq
;
; X86-LABEL: test_half_sin:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: subl $8, %esp
; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: pextrw $0, %xmm0, %eax
; X86-NEXT: movw %ax, (%esp)
; X86-NEXT: calll __extendhfsf2
; X86-NEXT: fstps (%esp)
; X86-NEXT: calll sinf
; X86-NEXT: fstps (%esp)
; X86-NEXT: calll __truncsfhf2
; X86-NEXT: pextrw $0, %xmm0, %eax
; X86-NEXT: movw %ax, (%esi)
; X86-NEXT: addl $8, %esp
; X86-NEXT: popl %esi
; X86-NEXT: retl
  %res = call half @llvm.sin.half(half %a0)
  store half %res, ptr %p0, align 2
  ret void
}
322+
323+
; sqrt has direct ISA support everywhere (vsqrtss/vsqrtsh/sqrtss), so
; no sqrtf libcall is emitted; non-FP16 targets only need the
; half<->float conversion (inline on F16C, __extendhfsf2/__truncsfhf2
; libcalls otherwise).
define void @test_half_sqrt(half %a0, ptr %p0) nounwind {
; F16C-LABEL: test_half_sqrt:
; F16C: # %bb.0:
; F16C-NEXT: vpextrw $0, %xmm0, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rdi)
; F16C-NEXT: retq
;
; FP16-LABEL: test_half_sqrt:
; FP16: # %bb.0:
; FP16-NEXT: vsqrtsh %xmm0, %xmm0, %xmm0
; FP16-NEXT: vmovsh %xmm0, (%rdi)
; FP16-NEXT: retq
;
; X64-LABEL: test_half_sqrt:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: movq %rdi, %rbx
; X64-NEXT: callq __extendhfsf2@PLT
; X64-NEXT: sqrtss %xmm0, %xmm0
; X64-NEXT: callq __truncsfhf2@PLT
; X64-NEXT: pextrw $0, %xmm0, %eax
; X64-NEXT: movw %ax, (%rbx)
; X64-NEXT: popq %rbx
; X64-NEXT: retq
;
; X86-LABEL: test_half_sqrt:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: subl $8, %esp
; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: pextrw $0, %xmm0, %eax
; X86-NEXT: movw %ax, (%esp)
; X86-NEXT: calll __extendhfsf2
; X86-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: sqrtss %xmm0, %xmm0
; X86-NEXT: movss %xmm0, (%esp)
; X86-NEXT: calll __truncsfhf2
; X86-NEXT: pextrw $0, %xmm0, %eax
; X86-NEXT: movw %ax, (%esi)
; X86-NEXT: addl $8, %esp
; X86-NEXT: popl %esi
; X86-NEXT: retl
  %res = call half @llvm.sqrt.half(half %a0)
  store half %res, ptr %p0, align 2
  ret void
}

0 commit comments

Comments
 (0)