1
; RUN: llc -global-isel -mcpu=tahiti -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX678,GFX6789 %s
; RUN: llc -global-isel -mcpu=gfx900 -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9,GFX6789 %s
; RUN: llc -global-isel -mcpu=gfx1010 -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s

; Prefix layout: GFX678 and GFX9 hold the target-specific lines for the tahiti
; and gfx900 invocations, GFX6789 is shared by those two invocations, and the
; GCN prefix is enabled only for the gfx1010 invocation.
; All three invocations name the same full target triple so the generated
; code does not depend on the host's default vendor/OS components.
4
+
5
; Signed clamp of an i64 to [-32768, 32767] (lower bound applied first),
; followed by truncation to i16. The checks expect the clamp to be selected
; as v_cvt_pk_i16_i32 plus a single v_med3_i32.
; The label patterns end in ':' so they cannot match the longer function
; names that share this prefix.
; GFX6789-LABEL: {{^}}v_clamp_i64_i16:
; GCN-LABEL: {{^}}v_clamp_i64_i16:
; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GFX6789: v_mov_b32_e32 [[B]], 0xffff8000
; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x7fff
; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]]
; GCN: v_cvt_pk_i16_i32_e64 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GCN: v_mov_b32_e32 [[C:v[0-9]+]], 0x7fff
; GCN: v_med3_i32 [[A]], 0xffff8000, [[A]], [[C]]
define i16 @v_clamp_i64_i16(i64 %in) nounwind {
entry:
  %above.min = icmp sgt i64 %in, -32768
  %lower.clamped = select i1 %above.min, i64 %in, i64 -32768
  %below.max = icmp slt i64 %lower.clamped, 32767
  %clamped = select i1 %below.max, i64 %lower.clamped, i64 32767
  %result = trunc i64 %clamped to i16
  ret i16 %result
}
24
+
25
; Same [-32768, 32767] clamp as the non-reversed variant, but with the upper
; bound applied first and the lower bound second. The checks expect the same
; v_cvt_pk_i16_i32 + v_med3_i32 sequence.
; The label patterns end in ':' so they only match this function's own label.
; GFX6789-LABEL: {{^}}v_clamp_i64_i16_reverse:
; GCN-LABEL: {{^}}v_clamp_i64_i16_reverse:
; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GFX6789: v_mov_b32_e32 [[B]], 0xffff8000
; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x7fff
; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]]
; GCN: v_cvt_pk_i16_i32_e64 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GCN: v_mov_b32_e32 [[C:v[0-9]+]], 0x7fff
; GCN: v_med3_i32 [[A]], 0xffff8000, [[A]], [[C]]
define i16 @v_clamp_i64_i16_reverse(i64 %in) nounwind {
entry:
  %below.max = icmp slt i64 %in, 32767
  %upper.clamped = select i1 %below.max, i64 %in, i64 32767
  %above.min = icmp sgt i64 %upper.clamped, -32768
  %clamped = select i1 %above.min, i64 %upper.clamped, i64 -32768
  %result = trunc i64 %clamped to i16
  ret i16 %result
}
44
+
45
; Negative test: the clamp bounds are [-32768, 32769], so one bound lies
; outside the i16 range and the checks expect plain v_cndmask selects rather
; than the v_cvt_pk_i16_i32 + v_med3_i32 sequence.
; NOTE(review): despite the function name, the out-of-range bound here is the
; upper one (32769 = 0x8001); the lower bound -32768 is a valid i16 minimum.
; The label patterns end in ':' so they cannot match
; v_clamp_i64_i16_wrong_lower_and_higher.
; GFX6789-LABEL: {{^}}v_clamp_i64_i16_wrong_lower:
; GCN-LABEL: {{^}}v_clamp_i64_i16_wrong_lower:
; GFX6789: v_mov_b32_e32 [[B:v[0-9]+]], 0x8001
; GFX6789: v_cndmask_b32_e32 [[A:v[0-9]+]], [[B]], [[A]], vcc
; GFX6789: v_cndmask_b32_e32 [[C:v[0-9]+]], 0, [[C]], vcc

; GCN: v_cndmask_b32_e32 [[A:v[0-9]+]], 0x8001, [[A]], vcc_lo
; GCN: v_cndmask_b32_e32 [[B:v[0-9]+]], 0, [[B]], vcc_lo
define i16 @v_clamp_i64_i16_wrong_lower(i64 %in) nounwind {
entry:
  %below.max = icmp slt i64 %in, 32769
  %upper.clamped = select i1 %below.max, i64 %in, i64 32769
  %above.min = icmp sgt i64 %upper.clamped, -32768
  %clamped = select i1 %above.min, i64 %upper.clamped, i64 -32768
  %result = trunc i64 %clamped to i16
  ret i16 %result
}
62
+
63
; Negative test: both clamp bounds (-32769 and 32768) lie just outside the
; i16 range, so the checks expect a v_cndmask select instead of the
; v_cvt_pk_i16_i32 + v_med3_i32 sequence.
; The label patterns end in ':' to pin them to this function's own label.
; GFX6789-LABEL: {{^}}v_clamp_i64_i16_wrong_lower_and_higher:
; GCN-LABEL: {{^}}v_clamp_i64_i16_wrong_lower_and_higher:
; GFX6789: v_mov_b32_e32 [[B:v[0-9]+]], 0x8000
; GFX6789: v_cndmask_b32_e32 [[A:v[0-9]+]], [[B]], [[A]], vcc

; GCN: v_cndmask_b32_e32 [[A:v[0-9]+]], 0x8000, [[A]], vcc_lo
define i16 @v_clamp_i64_i16_wrong_lower_and_higher(i64 %in) nounwind {
entry:
  %above.min = icmp sgt i64 %in, -32769
  %lower.clamped = select i1 %above.min, i64 %in, i64 -32769
  %below.max = icmp slt i64 %lower.clamped, 32768
  %clamped = select i1 %below.max, i64 %lower.clamped, i64 32768
  %result = trunc i64 %clamped to i16
  ret i16 %result
}
78
+
79
; Clamp to [-255, 256], a range strictly inside the i16 range, with the upper
; bound applied first. The checks expect v_cvt_pk_i16_i32 followed by a
; v_med3_i32 using 0xffffff01 (-255) and 0x100 (256) as bounds.
; The label patterns end in ':' so they cannot match
; v_clamp_i64_i16_lower_than_short_reverse.
; GFX6789-LABEL: {{^}}v_clamp_i64_i16_lower_than_short:
; GCN-LABEL: {{^}}v_clamp_i64_i16_lower_than_short:
; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GFX6789: v_mov_b32_e32 [[B]], 0xffffff01
; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x100
; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]]
; GCN: v_cvt_pk_i16_i32_e64 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GCN: v_mov_b32_e32 [[C:v[0-9]+]], 0x100
; GCN: v_med3_i32 [[A]], 0xffffff01, [[A]], [[C]]
define i16 @v_clamp_i64_i16_lower_than_short(i64 %in) nounwind {
entry:
  %below.max = icmp slt i64 %in, 256
  %upper.clamped = select i1 %below.max, i64 %in, i64 256
  %above.min = icmp sgt i64 %upper.clamped, -255
  %clamped = select i1 %above.min, i64 %upper.clamped, i64 -255
  %result = trunc i64 %clamped to i16
  ret i16 %result
}
98
+
99
; Clamp to [-255, 256] with the lower bound applied first and the upper bound
; second. The checks expect v_cvt_pk_i16_i32 followed by a v_med3_i32 using
; 0xffffff01 (-255) and 0x100 (256) as bounds.
; The label patterns end in ':' to pin them to this function's own label.
; GFX6789-LABEL: {{^}}v_clamp_i64_i16_lower_than_short_reverse:
; GCN-LABEL: {{^}}v_clamp_i64_i16_lower_than_short_reverse:
; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GFX6789: v_mov_b32_e32 [[B]], 0xffffff01
; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x100
; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]]
; GCN: v_cvt_pk_i16_i32_e64 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GCN: v_mov_b32_e32 [[C:v[0-9]+]], 0x100
; GCN: v_med3_i32 [[A]], 0xffffff01, [[A]], [[C]]
define i16 @v_clamp_i64_i16_lower_than_short_reverse(i64 %in) nounwind {
entry:
  %above.min = icmp sgt i64 %in, -255
  %lower.clamped = select i1 %above.min, i64 %in, i64 -255
  %below.max = icmp slt i64 %lower.clamped, 256
  %clamped = select i1 %below.max, i64 %lower.clamped, i64 256
  %result = trunc i64 %clamped to i16
  ret i16 %result
}
118
+
119
; Degenerate clamp where both bounds are 0: max(in, 0) followed by min(.., 0)
; is always 0, and the checks expect a plain move of the constant 0.
; The body check uses the prefix shared by the tahiti and gfx900 invocations
; so that both of them verify this function.
; The label patterns end in ':' to pin them to this function's own label.
; GFX6789-LABEL: {{^}}v_clamp_i64_i16_zero:
; GCN-LABEL: {{^}}v_clamp_i64_i16_zero:
; GFX6789: v_mov_b32_e32 [[A:v[0-9]+]], 0
; GCN: v_mov_b32_e32 [[A:v[0-9]+]], 0
define i16 @v_clamp_i64_i16_zero(i64 %in) nounwind {
entry:
  %is.positive = icmp sgt i64 %in, 0
  %non.negative = select i1 %is.positive, i64 %in, i64 0
  %is.negative = icmp slt i64 %non.negative, 0
  %clamped = select i1 %is.negative, i64 %non.negative, i64 0
  %result = trunc i64 %clamped to i16
  ret i16 %result
}
0 commit comments