1# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX1010 -check-prefix=GCN %s
2
3# GCN-LABEL: {{^}}name: vop1_instructions
4
5# GFX1010: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit $exec
6# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $mode, implicit $exec
7# GFX1010: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit $mode, implicit $exec
8# GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit $mode, implicit $exec
9# GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $mode, implicit $exec
10
11# GFX1010: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_sdwa 0, %{{[0-9]+}}, 0, 6, 0, 5, implicit $exec
12# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $mode, implicit $exec
13# GFX1010: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit $mode, implicit $exec
14# GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit $mode, implicit $exec
15# GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $mode, implicit $exec
16
17# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit $mode, implicit $exec
18# GFX1010: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 1, 0, 5, 0, 5, implicit $mode, implicit $exec
19# GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 1, %{{[0-9]+}}, 0, 5, 0, 5, implicit $mode, implicit $exec
20# GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 1, 5, 0, 5, implicit $mode, implicit $exec
21
22---
23name:            vop1_instructions
# Input for the VOP1 checks. The body runs V_MOV_B32, V_FRACT_F32, V_SIN_F32,
# V_CVT_U32_F32 and V_CVT_F32_I32 through chains of shift-by-16 instructions
# (V_LSHRREV_B32 / V_LSHLREV_B32) in three groups: _e32 forms, _e64 forms with
# all-zero modifier operands, and _e64 forms with a modifier operand set to 1.
# The GFX1010 check lines preceding this document list the _sdwa instructions
# expected in the output of the si-peephole-sdwa run.
24tracksRegLiveness: true
# Virtual register declarations.
# NOTE(review): ids 4, 7, 8 and 9 are declared but never referenced in the body.
25registers:
26  - { id: 0, class: vreg_64 }
27  - { id: 1, class: vreg_64 }
28  - { id: 2, class: sreg_64 }
29  - { id: 3, class: vgpr_32 }
30  - { id: 4, class: sreg_32_xm0 }
31  - { id: 5, class: sreg_32_xm0 }
32  - { id: 6, class: sreg_32_xm0 }
33  - { id: 7, class: sreg_32_xm0 }
34  - { id: 8, class: sreg_32 }
35  - { id: 9, class: vgpr_32 }
36  - { id: 10, class: vgpr_32 }
37  - { id: 11, class: vgpr_32 }
38  - { id: 12, class: vgpr_32 }
39  - { id: 13, class: vgpr_32 }
40  - { id: 14, class: vgpr_32 }
41  - { id: 15, class: vgpr_32 }
42  - { id: 16, class: vgpr_32 }
43  - { id: 17, class: vgpr_32 }
44  - { id: 18, class: vgpr_32 }
45  - { id: 19, class: vgpr_32 }
46  - { id: 20, class: vgpr_32 }
47  - { id: 21, class: vgpr_32 }
48  - { id: 22, class: vgpr_32 }
49  - { id: 23, class: vgpr_32 }
50  - { id: 24, class: vgpr_32 }
51  - { id: 25, class: vgpr_32 }
52  - { id: 26, class: vgpr_32 }
53  - { id: 27, class: vgpr_32 }
54  - { id: 28, class: vgpr_32 }
55  - { id: 29, class: vgpr_32 }
56  - { id: 30, class: vgpr_32 }
57  - { id: 31, class: vgpr_32 }
58  - { id: 32, class: vgpr_32 }
59  - { id: 33, class: vgpr_32 }
60  - { id: 34, class: vgpr_32 }
61  - { id: 35, class: vgpr_32 }
62  - { id: 36, class: vgpr_32 }
63  - { id: 37, class: vgpr_32 }
64  - { id: 38, class: vgpr_32 }
65  - { id: 39, class: vgpr_32 }
66  - { id: 40, class: vgpr_32 }
67  - { id: 41, class: vgpr_32 }
68  - { id: 42, class: vgpr_32 }
69  - { id: 43, class: vgpr_32 }
70  - { id: 44, class: vgpr_32 }
71  - { id: 45, class: vgpr_32 }
72  - { id: 46, class: vgpr_32 }
73  - { id: 47, class: vgpr_32 }
74  - { id: 48, class: vgpr_32 }
75  - { id: 100, class: vgpr_32 }
76body:             |
77  bb.0:
78    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
79
    ; Copy the live-in physical registers into virtual registers: %2 is written
    ; back to $sgpr30_sgpr31 before the return, %1 is the address operand of the
    ; load and %0 is the address operand of the final store.
80    %2 = COPY $sgpr30_sgpr31
81    %1 = COPY $vgpr2_vgpr3
82    %0 = COPY $vgpr0_vgpr1
83    %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
84
    ; Two scalar copies of the constant 65535; only %6 is read in this function.
85    %5 = S_MOV_B32 65535
86    %6 = S_MOV_B32 65535
87
    ; Group 1: _e32 forms of the VOP1 instructions, fed by / feeding 16-bit shifts.
88    %10 = V_LSHRREV_B32_e64 16, %3, implicit $exec
89    %11 = V_MOV_B32_e32 %10, implicit $exec
90    %12 = V_LSHLREV_B32_e64 16, %11, implicit $exec
91    %14 = V_FRACT_F32_e32 123, implicit $mode, implicit $exec
92    %15 = V_LSHLREV_B32_e64 16, %14, implicit $exec
93    %16 = V_LSHRREV_B32_e64 16, %15, implicit $exec
94    %17 = V_SIN_F32_e32 %16, implicit $mode, implicit $exec
95    %18 = V_LSHLREV_B32_e64 16, %17, implicit $exec
96    %19 = V_LSHRREV_B32_e64 16, %18, implicit $exec
97    %20 = V_CVT_U32_F32_e32 %19, implicit $mode, implicit $exec
98    %21 = V_LSHLREV_B32_e64 16, %20, implicit $exec
99    %23 = V_CVT_F32_I32_e32 123, implicit $mode, implicit $exec
100    %24 = V_LSHLREV_B32_e64 16, %23, implicit $exec
101
    ; Group 2: _e64 forms with every modifier operand set to 0.
    ; NOTE(review): %26 is assigned twice below (V_MOV_B32_e64, then
    ; V_LSHLREV_B32_e64 reading and rewriting %26) - confirm this is intended.
102    %25 = V_LSHRREV_B32_e64 16, %3, implicit $exec
103    %26 = V_MOV_B32_e64 %25, implicit $exec
104    %26 = V_LSHLREV_B32_e64 16, %26, implicit $exec
105    %27 = V_FRACT_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec
106    %28 = V_LSHLREV_B32_e64 16, %27, implicit $exec
107    %29 = V_LSHRREV_B32_e64 16, %28, implicit $exec
108    %30 = V_SIN_F32_e64 0, %29, 0, 0, implicit $mode, implicit $exec
109    %31 = V_LSHLREV_B32_e64 16, %30, implicit $exec
110    %32 = V_LSHRREV_B32_e64 16, %31, implicit $exec
111    %33 = V_CVT_U32_F32_e64 0, %32, 0, 0, implicit $mode, implicit $exec
112    %34 = V_LSHLREV_B32_e64 16, %33, implicit $exec
113    %35 = V_CVT_F32_I32_e64 %6, 0, 0, implicit $mode, implicit $exec
114    %36 = V_LSHLREV_B32_e64 16, %35, implicit $exec
115
116
    ; Group 3: _e64 forms where one modifier operand is 1. The third set of
    ; GFX1010 check lines expects a 1 in the corresponding _sdwa instruction.
    ; This group has no V_MOV_B32.
117    %37 = V_LSHRREV_B32_e64 16, %36, implicit $exec
118    %38 = V_FRACT_F32_e64 1, %37, 0, 0, implicit $mode, implicit $exec
119    %39 = V_LSHLREV_B32_e64 16, %38, implicit $exec
120    %40 = V_LSHRREV_B32_e64 16, %39, implicit $exec
121    %41 = V_SIN_F32_e64 0, %40, 1, 0, implicit $mode, implicit $exec
122    %42 = V_LSHLREV_B32_e64 16, %41, implicit $exec
123    %43 = V_LSHRREV_B32_e64 16, %42, implicit $exec
124    %44 = V_CVT_U32_F32_e64 1, %43, 0, 0, implicit $mode, implicit $exec
125    %45 = V_LSHLREV_B32_e64 16, %44, implicit $exec
126    %46 = V_LSHRREV_B32_e64 16, %45, implicit $exec
127    %47 = V_CVT_F32_I32_e64 %46, 0, 1, implicit $mode, implicit $exec
128    %48 = V_LSHLREV_B32_e64 16, %47, implicit $exec
129
130
    ; Move the last result into %100 and store it through the address in %0.
131    %100 = V_MOV_B32_e32 %48, implicit $exec
132
133    FLAT_STORE_DWORD %0, %100, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
    ; Write %2 back to $sgpr30_sgpr31 and return through it.
134    $sgpr30_sgpr31 = COPY %2
135    S_SETPC_B64_return $sgpr30_sgpr31
136
137...
138---
139# GCN-LABEL: {{^}}name: vop2_instructions
140
141# GFX1010: %{{[0-9]+}}:vgpr_32 = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 5, 0, 6, 5, implicit $exec
142# GFX1010: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $mode, implicit $exec
143# GFX1010: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit $mode, implicit $exec
144# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F32_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit $mode, implicit $exec
145# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F16_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit $mode, implicit $exec
146
147# GFX1010: %{{[0-9]+}}:vgpr_32 = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 5, 0, 6, 5, implicit $exec
148# GFX1010: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $mode, implicit $exec
149# GFX1010: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit $mode, implicit $exec
150# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F32_e64 0, 23, 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, implicit $mode, implicit $exec
151# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F16_e64 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, implicit $mode, implicit $exec
152
153# GFX1010: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $mode, implicit $exec
154# GFX1010: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit $mode, implicit $exec
155# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F32_e64 1, 23, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, 0, implicit $mode, implicit $exec
156# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F16_e64 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 2, implicit $mode, implicit $exec
157
158name:            vop2_instructions
# Input for the VOP2 checks. The body runs V_AND_B32, V_ADD_F32, V_SUB_F16,
# V_FMAC_F32 and V_FMAC_F16 over operands produced by shift-by-16 instructions
# and V_BFE_U32 (offset 8, width 8) in three groups: _e32 forms, _e64 forms with
# all-zero modifier operands, and _e64 forms with non-zero modifier operands.
# The GFX1010 check lines preceding this name expect _sdwa forms for AND/ADD/SUB,
# while the FMAC instructions are expected to stay in their _e32/_e64 forms.
159tracksRegLiveness: true
# Virtual register declarations.
# NOTE(review): ids 4, 7, 8, 9, 10 and 36 are declared but never referenced in
# the body.
160registers:
161  - { id: 0, class: vreg_64 }
162  - { id: 1, class: vreg_64 }
163  - { id: 2, class: sreg_64 }
164  - { id: 3, class: vgpr_32 }
165  - { id: 4, class: sreg_32_xm0 }
166  - { id: 5, class: sreg_32_xm0 }
167  - { id: 6, class: sreg_32_xm0 }
168  - { id: 7, class: sreg_32_xm0 }
169  - { id: 8, class: sreg_32 }
170  - { id: 9, class: vgpr_32 }
171  - { id: 10, class: vgpr_32 }
172  - { id: 11, class: vgpr_32 }
173  - { id: 12, class: vgpr_32 }
174  - { id: 13, class: vgpr_32 }
175  - { id: 14, class: vgpr_32 }
176  - { id: 15, class: vgpr_32 }
177  - { id: 16, class: vgpr_32 }
178  - { id: 17, class: vgpr_32 }
179  - { id: 18, class: vgpr_32 }
180  - { id: 19, class: vgpr_32 }
181  - { id: 20, class: vgpr_32 }
182  - { id: 21, class: vgpr_32 }
183  - { id: 22, class: vgpr_32 }
184  - { id: 23, class: vgpr_32 }
185  - { id: 24, class: vgpr_32 }
186  - { id: 25, class: vgpr_32 }
187  - { id: 26, class: vgpr_32 }
188  - { id: 27, class: vgpr_32 }
189  - { id: 28, class: vgpr_32 }
190  - { id: 29, class: vgpr_32 }
191  - { id: 30, class: vgpr_32 }
192  - { id: 31, class: vgpr_32 }
193  - { id: 32, class: vgpr_32 }
194  - { id: 33, class: vgpr_32 }
195  - { id: 34, class: vgpr_32 }
196  - { id: 35, class: vgpr_32 }
197  - { id: 36, class: vgpr_32 }
198  - { id: 37, class: vgpr_32 }
199  - { id: 38, class: vgpr_32 }
200  - { id: 39, class: vgpr_32 }
201  - { id: 40, class: vgpr_32 }
202  - { id: 41, class: vgpr_32 }
203  - { id: 42, class: vgpr_32 }
204  - { id: 43, class: vgpr_32 }
205  - { id: 44, class: vgpr_32 }
206  - { id: 45, class: vgpr_32 }
207  - { id: 46, class: vgpr_32 }
208  - { id: 47, class: vgpr_32 }
209  - { id: 48, class: vgpr_32 }
210  - { id: 49, class: vgpr_32 }
211  - { id: 50, class: vgpr_32 }
212  - { id: 51, class: vgpr_32 }
213  - { id: 52, class: vgpr_32 }
214  - { id: 53, class: vgpr_32 }
215  - { id: 54, class: vgpr_32 }
216  - { id: 55, class: vgpr_32 }
217  - { id: 56, class: vgpr_32 }
218  - { id: 57, class: vgpr_32 }
219  - { id: 58, class: vgpr_32 }
220  - { id: 59, class: vgpr_32 }
221  - { id: 60, class: vgpr_32 }
222  - { id: 100, class: vgpr_32 }
223body:             |
224  bb.0:
225    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
226
    ; Copy the live-in physical registers into virtual registers: %2 is written
    ; back to $sgpr30_sgpr31 before the return, %1 is the address operand of the
    ; load and %0 is the address operand of the final store.
227    %2 = COPY $sgpr30_sgpr31
228    %1 = COPY $vgpr2_vgpr3
229    %0 = COPY $vgpr0_vgpr1
230    %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
231
    ; Two scalar copies of the constant 65535; only %6 is read in this function.
232    %5 = S_MOV_B32 65535
233    %6 = S_MOV_B32 65535
234
    ; Group 1: _e32 forms. Each FMAC reads the same V_BFE_U32 result for its
    ; second and third operands.
235    %11 = V_LSHRREV_B32_e64 16, %3, implicit $exec
236    %12 = V_AND_B32_e32 %6, %11, implicit $exec
237    %13 = V_LSHLREV_B32_e64 16, %12, implicit $exec
238    %14 = V_LSHRREV_B32_e64 16, %13, implicit $exec
239    %15 = V_BFE_U32_e64 %13, 8, 8, implicit $exec
240    %16 = V_ADD_F32_e32 %14, %15, implicit $mode, implicit $exec
241    %17 = V_LSHLREV_B32_e64 16, %16, implicit $exec
242    %18 = V_LSHRREV_B32_e64 16, %17, implicit $exec
243    %19 = V_BFE_U32_e64 %17, 8, 8, implicit $exec
244    %20 = V_SUB_F16_e32 %18, %19, implicit $mode, implicit $exec
245    %21 = V_LSHLREV_B32_e64 16, %20, implicit $exec
246    %22 = V_BFE_U32_e64 %20, 8, 8, implicit $exec
247    %23 = V_FMAC_F32_e32 %21, %22, %22, implicit $mode, implicit $exec
248    %24 = V_LSHLREV_B32_e64 16, %23, implicit $exec
249    %25 = V_LSHRREV_B32_e64 16, %24, implicit $exec
250    %26 = V_BFE_U32_e64 %24, 8, 8, implicit $exec
251    %27 = V_FMAC_F16_e32 %25, %26, %26, implicit $mode, implicit $exec
252    %28 = V_LSHLREV_B32_e64 16, %27, implicit $exec
253
    ; Group 2: _e64 forms with every modifier operand set to 0. V_AND_B32,
    ; V_SUB_F16 and V_FMAC_F32 take the immediate 23 as their first source here.
254    %29 = V_LSHRREV_B32_e64 16, %28, implicit $exec
255    %30 = V_AND_B32_e64 23, %29, implicit $exec
256    %31 = V_LSHLREV_B32_e64 16, %30, implicit $exec
257    %32 = V_LSHRREV_B32_e64 16, %31, implicit $exec
258    %33 = V_BFE_U32_e64 %31, 8, 8, implicit $exec
259    %34 = V_ADD_F32_e64 0, %32, 0, %33, 0, 0, implicit $mode, implicit $exec
260    %35 = V_LSHLREV_B32_e64 16, %34, implicit $exec
261    %37 = V_BFE_U32_e64 %35, 8, 8, implicit $exec
262    %38 = V_SUB_F16_e64 0, 23, 0, %37, 0, 0, implicit $mode, implicit $exec
263    %39 = V_LSHLREV_B32_e64 16, %38, implicit $exec
264    %40 = V_BFE_U32_e64 %39, 8, 8, implicit $exec
265    %41 = V_FMAC_F32_e64 0, 23, 0, %40, 0, %40, 0, 0, implicit $mode, implicit $exec
266    %42 = V_LSHLREV_B32_e64 16, %41, implicit $exec
267    %43 = V_LSHRREV_B32_e64 16, %42, implicit $exec
268    %44 = V_BFE_U32_e64 %42, 8, 8, implicit $exec
269    %45 = V_FMAC_F16_e64 0, %43, 0, %44, 0, %44, 0, 0, implicit $mode, implicit $exec
270    %46 = V_LSHLREV_B32_e64 16, %45, implicit $exec
271
    ; Group 3: _e64 forms with non-zero modifier operands. The third set of
    ; GFX1010 check lines carries the same non-zero values. This group has no
    ; V_AND_B32.
272    %47 = V_LSHRREV_B32_e64 16, %46, implicit $exec
273    %48 = V_BFE_U32_e64 %46, 8, 8, implicit $exec
274    %49 = V_ADD_F32_e64 0, %47, 1, %48, 0, 0, implicit $mode, implicit $exec
275    %50 = V_LSHLREV_B32_e64 16, %49, implicit $exec
276    %51 = V_BFE_U32_e64 %50, 8, 8, implicit $exec
277    %52 = V_SUB_F16_e64 1, 23, 1, %51, 0, 0, implicit $mode, implicit $exec
278    %53 = V_LSHLREV_B32_e64 16, %52, implicit $exec
279    %54 = V_BFE_U32_e64 %53, 8, 8, implicit $exec
280    %55 = V_FMAC_F32_e64 1, 23, 1, %54, 1, %54, 1, 0, implicit $mode, implicit $exec
281    %56 = V_LSHLREV_B32_e64 16, %55, implicit $exec
282    %57 = V_LSHRREV_B32_e64 16, %56, implicit $exec
283    %58 = V_BFE_U32_e64 %56, 8, 8, implicit $exec
284    %59 = V_FMAC_F16_e64 1, %57, 1, %58, 1, %58, 0, 2, implicit $mode, implicit $exec
285    %60 = V_LSHLREV_B32_e64 16, %59, implicit $exec
286
    ; Move the last result into %100 and store it through the address in %0.
287    %100 = V_MOV_B32_e32 %60, implicit $exec
288
289    FLAT_STORE_DWORD %0, %100, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
    ; Write %2 back to $sgpr30_sgpr31 and return through it.
290    $sgpr30_sgpr31 = COPY %2
291    S_SETPC_B64_return $sgpr30_sgpr31
292
293...
294