# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-optimize-exec-masking -o -  %s | FileCheck %s

--- |
  target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"

  define amdgpu_kernel void @optimize_if_and_saveexec_xor(i32 %z, i32 %v) #0 {
  main_body:
    %id = call i32 @llvm.amdgcn.workitem.id.x()
    %cc = icmp eq i32 %id, 0
    %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %cc)
    %1 = extractvalue { i1, i64 } %0, 0
    %2 = extractvalue { i1, i64 } %0, 1
    br i1 %1, label %if, label %end

  if:                                               ; preds = %main_body
    %v.if = load volatile i32, i32 addrspace(1)* undef
    br label %end

  end:                                              ; preds = %if, %main_body
    %r = phi i32 [ 4, %main_body ], [ %v.if, %if ]
    call void @llvm.amdgcn.end.cf(i64 %2)
    store i32 %r, i32 addrspace(1)* undef
    ret void
  }

  define amdgpu_kernel void @optimize_if_and_saveexec(i32 %z, i32 %v)  #0 {
  main_body:
      br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_or_saveexec(i32 %z, i32 %v)  #0 {
  main_body:
      br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }


  define amdgpu_kernel void @optimize_if_and_saveexec_xor_valu_middle(i32 %z, i32 %v) #0 {
  main_body:
    %id = call i32 @llvm.amdgcn.workitem.id.x()
    %cc = icmp eq i32 %id, 0
    %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %cc)
    %1 = extractvalue { i1, i64 } %0, 0
    %2 = extractvalue { i1, i64 } %0, 1
    store i32 %id, i32 addrspace(1)* undef
    br i1 %1, label %if, label %end

  if:                                               ; preds = %main_body
    %v.if = load volatile i32, i32 addrspace(1)* undef
    br label %end

  end:                                              ; preds = %if, %main_body
    %r = phi i32 [ 4, %main_body ], [ %v.if, %if ]
    call void @llvm.amdgcn.end.cf(i64 %2)
    store i32 %r, i32 addrspace(1)* undef
    ret void
  }

  define amdgpu_kernel void @optimize_if_and_saveexec_xor_wrong_reg(i32 %z, i32 %v)  #0 {
  main_body:
      br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_and_saveexec_xor_modify_copy_to_exec(i32 %z, i32 %v)  #0 {
  main_body:
      br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_and_saveexec_xor_live_out_setexec(i32 %z, i32 %v)  #0 {
  main_body:
      br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_unknown_saveexec(i32 %z, i32 %v)  #0 {
  main_body:
      br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_andn2_saveexec(i32 %z, i32 %v)  #0 {
  main_body:
      br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define amdgpu_kernel void @optimize_if_andn2_saveexec_no_commute(i32 %z, i32 %v)  #0 {
  main_body:
      br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  ; Function Attrs: nounwind readnone
  declare i32 @llvm.amdgcn.workitem.id.x() #1

  declare { i1, i64 } @llvm.amdgcn.if(i1)

  declare void @llvm.amdgcn.end.cf(i64)


  attributes #0 = { nounwind }
  attributes #1 = { nounwind readnone }

...
---
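# The exec copy, the S_AND_B64 and the terminator copy back to exec are folded
# into a single S_AND_SAVEEXEC_B64; the S_XOR_B64 is rewritten to read $exec.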
# CHECK-LABEL: name: optimize_if_and_saveexec_xor{{$}}
# CHECK: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: SI_MASK_BRANCH

name:            optimize_if_and_saveexec_xor
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
liveins:
  - { reg: '$vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
    $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
    $exec = S_MOV_B64_term killed $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1

    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
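# Same fold as above, without the intervening S_XOR_B64.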
# CHECK-LABEL: name: optimize_if_and_saveexec{{$}}
# CHECK: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
# CHECK-NEXT: SI_MASK_BRANCH

name:            optimize_if_and_saveexec
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
liveins:
  - { reg: '$vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
    $exec = S_MOV_B64_term killed $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1

    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
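# S_OR_B64 of the saved exec mask folds into S_OR_SAVEEXEC_B64.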
# CHECK-LABEL: name: optimize_if_or_saveexec{{$}}
# CHECK: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
# CHECK-NEXT: SI_MASK_BRANCH

name:            optimize_if_or_saveexec
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
liveins:
  - { reg: '$vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_OR_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
    $exec = S_MOV_B64_term killed $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1

    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
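# A VALU instruction (the buffer store) sits between the S_AND_B64 and the copy
# back to exec, so S_AND_SAVEEXEC_B64 is not formed; only the terminator copy is
# lowered to a regular COPY.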
# CHECK-LABEL: name: optimize_if_and_saveexec_xor_valu_middle
# CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH
name:            optimize_if_and_saveexec_xor_valu_middle
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
liveins:
  - { reg: '$vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
    BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(1)* undef`)
    $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
    $exec = S_MOV_B64_term killed $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1

    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
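# The S_AND_B64 writes its result into the register holding the saved exec copy
# rather than a separate register, so the saveexec pattern is not matched; the
# terminator copy is still lowered to a regular COPY.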
# CHECK-LABEL: name: optimize_if_and_saveexec_xor_wrong_reg{{$}}
# CHECK: $sgpr0_sgpr1 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 undef $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY $sgpr0_sgpr1
# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec
name:            optimize_if_and_saveexec_xor_wrong_reg
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
liveins:
  - { reg: '$vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr6 = S_MOV_B32 -1
    $sgpr7 = S_MOV_B32 61440
    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr0_sgpr1 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
    $sgpr0_sgpr1 = S_XOR_B64 undef $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
    $exec = S_MOV_B64_term $sgpr0_sgpr1
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1, $sgpr4_sgpr5_sgpr6_sgpr7
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1, $sgpr4_sgpr5_sgpr6_sgpr7

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
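# The register that is copied to exec is modified (S_OR_B64 with 1) before the
# terminator copy, so the fold is not performed.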
# CHECK-LABEL: name: optimize_if_and_saveexec_xor_modify_copy_to_exec{{$}}
# CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
# CHECK-NEXT: $sgpr2_sgpr3 = S_OR_B64 killed $sgpr2_sgpr3, 1, implicit-def $scc
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec

name:            optimize_if_and_saveexec_xor_modify_copy_to_exec
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
liveins:
  - { reg: '$vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
    $sgpr2_sgpr3 = S_OR_B64 killed $sgpr2_sgpr3, 1, implicit-def $scc
    $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
    $exec = S_MOV_B64_term killed $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1

    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr0 = S_MOV_B32 0
    $sgpr1 = S_MOV_B32 1
    $sgpr2 = S_MOV_B32 -1
    $sgpr3 = S_MOV_B32 61440
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
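# The register copied to exec ($sgpr2_sgpr3) is still used after the branch
# (S_SLEEP in bb.1), so the sequence cannot be folded into a saveexec; the
# terminator copy is lowered to a COPY without a kill.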
# CHECK-LABEL: name: optimize_if_and_saveexec_xor_live_out_setexec{{$}}
# CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY $sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH
name:            optimize_if_and_saveexec_xor_live_out_setexec
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
liveins:
  - { reg: '$vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
    $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
    $exec = S_MOV_B64_term $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
    S_SLEEP 0, implicit $sgpr2_sgpr3
    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
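# S_LSHR_B64 has no *_SAVEEXEC form, so only the terminator copy to exec is
# lowered to a regular COPY.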
# CHECK-LABEL: name: optimize_if_unknown_saveexec{{$}}
# CHECK: $sgpr0_sgpr1 = COPY $exec
# CHECK: $sgpr2_sgpr3 = S_LSHR_B64 $sgpr0_sgpr1, killed $vcc_lo, implicit-def $scc
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec

name:            optimize_if_unknown_saveexec
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
liveins:
  - { reg: '$vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_LSHR_B64 $sgpr0_sgpr1, killed $vcc_lo, implicit-def $scc
    $exec = S_MOV_B64_term killed $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1

    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
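# S_ANDN2_B64 of the saved exec mask folds into S_ANDN2_SAVEEXEC_B64.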
# CHECK-LABEL: name: optimize_if_andn2_saveexec{{$}}
# CHECK: $sgpr0_sgpr1 = S_ANDN2_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
# CHECK-NEXT: SI_MASK_BRANCH

name:            optimize_if_andn2_saveexec
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
liveins:
  - { reg: '$vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_ANDN2_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
    $exec = S_MOV_B64_term killed $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1

    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
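# The saved exec mask is the second operand of S_ANDN2_B64, which is not
# commutable, so S_ANDN2_SAVEEXEC_B64 is not formed; only the terminator copy
# is lowered to a regular COPY.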
# CHECK-LABEL: name: optimize_if_andn2_saveexec_no_commute{{$}}
# CHECK: $sgpr2_sgpr3 = S_ANDN2_B64 killed $vcc, $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec
name:            optimize_if_andn2_saveexec_no_commute
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
liveins:
  - { reg: '$vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.main_body:
    liveins: $vgpr0

    $sgpr0_sgpr1 = COPY $exec
    $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 4, implicit $exec
    $sgpr2_sgpr3 = S_ANDN2_B64 killed $vcc, $sgpr0_sgpr1, implicit-def $scc
    $exec = S_MOV_B64_term killed $sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1.if:
    liveins: $sgpr0_sgpr1

    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: $vgpr0, $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...