# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -o - -run-pass=machine-cse %s | FileCheck %s

# LLVM's current definition of `isConvergent` does not necessarily prove that
# non-local CSE is illegal. The following test extends the definition of
# `isConvergent` to assume a convergent instruction is dependent not only on
# additional conditions, but also on fewer conditions. LLVM does not have a
# MachineInstr attribute which expresses this extended definition, so it's
# necessary to use `isConvergent` to prevent illegally CSE-ing the subset of
# `isConvergent` instructions which do fall into this extended definition.

# This is a coverage test for the MachineCSE change. It does not reproduce an
# actual bug in the AMDGPU backend. The current open source GPU backends as is
# do not appear to allow a reasonably simple test case that provably and
# undeniably functionally breaks without the associated MachineCSE changes.

# The test checks that we don't CSE non-local convergent instrs. Otherwise,
# reusing defs of convergent instrs from different control flow scopes can
# cause illegal codegen. Previously, the swizzle in bb2 would be CSE-ed in
# favor of using the swizzle in bb1 despite bb2 being a different BB.
# CHECK-LABEL: name: no_cse
# CHECK: bb.1.if.then
# CHECK: [[SWIZZLE1:%[0-9]+]]:vgpr_32 = DS_SWIZZLE_B32 [[SRC:%[0-9]+]], 100, 0, implicit $exec
# CHECK-NEXT: V_ADD_CO_U32_e64 [[SWIZZLE1]], {{%[0-9]+}}, 0, implicit $exec
# CHECK-NEXT: S_CMP_LT_I32 {{.*}} implicit-def $scc
# CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit $scc
# CHECK-NEXT: S_BRANCH %bb.2
# CHECK: bb.2.if.then.if.then
# CHECK: [[SWIZZLE2:%[0-9]+]]:vgpr_32 = DS_SWIZZLE_B32 [[SRC]], 100, 0, implicit $exec
# CHECK-NEXT: V_ADD_CO_U32_e64 [[SWIZZLE2]], {{%[0-9]+}}, 0, implicit $exec

--- |
  define amdgpu_kernel void @no_cse(i32 addrspace(1)*, i32, i1) {
  entry:
    unreachable
  if.then:
    unreachable
  if.then.if.then:
    unreachable
  if.then.phi:
    unreachable
  exit:
    unreachable
  }
...
---
name: no_cse
tracksRegLiveness: true
body: |
  bb.0.entry:
    liveins: $sgpr4_sgpr5
    %0:sgpr_64(p4) = COPY $sgpr4_sgpr5
    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0(p4), 0, 0
    %2:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0(p4), 2, 0
    %3:sreg_64 = COPY %1
    %4:sreg_32 = COPY %2.sub1
    %5:sreg_32 = S_MOV_B32 42
    S_CMP_EQ_U32 %4, %5, implicit-def $scc
    %6:vgpr_32 = COPY %5, implicit $exec
    S_CBRANCH_SCC1 %bb.4, implicit $scc
    S_BRANCH %bb.1

  bb.1.if.then:
    %7:sreg_32 = COPY %2.sub0
    %8:vgpr_32 = COPY %7
    ; First swizzle of %8; bb.2 repeats the same instruction on the same operands.
    %9:vgpr_32 = DS_SWIZZLE_B32 %8, 100, 0, implicit $exec
    %10:vgpr_32, %21:sreg_32 = V_ADD_CO_U32_e64 %9, %5, 0, implicit $exec
    S_CMP_LT_I32 %7, %5, implicit-def $scc
    S_CBRANCH_SCC1 %bb.3, implicit $scc
    S_BRANCH %bb.2

  bb.2.if.then.if.then:
    %11:sreg_32 = S_MOV_B32 64
    ; Identical to the swizzle defining %9 in bb.1. The CHECK lines require it
    ; to survive machine-cse instead of being replaced by a use of %9.
    %12:vgpr_32 = DS_SWIZZLE_B32 %8, 100, 0, implicit $exec
    %13:vgpr_32, %24:sreg_32 = V_ADD_CO_U32_e64 %12, %11, 0, implicit $exec

  bb.3.if.then.phi:
    ; Merges the add results computed in bb.1 and bb.2.
    %14:vgpr_32 = PHI %10, %bb.1, %13, %bb.2

  bb.4.exit:
    ; Selects %6 when arriving from bb.0, otherwise the merged value from bb.3.
    %15:vgpr_32 = PHI %6, %bb.0, %14, %bb.3
    %16:vreg_64 = COPY %3
    FLAT_STORE_DWORD %16, %15, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0

...