# RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX9 %s

# Two loads whose addresses differ by a constant (4096 vs 6144, i.e. a 2048-byte
# delta) but whose base-add instructions list their operands in different order.
# The pass should rewrite both to use one common anchor base (built from the
# S_MOV_B32 6144 add) and fold the delta into the load's immediate offset.
#
# GFX9-LABEL: name: diffoporder_add

# GFX9: %{{[0-9]+}}:vreg_64_align2 = REG_SEQUENCE

# GFX9: S_MOV_B32 6144
# GFX9-NEXT: V_ADD_CO_U32
# GFX9-NEXT: V_ADDC_U32
# GFX9-NEXT: [[PTR0:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE
# GFX9-NEXT: %{{[0-9]+}}:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[PTR0]], -2048, 0
# GFX9: %{{[0-9]+}}:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[PTR0]], 0, 0

name: diffoporder_add
body: |
  bb.0.entry:
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
    %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
    %4:sreg_32_xm0 = COPY $sgpr101
    %5:sreg_32_xm0 = S_MOV_B32 0
    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
    $sgpr4 = COPY %4
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    %6:vreg_64_align2 = COPY $vgpr0_vgpr1
    %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %9:vreg_64_align2 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
    %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
    %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
    %12:sgpr_32 = COPY %1.sub1
    %13:vgpr_32 = COPY %5
    %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
    %16:vgpr_32 = COPY %12
    %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
    %19:vreg_64_align2 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
    %20:vreg_64_align2 = V_LSHLREV_B64_e64 3, %9, implicit $exec
    %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
    %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
    ; First address: base + 4096, with the immediate as the FIRST add operand.
    %25:sgpr_32 = S_MOV_B32 4096
    %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %25, %21, 0, implicit $exec
    %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
    %30:vreg_64_align2 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
    %31:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
    ; Second address: base + 6144, with the immediate as the SECOND add operand.
    %32:sgpr_32 = S_MOV_B32 6144
    %33:vgpr_32, %34:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
    %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
    %37:vreg_64_align2 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
    %38:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
...
---

# Three loads at base + 8000, base + 6400 and base + 11200: the smallest
# offset is not first in program order.  Per the CHECK lines, the 11200 add
# becomes the anchor base (first load folds -3200), while the 6400 add keeps
# its own base and the remaining loads use offset 0.
#
# GFX9-LABEL: name: LowestInMiddle
# GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11200
# GFX9: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
# GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
# GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
# GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -3200, 0
#
# GFX9: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 6400
# GFX9: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_7:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]]
# GFX9: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_7]]
# GFX9: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1
# GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0,
# GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0,

name: LowestInMiddle
body: |
  bb.0.entry:
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
    %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
    %4:sreg_32_xm0 = COPY $sgpr101
    %5:sreg_32_xm0 = S_MOV_B32 0
    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
    $sgpr4 = COPY %4
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    %6:vreg_64_align2 = COPY $vgpr0_vgpr1
    %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %9:vreg_64_align2 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
    %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
    %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
    %12:sgpr_32 = COPY %1.sub1
    %13:vgpr_32 = COPY %5
    %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
    %16:vgpr_32 = COPY %12
    %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
    %19:vreg_64_align2 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
    %20:vreg_64_align2 = V_LSHLREV_B64_e64 3, %9, implicit $exec
    %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
    %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
    %25:sgpr_32 = S_MOV_B32 8000
    %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
    %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
    %30:vreg_64_align2 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
    %31:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
    %32:sgpr_32 = S_MOV_B32 6400
    %33:vgpr_32, %34:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
    %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
    %37:vreg_64_align2 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
    %38:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
    %39:sgpr_32 = S_MOV_B32 11200
    %40:vgpr_32, %41:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %39, 0, implicit $exec
    %42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
    %44:vreg_64_align2 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
    %45:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, implicit $exec
...
---

# Offsets 6144, 8192 and 10240 relative to a common base: the anchor is built
# from the largest add (10240) and the earlier loads fold the negative deltas
# (-4096 and -2048) into their immediate offsets.
#
# GFX9-LABEL: name: NegativeDistance
# GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 10240
# GFX9: [[V_ADD_CO_U32_e64_4:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
# GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
# GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_4]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
# GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -4096, 0
# GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -2048, 0
# GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0

name: NegativeDistance
body: |
  bb.0.entry:
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
    %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
    ; Was $sgpr10 — aligned with the sibling tests, which all copy $sgpr101.
    ; %4 is only forwarded to $sgpr4 below and no CHECK line matches it.
    %4:sreg_32_xm0 = COPY $sgpr101
    %5:sreg_32_xm0 = S_MOV_B32 0
    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
    $sgpr4 = COPY %4
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    %6:vreg_64_align2 = COPY $vgpr0_vgpr1
    %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %9:vreg_64_align2 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
    %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
    %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
    %12:sgpr_32 = COPY %1.sub1
    %13:vgpr_32 = COPY %5
    %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
    %16:vgpr_32 = COPY %12
    %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
    %19:vreg_64_align2 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
    %20:vreg_64_align2 = V_LSHLREV_B64_e64 3, %9, implicit $exec
    %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
    %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
    %25:sgpr_32 = S_MOV_B32 6144
    %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
    %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
    %30:vreg_64_align2 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
    %31:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
    %32:sgpr_32 = S_MOV_B32 8192
    %33:vgpr_32, %34:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
    %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
    %37:vreg_64_align2 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
    %38:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
    %39:sgpr_32 = S_MOV_B32 10240
    %40:vgpr_32, %41:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %39, 0, implicit $exec
    %42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
    %44:vreg_64_align2 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
    %45:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, implicit $exec
...
---

# Tests for a successful compilation.
# No CHECK lines: the pass only has to process this input without crashing.
# NOTE(review): the V_ADDC_U32 below uses 4294967295 (-1) instead of the 0
# carry operand used everywhere else — presumably chosen so the address math
# is not a simple "base + constant" the pass can rewrite; confirm against the
# pass's matching logic before changing it.
name: assert_hit
body: |
  bb.0.entry:
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
    %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
    %4:sreg_32_xm0 = COPY $sgpr101
    %5:sreg_32_xm0 = S_MOV_B32 0
    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
    $sgpr4 = COPY %4
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    %6:vreg_64_align2 = COPY $vgpr0_vgpr1
    %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %9:vreg_64_align2 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
    %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
    %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
    %12:sgpr_32 = COPY %1.sub1
    %13:vgpr_32 = COPY %5
    %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
    %16:vgpr_32 = COPY %12
    %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
    %19:vreg_64_align2 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
    %20:vreg_64_align2 = V_LSHLREV_B64_e64 3, %9, implicit $exec
    %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
    %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec

    %25:sgpr_32 = S_MOV_B32 6144
    %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
    %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 4294967295, killed %27, 0, implicit $exec
    %30:vreg_64_align2 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
    %31:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
...
---

# Same different-operand-order scenario as diffoporder_add, but for stores:
# the 3000-byte base becomes the anchor and the 4000-byte store folds the
# 1000-byte delta into its immediate offset.
#
# GFX9-LABEL: name: diffoporder_add_store
# GFX9: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0,
# GFX9: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0,

name: diffoporder_add_store
body: |
  bb.0.entry:

    %0:vreg_64_align2 = COPY $vgpr0_vgpr1

    %1:sgpr_32 = S_MOV_B32 4000
    %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
    %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
    %6:vreg_64_align2 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
    GLOBAL_STORE_DWORD %6, %0.sub0, 0, 0, implicit $exec

    %8:sgpr_32 = S_MOV_B32 3000
    %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
    %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
    %13:vreg_64_align2 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
    GLOBAL_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec
...