# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX10 %s

# This file runs only the si-load-store-opt pass for gfx1010 and checks how
# constant address offsets end up in the immediate offset operand of global
# memory instructions.
#
# diffoporder_add: two 64-bit global loads whose addresses are the shared
# 64-bit value %21/%23 plus the constants 4096 and 6144. The constant is the
# first source operand of the first V_ADD_CO_U32_e64 and the second source
# operand of the other one (presumably what "diffoporder" refers to). The
# checks expect the loads to carry the immediate offsets -2048 and 0, i.e. the
# first address expressed relative to the second (4096 - 6144 = -2048).

# GFX10-LABEL: name: diffoporder_add
# GFX10: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, -2048, 0
# GFX10: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0

name: diffoporder_add
body: |
  bb.0.entry:
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
    %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
    %4:sreg_32_xm0 = COPY $sgpr101
    %5:sreg_32_xm0 = S_MOV_B32 0
    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
    $sgpr4 = COPY %4
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    %6:vreg_64 = COPY $vgpr0_vgpr1
    %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
    %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
    %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
    %12:sgpr_32 = COPY %1.sub1
    %13:vgpr_32 = COPY %5
    %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
    %16:vgpr_32 = COPY %12
    %17:vgpr_32, dead %18:sreg_32_xm0_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
    %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
    %20:vreg_64 = V_LSHLREV_B64_e64 3, %9, implicit $exec
    %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
    %23:vgpr_32, dead %24:sreg_32_xm0_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
    %25:sgpr_32 = S_MOV_B32 4096
    %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %25, %21, 0, implicit $exec
    %28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
    %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
    %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
    %32:sgpr_32 = S_MOV_B32 6144
    %33:vgpr_32, %34:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
    %35:vgpr_32, dead %36:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
    %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
    %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
...
---

# LowestInMiddle: three loads at the shared value %21/%23 plus 8000, 6400 and
# 11200, in that program order, so the smallest constant belongs to the middle
# load. The checks expect:
#   - a base address built from an S_MOV_B32 6400 in an sreg_32 register,
#   - the first two loads using that base with immediate offsets 1600
#     (8000 - 6400) and 0,
#   - the third load keeping its own base built from the sgpr_32
#     S_MOV_B32 11200, with immediate offset 0.
# NOTE(review): presumably 11200 - 6400 = 4800 does not fit the immediate
# offset field on this target, which is why the third load is expected to stay
# untouched - confirm against the ISA documentation.

# GFX10-LABEL: name: LowestInMiddle
# GFX10: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 6400
# GFX10: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
# GFX10: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
# GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
# GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 1600, 0
# GFX10: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0,
#
# GFX10: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 11200
# GFX10: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_7:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]]
# GFX10: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_7]]
# GFX10: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1
# GFX10: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0,

name: LowestInMiddle
body: |
  bb.0.entry:
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
    %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
    %4:sreg_32_xm0 = COPY $sgpr101
    %5:sreg_32_xm0 = S_MOV_B32 0
    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
    $sgpr4 = COPY %4
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    %6:vreg_64 = COPY $vgpr0_vgpr1
    %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
    %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
    %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
    %12:sgpr_32 = COPY %1.sub1
    %13:vgpr_32 = COPY %5
    %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
    %16:vgpr_32 = COPY %12
    %17:vgpr_32, dead %18:sreg_32_xm0_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
    %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
    %20:vreg_64 = V_LSHLREV_B64_e64 3, %9, implicit $exec
    %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
    %23:vgpr_32, dead %24:sreg_32_xm0_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
    %25:sgpr_32 = S_MOV_B32 8000
    %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
    %28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
    %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
    %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
    %32:sgpr_32 = S_MOV_B32 6400
    %33:vgpr_32, %34:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
    %35:vgpr_32, dead %36:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
    %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
    %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
    %39:sgpr_32 = S_MOV_B32 11200
    %40:vgpr_32, %41:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %39, 0, implicit $exec
    %42:vgpr_32, dead %43:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
    %44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
    %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, implicit $exec
...
---

# NegativeDistance: three loads at the shared value %21/%23 plus 6144, 8192
# and 10240, in that program order. The checks expect:
#   - a base address built from an S_MOV_B32 8192 in an sreg_32 register,
#   - the first two loads using that base with immediate offsets -2048
#     (6144 - 8192) and 0,
#   - the third load keeping its own base built from the sgpr_32
#     S_MOV_B32 10240, with immediate offset 0.
# NOTE(review): the distances to the 8192 base are -2048 and +2048; presumably
# only the negative one fits the immediate offset field on this target, which
# would explain both the test name and the untouched third load - confirm
# against the ISA documentation.

# GFX10-LABEL: name: NegativeDistance
# GFX10: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 8192
# GFX10: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
# GFX10: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
# GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
# GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -2048, 0
# GFX10: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0
# GFX10: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 10240
# GFX10: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_7:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]]
# GFX10: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_7]]
# GFX10: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1
# GFX10: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0

name: NegativeDistance
body: |
  bb.0.entry:
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
    %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
    %4:sreg_32_xm0 = COPY $sgpr101
    %5:sreg_32_xm0 = S_MOV_B32 0
    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
    $sgpr4 = COPY %4
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    %6:vreg_64 = COPY $vgpr0_vgpr1
    %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
    %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
    %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
    %12:sgpr_32 = COPY %1.sub1
    %13:vgpr_32 = COPY %5
    %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
    %16:vgpr_32 = COPY %12
    %17:vgpr_32, dead %18:sreg_32_xm0_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
    %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
    %20:vreg_64 = V_LSHLREV_B64_e64 3, %9, implicit $exec
    %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
    %23:vgpr_32, dead %24:sreg_32_xm0_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
    %25:sgpr_32 = S_MOV_B32 6144
    %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
    %28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
    %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
    %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
    %32:sgpr_32 = S_MOV_B32 8192
    %33:vgpr_32, %34:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
    %35:vgpr_32, dead %36:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
    %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
    %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
    %39:sgpr_32 = S_MOV_B32 10240
    %40:vgpr_32, %41:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %39, 0, implicit $exec
    %42:vgpr_32, dead %43:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
    %44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
    %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, implicit $exec
...
---

# Tests for a successful compilation. The function that follows has no check
# lines of its own; it only has to get through the llc invocation at the top
# of this file.
# assert_hit: a single 64-bit global load whose address is %21/%23 plus 6144
# in the low half. Unlike the other functions in this file, the high-half
# V_ADDC_U32_e64 adds the literal 4294967295 (0xFFFFFFFF) instead of 0.
# No output is matched for this function; only the pass completing is tested.
# NOTE(review): judging by the name, this input presumably used to trigger an
# assertion in the pass - confirm against the commit that added it.
name: assert_hit
body: |
  bb.0.entry:
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
    %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
    %4:sreg_32_xm0 = COPY $sgpr101
    %5:sreg_32_xm0 = S_MOV_B32 0
    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
    $sgpr4 = COPY %4
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    %6:vreg_64 = COPY $vgpr0_vgpr1
    %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
    %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
    %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
    %12:sgpr_32 = COPY %1.sub1
    %13:vgpr_32 = COPY %5
    %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
    %16:vgpr_32 = COPY %12
    %17:vgpr_32, dead %18:sreg_32_xm0_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
    %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
    %20:vreg_64 = V_LSHLREV_B64_e64 3, %9, implicit $exec
    %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
    %23:vgpr_32, dead %24:sreg_32_xm0_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec

    %25:sgpr_32 = S_MOV_B32 6144
    %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
    %28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 4294967295, killed %27, 0, implicit $exec
    %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
    %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
...
---

# diffoporder_add_store: the store counterpart of the load tests. Two
# GLOBAL_STORE_DWORD instructions write %0.sub0 and %0.sub1 to the 64-bit
# value in %0 plus 4000 and plus 3000 respectively. The checks expect the
# first store to carry immediate offset 1000 (4000 - 3000) and the second to
# carry immediate offset 0.

# GFX10-LABEL: name: diffoporder_add_store
# GFX10: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0
# GFX10: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0

name: diffoporder_add_store
body: |
  bb.0.entry:

    %0:vreg_64 = COPY $vgpr0_vgpr1

    %1:sgpr_32 = S_MOV_B32 4000
    %2:vgpr_32, %3:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
    %4:vgpr_32, dead %5:sreg_32_xm0_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
    %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
    GLOBAL_STORE_DWORD %6, %0.sub0, 0, 0, implicit $exec

    %8:sgpr_32 = S_MOV_B32 3000
    %9:vgpr_32, %10:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
    %11:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
    %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
    GLOBAL_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec
...