# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
# Tests for the gcn-dpp-combine pass: each function pairs V_MOV_B32_dpp
# instructions with VALU consumers and checks whether the pair is folded into
# a single *_dpp instruction or left untouched.

---
# old is undefined: only combine when masks are fully enabled and
# bound_ctrl:0 is set, otherwise the result of DPP VALU op can be undefined.
# GCN-LABEL: name: old_is_undef
# GCN: %2:vgpr_32 = IMPLICIT_DEF
# VOP2:
# GCN: %4:vgpr_32 = V_ADD_U32_dpp %2, %0, %1, 1, 15, 15, 1, implicit $exec
# GCN: %6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec
# GCN: %8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec
# GCN: %10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec
# VOP1:
# GCN: %12:vgpr_32 = V_NOT_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
# GCN: %14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec
# GCN: %16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec
# GCN: %18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
name: old_is_undef
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    ; VOP2
    ; masks fully enabled, bound_ctrl:0 on - the only VOP2 pair expected to combine
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec

    ; masks fully enabled, bound_ctrl:0 off - expected to stay uncombined
    %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
    %6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec

    ; masks partially disabled, bound_ctrl:0 on - expected to stay uncombined
    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
    %8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec

    ; masks partially disabled, bound_ctrl:0 off - expected to stay uncombined
    %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec

    ; VOP1
    ; same four mask/bound_ctrl variants as above, with a unary consumer
    %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %12:vgpr_32 = V_NOT_B32_e32 %11, implicit $exec

    %13:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
    %14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec

    %15:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
    %16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec

    %17:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
...

# old is zero cases:

# GCN-LABEL: name: old_is_0

# VOP2:
# case 1: old is zero, masks are fully enabled, bound_ctrl:0 is on:
# the DPP mov result would be either zero ({src lane disabled}|{src lane is
# out of range}) or active src lane result - can combine with old = undef.
# undef is preferred as it makes life easier for the regalloc.
# GCN: [[U1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
# GCN: %4:vgpr_32 = V_ADD_U32_dpp [[U1]], %0, %1, 1, 15, 15, 1, implicit $exec

# case 2: old is zero, masks are fully enabled, bound_ctrl:0 is off:
# as the DPP mov old is zero this case is no different from case 1 - combine it
# setting bound_ctrl0 on for the combined DPP VALU op to make old undefined
# GCN: [[U2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
# GCN: %6:vgpr_32 = V_ADD_U32_dpp [[U2]], %0, %1, 1, 15, 15, 1, implicit $exec

# case 3: masks are partially disabled, bound_ctrl:0 is on:
# the DPP mov result would be either zero ({src lane disabled}|{src lane is
# out of range} or {the DPP mov's dest VGPR write is disabled by masks}) or
# active src lane result - can combine with old = src1 of the VALU op.
# The VALU op should have the same masks as DPP mov as they select lanes
# with identity value.
# Special case: the bound_ctrl for the combined DPP VALU op isn't important
# here but let's make it off to keep the combiner's logic simpler.
# GCN: %8:vgpr_32 = V_ADD_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec

# case 4: masks are partially disabled, bound_ctrl:0 is off:
# the DPP mov result would be either zero ({src lane disabled}|{src lane is
# out of range} or {the DPP mov's dest VGPR write is disabled by masks}) or
# active src lane result - can combine with old = src1 of the VALU op.
# The VALU op should have the same masks as DPP mov as they select
# lanes with identity value
# GCN: %10:vgpr_32 = V_ADD_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec

# VOP1:
# see case 1
# GCN: [[U3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
# GCN: %12:vgpr_32 = V_NOT_B32_dpp [[U3]], %0, 1, 15, 15, 1, implicit $exec
# see case 2
# GCN: [[U4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
# GCN: %14:vgpr_32 = V_NOT_B32_dpp [[U4]], %0, 1, 15, 15, 1, implicit $exec
# cases 3 and 4 are not applicable as there is no way to specify an unchanged
# result for the unary VALU op
# GCN: %16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec
# GCN: %18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec

name: old_is_0
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec

    ; VOP2
    ; case 1
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec

    ; case 2
    %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
    %6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec

    ; case 3
    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
    %8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec

    ; case 4
    %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec

    ; VOP1
    ; same operand patterns as cases 1-4, with a unary consumer
    %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %12:vgpr_32 = V_NOT_B32_e32 %11, implicit $exec

    %13:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
    %14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec

    %15:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
    %16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec

    %17:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
...
# old is nonzero identity cases:

# old is nonzero identity, masks are fully enabled, bound_ctrl:0 is off:
# the DPP mov result would be either identity ({src lane disabled}|{out of
# range}) or src lane result - can combine with old = src1 of the VALU op
# The DPP VALU op should have the same masks (and bctrl) as DPP mov as they
# select lanes with identity value

# GCN-LABEL: name: nonzero_old_is_identity_masks_enabled_bctl_off
# GCN: %4:vgpr_32 = V_MUL_U32_U24_dpp %1, %0, %1, 1, 15, 15, 0, implicit $exec
# GCN: %7:vgpr_32 = V_AND_B32_dpp %1, %0, %1, 1, 15, 15, 0, implicit $exec
# GCN: %10:vgpr_32 = V_MAX_I32_dpp %1, %0, %1, 1, 15, 15, 0, implicit $exec
# GCN: %13:vgpr_32 = V_MIN_I32_dpp %1, %0, %1, 1, 15, 15, 0, implicit $exec

name: nonzero_old_is_identity_masks_enabled_bctl_off
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1

    ; old = 1, the identity for multiplication
    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
    %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec

    ; old = all ones (32-bit), the identity for bitwise and
    %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 15, 0, implicit $exec
    %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec

    ; old = smallest signed 32-bit value, the identity for signed max
    %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 15, 15, 0, implicit $exec
    %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec

    ; old = largest signed 32-bit value, the identity for signed min
    %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 15, 0, implicit $exec
    %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
...
# old is nonzero identity, masks are partially enabled, bound_ctrl:0 is off:
# the DPP mov result would be either identity ({src lane disabled}|{src lane is
# out of range} or {the DPP mov's dest VGPR write is disabled by masks}) or
# active src lane result - can combine with old = src1 of the VALU op.
# The DPP VALU op should have the same masks (and bctrl) as DPP mov as they
# select lanes with identity value

# GCN-LABEL: name: nonzero_old_is_identity_masks_partially_disabled_bctl_off
# GCN: %4:vgpr_32 = V_MUL_U32_U24_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %7:vgpr_32 = V_AND_B32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec
# GCN: %10:vgpr_32 = V_MAX_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %13:vgpr_32 = V_MIN_I32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec

name: nonzero_old_is_identity_masks_partially_disabled_bctl_off
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1

    ; the pairs alternate which of the two mask operands is 14 instead of 15
    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec

    %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
    %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec

    %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
    %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec

    %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
    %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
...

# old is nonzero identity, masks are partially enabled, bound_ctrl:0 is on:
# the DPP mov result may have 3 different values:
# 1. the active src lane result
# 2. 0 if the src lane is disabled|out of range
# 3. DPP mov's old value if the mov's dest VGPR write is disabled by masks
# can't combine

# GCN-LABEL: name: nonzero_old_is_identity_masks_partially_disabled_bctl0
# GCN: %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec
# GCN: %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec
# GCN: %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec
# GCN: %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec

name: nonzero_old_is_identity_masks_partially_disabled_bctl0
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1

    ; same inputs as the bound_ctrl-off test above, but with the last DPP
    ; operand (bound_ctrl) set to 1; every consumer is expected to stay e32
    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
    %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec

    %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 1, implicit $exec
    %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec

    %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 1, implicit $exec
    %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec

    %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 1, implicit $exec
    %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
...
# when the DPP source isn't a src0 operand the operation should be commuted if possible
# GCN-LABEL: name: dpp_commute
# GCN: %4:vgpr_32 = V_MUL_U32_U24_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %7:vgpr_32 = V_AND_B32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec
# GCN: %10:vgpr_32 = V_MAX_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %13:vgpr_32 = V_MIN_I32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec
# GCN: %16:vgpr_32 = V_SUBREV_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
# GCN: %19:vgpr_32 = V_ADD_CO_U32_e32 5, %18, implicit-def $vcc, implicit $exec
name: dpp_commute
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1

    ; in every pair below the DPP mov result is the second source of the consumer
    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %4:vgpr_32 = V_MUL_U32_U24_e32 %1, %3, implicit $exec

    %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
    %7:vgpr_32 = V_AND_B32_e32 %1, %6, implicit $exec

    %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
    %10:vgpr_32 = V_MAX_I32_e32 %1, %9, implicit $exec

    %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
    %13:vgpr_32 = V_MIN_I32_e32 %1, %12, implicit $exec

    ; the SUB consumer is expected to come out as the SUBREV dpp form
    %14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %15:vgpr_32 = V_MOV_B32_dpp %14, %0, 1, 14, 15, 0, implicit $exec
    %16:vgpr_32 = V_SUB_CO_U32_e32 %1, %15, implicit-def $vcc, implicit $exec

    ; this cannot be combined because immediate as src0 isn't commutable
    %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %18:vgpr_32 = V_MOV_B32_dpp %17, %0, 1, 14, 15, 0, implicit $exec
    %19:vgpr_32 = V_ADD_CO_U32_e32 5, %18, implicit-def $vcc, implicit $exec
...

---

# check for floating point modifiers
# GCN-LABEL: name: add_f32_e64
# GCN: %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
# GCN: %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec
# GCN: %6:vgpr_32 = V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %8:vgpr_32 = V_ADD_F32_dpp %2, 1, %1, 2, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec

name: add_f32_e64
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    ; this shouldn't be combined as omod is set
    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec

    ; this should be combined as all modifiers are default
    %5:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $mode, implicit $exec

    ; this should be combined as modifiers other than abs|neg are default
    %7:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $mode, implicit $exec

    ; this shouldn't be combined as modifiers aren't abs|neg
    %9:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec
...
# check for e64 modifiers
# GCN-LABEL: name: add_u32_e64
# GCN: %4:vgpr_32 = V_ADD_U32_dpp %2, %0, %1, 1, 15, 15, 1, implicit $exec
# GCN: %6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec

name: add_u32_e64
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    ; this should be combined as all modifiers are default
    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e64 %3, %1, 0, implicit $exec

    ; this shouldn't be combined as clamp is set
    %5:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
    %6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec
...

# tests on sequences of dpp consumers
# GCN-LABEL: name: dpp_seq
# GCN: %4:vgpr_32 = V_ADD_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
# GCN: %5:vgpr_32 = V_SUBREV_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
# GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# broken sequence:
# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec

name: dpp_seq
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec

    ; one DPP mov with three consumers, all expected to be combined
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %4:vgpr_32 = V_ADD_CO_U32_e32 %3, %1, implicit-def $vcc, implicit $exec
    %5:vgpr_32 = V_SUB_CO_U32_e32 %1, %3, implicit-def $vcc, implicit $exec
    %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec

    ; one DPP mov with two consumers; the mov is expected to remain
    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %8:vgpr_32 = V_ADD_CO_U32_e32 %7, %1, implicit-def $vcc, implicit $exec
    ; this breaks the sequence
    %9:vgpr_32 = V_SUB_CO_U32_e32 5, %7, implicit-def $vcc, implicit $exec
...
# tests on sequences of dpp consumers followed by control flow
# GCN-LABEL: name: dpp_seq_cf
# GCN: %4:vgpr_32 = V_ADD_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
# GCN: %5:vgpr_32 = V_SUBREV_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
# GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec

name: dpp_seq_cf
tracksRegLiveness: true
body: |
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec

    ; same three-consumer sequence as in dpp_seq
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %4:vgpr_32 = V_ADD_CO_U32_e32 %3, %1, implicit-def $vcc, implicit $exec
    %5:vgpr_32 = V_SUB_CO_U32_e32 %1, %3, implicit-def $vcc, implicit $exec
    %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec

    ; control flow that follows the sequence and modifies $exec
    %7:sreg_64 = V_CMP_EQ_U32_e64 %5, %6, implicit $exec
    %8:sreg_64 = SI_IF %7, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.2

  bb.2:
    SI_END_CF %8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
...

# old reg def is in diff BB - cannot combine
# GCN-LABEL: name: old_in_diff_bb
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 1, 1, 0, implicit $exec

name: old_in_diff_bb
tracksRegLiveness: true
body: |
  bb.0:
    successors: %bb.1
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    ; %2 (the old operand of the DPP mov) is defined here, in bb.0
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    %3:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 1, 1, 0, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %0, implicit $exec
...
# old reg def is in diff BB but bound_ctrl:0 - can combine
# GCN-LABEL: name: old_in_diff_bb_bctrl_zero
# GCN: %4:vgpr_32 = V_ADD_U32_dpp {{%[0-9]}}, %0, %1, 1, 15, 15, 1, implicit $exec

name: old_in_diff_bb_bctrl_zero
tracksRegLiveness: true
body: |
  bb.0:
    successors: %bb.1
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    ; %2 (the old operand of the DPP mov) is defined here, in bb.0
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
...

# EXEC mask changed between def and use - cannot combine
# GCN-LABEL: name: exec_changed
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec

name: exec_changed
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
    ; $exec is redefined here, between the DPP mov and its second use (%6)
    %5:sreg_64 = COPY $exec, implicit-def $exec
    %6:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
...

# test if $old definition is correctly tracked through subreg manipulation pseudos

# GCN-LABEL: name: mul_old_subreg
# GCN: %7:vgpr_32 = V_MUL_I32_I24_dpp %0.sub1, %1, %0.sub1, 1, 1, 1, 0, implicit $exec

name: mul_old_subreg
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vreg_64 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %5:vreg_64 = INSERT_SUBREG %4, %1, %subreg.sub1 ; %5.sub0 is taken from %4
    %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, %1, 1, 1, 1, 0, implicit $exec
    %7:vgpr_32 = V_MUL_I32_I24_e32 %6, %0.sub1, implicit $exec
...
# old is the sub1 lane written by INSERT_SUBREG (a zero move); the add is
# expected to be combined.
# GCN-LABEL: name: add_old_subreg
# GCN: %5:vgpr_32 = V_ADD_U32_dpp %0.sub1, %1, %0.sub1, 1, 1, 1, 0, implicit $exec

name: add_old_subreg
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vreg_64 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %3:vreg_64 = INSERT_SUBREG %0, %2, %subreg.sub1 ; %3.sub1 is inserted
    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 1, 1, 0, implicit $exec
    %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
...

# old is a sub1 lane that the REG_SEQUENCE never defines; the combined
# instruction is expected to carry it as an undef operand.
# GCN-LABEL: name: add_old_subreg_undef
# GCN: %5:vgpr_32 = V_ADD_U32_dpp undef %3.sub1, %1, %0.sub1, 1, 15, 15, 1, implicit $exec

name: add_old_subreg_undef
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vreg_64 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %3:vreg_64 = REG_SEQUENCE %2, %subreg.sub0 ; %3.sub1 is undef
    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 15, 15, 1, implicit $exec
    %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
...

# Test instruction which does not have modifiers in VOP1 form but does in DPP form.
# GCN-LABEL: name: dpp_vop1
# GCN: %3:vgpr_32 = V_CEIL_F32_dpp %0, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
name: dpp_vop1
tracksRegLiveness: true
body: |
  bb.0:
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
...

# Test instruction which does not have modifiers in VOP2 form but does in DPP form.
# GCN-LABEL: name: dpp_min
# GCN: %3:vgpr_32 = V_MIN_F32_dpp %0, 0, undef %2:vgpr_32, 0, undef %4:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
name: dpp_min
tracksRegLiveness: true
body: |
  bb.0:
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_MIN_F32_e32 %2, undef %3:vgpr_32, implicit $mode, implicit $exec
...

# Test an undef old operand
# GCN-LABEL: name: dpp_undef_old
# GCN: %3:vgpr_32 = V_CEIL_F32_dpp undef %1:vgpr_32, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
name: dpp_undef_old
tracksRegLiveness: true
body: |
  bb.0:
    %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
...

# Do not combine a dpp mov which writes a physreg.
# GCN-LABEL: name: phys_dpp_mov_dst
# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
name: phys_dpp_mov_dst
tracksRegLiveness: true
body: |
  bb.0:
    $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
...

# Do not combine a dpp mov which reads a physreg as its old operand.
# GCN-LABEL: name: phys_dpp_mov_old_src
# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
name: phys_dpp_mov_old_src
tracksRegLiveness: true
body: |
  bb.0:
    %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
...

# Do not combine a dpp mov which reads a physreg as its src operand.
# GCN-LABEL: name: phys_dpp_mov_src
# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
name: phys_dpp_mov_src
tracksRegLiveness: true
body: |
  bb.0:
    %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
    %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
...

# Two DPP movs feed a REG_SEQUENCE whose sub0/sub1 lanes are read by an
# add / add-with-carry pair. Both movs use masks 15, 15 with bound_ctrl:0 on,
# and both consumers are expected to be combined.
# GCN-LABEL: name: dpp_reg_sequence_both_combined
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %9:vgpr_32 = IMPLICIT_DEF
# GCN: %8:vgpr_32 = IMPLICIT_DEF
# GCN: %6:vgpr_32 = V_ADD_CO_U32_dpp %9, %1.sub0, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_both_combined
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...
# Same shape as dpp_reg_sequence_both_combined, but the sub1 mov uses masks
# 1, 1: only the sub0 consumer is expected to be combined, and the
# REG_SEQUENCE is expected to keep the sub1 mov with an undef sub0 input.
# GCN-LABEL: name: dpp_reg_sequence_first_combined
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %8:vgpr_32 = IMPLICIT_DEF
# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_CO_U32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_first_combined
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...
# Mirror of dpp_reg_sequence_first_combined: the sub0 mov uses masks 1, 1,
# so only the sub1 consumer is expected to be combined, and the REG_SEQUENCE
# is expected to keep the sub0 mov with an undef sub1 input.
# GCN-LABEL: name: dpp_reg_sequence_second_combined
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
# GCN: %8:vgpr_32 = IMPLICIT_DEF
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, undef %4:vgpr_32, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_second_combined
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...
# Both DPP movs feeding the REG_SEQUENCE use masks 1, 1; neither consumer is
# expected to be combined and the sequence is expected to stay as written
# (only the virtual register numbers differ in the output).
# GCN-LABEL: name: dpp_reg_sequence_none_combined
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_none_combined
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...
# The DPP movs and the REG_SEQUENCE are in bb.0 while the consumers are in
# bb.1 (reached through S_BRANCH); nothing is expected to be combined.
# GCN-LABEL: name: dpp_reg_sequence_exec_changed
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: S_BRANCH %bb.1
# GCN: bb.1:
# GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_exec_changed
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    S_BRANCH %bb.1

  bb.1:
    %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...
# The REG_SEQUENCE built from the DPP movs is re-packed by a second
# REG_SEQUENCE that reads it through subregisters before the consumers use
# it; nothing is expected to be combined.
# GCN-LABEL: name: dpp_reg_sequence_subreg
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vreg_64 = REG_SEQUENCE %5.sub0, %subreg.sub0, %5.sub1, %subreg.sub1
# GCN: %7:vgpr_32 = V_ADD_CO_U32_e32 %6.sub0, %2, implicit-def $vcc, implicit $exec
# GCN: %8:vgpr_32 = V_ADDC_U32_e32 %6.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_subreg
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %8:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %5:vreg_64 = REG_SEQUENCE %4.sub0, %subreg.sub0, %4.sub1, %subreg.sub1
    %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %8, implicit-def $vcc, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %8, implicit-def $vcc, implicit $vcc, implicit $exec
...

# A 64-bit V_MOV_B64_DPP_PSEUDO whose operands are both IMPLICIT_DEF feeds an
# add/addc pair through its sub0 and sub1 halves. The checks expect both adds
# to be rewritten to their _dpp forms, each reading the matching subregister
# of the pseudo's two operands.
# GCN-LABEL: name: dpp64_add64_impdef
# GCN: %3:vgpr_32 = V_ADD_CO_U32_dpp %1.sub0, %0.sub0, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
# GCN: %5:vgpr_32 = V_ADDC_U32_dpp %1.sub1, %0.sub1, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp64_add64_impdef
tracksRegLiveness: true
body: |
  bb.0:
    %0:vreg_64 = IMPLICIT_DEF
    %1:vreg_64 = IMPLICIT_DEF
    %2:vreg_64 = V_MOV_B64_DPP_PSEUDO %1:vreg_64, %0:vreg_64, 1, 15, 15, 1, implicit $exec
    %5:vgpr_32 = V_ADD_CO_U32_e32 %2.sub0, undef %4:vgpr_32, implicit-def $vcc, implicit $exec
    %6:vgpr_32 = V_ADDC_U32_e32 %2.sub1, undef %4, implicit-def $vcc, implicit $vcc, implicit $exec
...

# Same shape as dpp64_add64_impdef, but the operands of the 64-bit DPP pseudo
# are undef uses rather than IMPLICIT_DEF results. The checks expect both adds
# to become _dpp forms whose subregister operands keep the undef flag.
# GCN-LABEL: name: dpp64_add64_undef
# GCN: %3:vgpr_32 = V_ADD_CO_U32_dpp undef %1.sub0:vreg_64, undef %2.sub0:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
# GCN: %5:vgpr_32 = V_ADDC_U32_dpp undef %1.sub1:vreg_64, undef %2.sub1:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp64_add64_undef
tracksRegLiveness: true
body: |
  bb.0:
    %2:vreg_64 = V_MOV_B64_DPP_PSEUDO undef %1:vreg_64, undef %0:vreg_64, 1, 15, 15, 1, implicit $exec
    %5:vgpr_32 = V_ADD_CO_U32_e32 %2.sub0, undef %4:vgpr_32, implicit-def $vcc, implicit $exec
    %6:vgpr_32 = V_ADDC_U32_e32 %2.sub1, undef %4, implicit-def $vcc, implicit $vcc, implicit $exec
...

# Only the low half is expected to be combined: the V_ADD_CO_U32_e32 reading
# sub0 becomes V_ADD_CO_U32_dpp, while the V_ADDC_U32_e64 reading sub1 stays
# as is. For the remaining user the checks expect a 32-bit V_MOV_B32_dpp for
# the sub1 half plus a REG_SEQUENCE whose sub0 input is undef.
# GCN-LABEL: name: dpp64_add64_first_combined
# GCN: %8:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64, undef %2.sub1:vreg_64, 1, 15, 15, 1, implicit $exec
# GCN: %0:vreg_64 = REG_SEQUENCE undef %7:vgpr_32, %subreg.sub0, %8, %subreg.sub1
# GCN: %3:vgpr_32 = V_ADD_CO_U32_dpp undef %1.sub0:vreg_64, undef %2.sub0:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
# GCN: %5:vgpr_32, dead %6:sreg_64_xexec = V_ADDC_U32_e64 1, %0.sub1, undef $vcc, 0, implicit $exec
name: dpp64_add64_first_combined
tracksRegLiveness: true
body: |
  bb.0:
    %2:vreg_64 = V_MOV_B64_DPP_PSEUDO undef %1:vreg_64, undef %0:vreg_64, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_CO_U32_e32 %2.sub0, undef %3:vgpr_32, implicit-def $vcc, implicit $exec
    %5:vgpr_32, dead %6:sreg_64_xexec = V_ADDC_U32_e64 1, %2.sub1, undef $vcc, 0, implicit $exec
...

# The DPP mov result %3 is read by a V_CNDMASK_B32_e64 that also carries the
# extra %4 register operand. The check expects the V_CNDMASK_B32_e64 to remain
# untouched, i.e. the mov is not folded into it.
# NOTE(review): %4 is presumably the src2 operand the test name refers to -
# confirm against the V_CNDMASK_B32_e64 operand list.
# GCN-LABEL: name: dont_combine_cndmask_with_src2
# GCN: %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
name: dont_combine_cndmask_with_src2
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:sreg_64_xexec = IMPLICIT_DEF
    %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
...

---

# Make sure flags aren't dropped
# The V_ADD_F32_e64 below carries the nnan and nofpexcept instruction flags;
# the check expects both flags on the V_ADD_F32_dpp that replaces it (the
# expected output lists them as "nnan nofpexcept", the input spells them in
# the opposite order).
# GCN-LABEL: name: flags_add_f32_e64
# GCN: %4:vgpr_32 = nnan nofpexcept V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
name: flags_add_f32_e64
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec
    S_ENDPGM 0, implicit %4

...

# The DPP mov result %2 is used for both register operands of the same
# V_MAX_F32_e64. The check expects the V_MAX_F32_e64 to stay unchanged, i.e.
# no combine when the mov feeds more than one operand of its user.
# GCN-LABEL: name: dont_combine_more_than_one_operand
# GCN: %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
name: dont_combine_more_than_one_operand
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_dpp %0, %1, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
...

# REG_SEQUENCE variant of dont_combine_more_than_one_operand: each add reads
# the same subregister of the DPP-produced sequence %4 for both of its
# register operands. The checks expect the e32 add/addc pair to stay
# unchanged, i.e. no combine takes place.
# GCN-LABEL: name: dont_combine_more_than_one_operand_dpp_reg_sequence
# GCN: %5:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %4.sub0, implicit-def $vcc, implicit $exec
# GCN: %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dont_combine_more_than_one_operand_dpp_reg_sequence
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %5:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %4.sub0, implicit-def $vcc, implicit $exec
    %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
...
