# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX1010 -check-prefix=GCN %s

# Exercises the si-peephole-sdwa pass on gfx1010 using hand-written MIR.
# Each function below wraps VOP1/VOP2 instructions in 16-bit shifts (and, in
# the VOP2 function, 8-bit V_BFE_U32 extracts); the check lines list the
# opcodes and immediate operands expected in the output of the pass.

# GCN-LABEL: {{^}}name: vop1_instructions

# Expected results for the inputs written in their _e32 form.
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit $exec
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $mode, implicit $exec
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit $mode, implicit $exec
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit $mode, implicit $exec
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $mode, implicit $exec

# Expected results for the inputs written in their _e64 form with all-zero
# immediate operands.
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_sdwa 0, %{{[0-9]+}}, 0, 6, 0, 5, implicit $exec
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $mode, implicit $exec
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit $mode, implicit $exec
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit $mode, implicit $exec
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $mode, implicit $exec

# Expected results for the _e64 inputs that carry a non-zero immediate in one
# of the leading/trailing operand slots (presumably source modifiers, clamp
# or omod -- confirm against the instruction definitions).
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit $mode, implicit $exec
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 1, 0, 5, 0, 5, implicit $mode, implicit $exec
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 1, %{{[0-9]+}}, 0, 5, 0, 5, implicit $mode, implicit $exec
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 1, 5, 0, 5, implicit $mode, implicit $exec

---
name: vop1_instructions
tracksRegLiveness: true
registers:
  - { id: 0, class: vreg_64 }
  - { id: 1, class: vreg_64 }
  - { id: 2, class: sreg_64 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_32_xm0 }
  - { id: 5, class: sreg_32_xm0 }
  - { id: 6, class: sreg_32_xm0 }
  - { id: 7, class: sreg_32_xm0 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: vgpr_32 }
  - { id: 10, class: vgpr_32 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
  - { id: 16, class: vgpr_32 }
  - { id: 17, class: vgpr_32 }
  - { id: 18, class: vgpr_32 }
  - { id: 19, class: vgpr_32 }
  - { id: 20, class: vgpr_32 }
  - { id: 21, class: vgpr_32 }
  - { id: 22, class: vgpr_32 }
  - { id: 23, class: vgpr_32 }
  - { id: 24, class: vgpr_32 }
  - { id: 25, class: vgpr_32 }
  - { id: 26, class: vgpr_32 }
  - { id: 27, class: vgpr_32 }
  - { id: 28, class: vgpr_32 }
  - { id: 29, class: vgpr_32 }
  - { id: 30, class: vgpr_32 }
  - { id: 31, class: vgpr_32 }
  - { id: 32, class: vgpr_32 }
  - { id: 33, class: vgpr_32 }
  - { id: 34, class: vgpr_32 }
  - { id: 35, class: vgpr_32 }
  - { id: 36, class: vgpr_32 }
  - { id: 37, class: vgpr_32 }
  - { id: 38, class: vgpr_32 }
  - { id: 39, class: vgpr_32 }
  - { id: 40, class: vgpr_32 }
  - { id: 41, class: vgpr_32 }
  - { id: 42, class: vgpr_32 }
  - { id: 43, class: vgpr_32 }
  - { id: 44, class: vgpr_32 }
  - { id: 45, class: vgpr_32 }
  - { id: 46, class: vgpr_32 }
  - { id: 47, class: vgpr_32 }
  - { id: 48, class: vgpr_32 }
  - { id: 100, class: vgpr_32 }
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31

    %2 = COPY $sgpr30_sgpr31
    %1 = COPY $vgpr2_vgpr3
    %0 = COPY $vgpr0_vgpr1
    %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))

    %5 = S_MOV_B32 65535
    %6 = S_MOV_B32 65535

    ; First group: VOP1 instructions in _e32 form, separated by 16-bit shifts.
    %10 = V_LSHRREV_B32_e64 16, %3, implicit $exec
    %11 = V_MOV_B32_e32 %10, implicit $exec
    %12 = V_LSHLREV_B32_e64 16, %11, implicit $exec
    %14 = V_FRACT_F32_e32 123, implicit $mode, implicit $exec
    %15 = V_LSHLREV_B32_e64 16, %14, implicit $exec
    %16 = V_LSHRREV_B32_e64 16, %15, implicit $exec
    %17 = V_SIN_F32_e32 %16, implicit $mode, implicit $exec
    %18 = V_LSHLREV_B32_e64 16, %17, implicit $exec
    %19 = V_LSHRREV_B32_e64 16, %18, implicit $exec
    %20 = V_CVT_U32_F32_e32 %19, implicit $mode, implicit $exec
    %21 = V_LSHLREV_B32_e64 16, %20, implicit $exec
    %23 = V_CVT_F32_I32_e32 123, implicit $mode, implicit $exec
    %24 = V_LSHLREV_B32_e64 16, %23, implicit $exec

    ; Second group: the same opcodes in _e64 form with zero immediates.
    ; %26 is written twice below (the function is not in SSA form); this is
    ; preserved from the original input.
    %25 = V_LSHRREV_B32_e64 16, %3, implicit $exec
    %26 = V_MOV_B32_e64 %25, implicit $exec
    %26 = V_LSHLREV_B32_e64 16, %26, implicit $exec
    %27 = V_FRACT_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec
    %28 = V_LSHLREV_B32_e64 16, %27, implicit $exec
    %29 = V_LSHRREV_B32_e64 16, %28, implicit $exec
    %30 = V_SIN_F32_e64 0, %29, 0, 0, implicit $mode, implicit $exec
    %31 = V_LSHLREV_B32_e64 16, %30, implicit $exec
    %32 = V_LSHRREV_B32_e64 16, %31, implicit $exec
    %33 = V_CVT_U32_F32_e64 0, %32, 0, 0, implicit $mode, implicit $exec
    %34 = V_LSHLREV_B32_e64 16, %33, implicit $exec
    %35 = V_CVT_F32_I32_e64 %6, 0, 0, implicit $mode, implicit $exec
    %36 = V_LSHLREV_B32_e64 16, %35, implicit $exec


    ; Third group: _e64 forms with one immediate operand set to 1.
    %37 = V_LSHRREV_B32_e64 16, %36, implicit $exec
    %38 = V_FRACT_F32_e64 1, %37, 0, 0, implicit $mode, implicit $exec
    %39 = V_LSHLREV_B32_e64 16, %38, implicit $exec
    %40 = V_LSHRREV_B32_e64 16, %39, implicit $exec
    %41 = V_SIN_F32_e64 0, %40, 1, 0, implicit $mode, implicit $exec
    %42 = V_LSHLREV_B32_e64 16, %41, implicit $exec
    %43 = V_LSHRREV_B32_e64 16, %42, implicit $exec
    %44 = V_CVT_U32_F32_e64 1, %43, 0, 0, implicit $mode, implicit $exec
    %45 = V_LSHLREV_B32_e64 16, %44, implicit $exec
    %46 = V_LSHRREV_B32_e64 16, %45, implicit $exec
    %47 = V_CVT_F32_I32_e64 %46, 0, 1, implicit $mode, implicit $exec
    %48 = V_LSHLREV_B32_e64 16, %47, implicit $exec


    ; Store the end of the chain so the instructions above stay live.
    %100 = V_MOV_B32_e32 %48, implicit $exec

    FLAT_STORE_DWORD %0, %100, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
    $sgpr30_sgpr31 = COPY %2
    S_SETPC_B64_return $sgpr30_sgpr31

...
---
# GCN-LABEL: {{^}}name: vop2_instructions

# Expected results for the _e32 inputs. V_FMAC_F32 and V_FMAC_F16 are expected
# to remain in their _e32 form rather than become _sdwa.
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 5, 0, 6, 5, implicit $exec
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $mode, implicit $exec
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit $mode, implicit $exec
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F32_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit $mode, implicit $exec
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F16_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit $mode, implicit $exec

# Expected results for the _e64 inputs with all-zero immediate operands.
# V_FMAC_F32 and V_FMAC_F16 are expected to remain in their _e64 form.
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 5, 0, 6, 5, implicit $exec
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $mode, implicit $exec
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit $mode, implicit $exec
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F32_e64 0, 23, 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, implicit $mode, implicit $exec
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F16_e64 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, implicit $mode, implicit $exec

# Expected results for the _e64 inputs that carry non-zero immediate operands.
# V_FMAC_F32 and V_FMAC_F16 are again expected to remain in their _e64 form.
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $mode, implicit $exec
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit $mode, implicit $exec
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F32_e64 1, 23, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, 0, implicit $mode, implicit $exec
# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F16_e64 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 2, implicit $mode, implicit $exec

name: vop2_instructions
tracksRegLiveness: true
registers:
  - { id: 0, class: vreg_64 }
  - { id: 1, class: vreg_64 }
  - { id: 2, class: sreg_64 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_32_xm0 }
  - { id: 5, class: sreg_32_xm0 }
  - { id: 6, class: sreg_32_xm0 }
  - { id: 7, class: sreg_32_xm0 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: vgpr_32 }
  - { id: 10, class: vgpr_32 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
  - { id: 16, class: vgpr_32 }
  - { id: 17, class: vgpr_32 }
  - { id: 18, class: vgpr_32 }
  - { id: 19, class: vgpr_32 }
  - { id: 20, class: vgpr_32 }
  - { id: 21, class: vgpr_32 }
  - { id: 22, class: vgpr_32 }
  - { id: 23, class: vgpr_32 }
  - { id: 24, class: vgpr_32 }
  - { id: 25, class: vgpr_32 }
  - { id: 26, class: vgpr_32 }
  - { id: 27, class: vgpr_32 }
  - { id: 28, class: vgpr_32 }
  - { id: 29, class: vgpr_32 }
  - { id: 30, class: vgpr_32 }
  - { id: 31, class: vgpr_32 }
  - { id: 32, class: vgpr_32 }
  - { id: 33, class: vgpr_32 }
  - { id: 34, class: vgpr_32 }
  - { id: 35, class: vgpr_32 }
  - { id: 36, class: vgpr_32 }
  - { id: 37, class: vgpr_32 }
  - { id: 38, class: vgpr_32 }
  - { id: 39, class: vgpr_32 }
  - { id: 40, class: vgpr_32 }
  - { id: 41, class: vgpr_32 }
  - { id: 42, class: vgpr_32 }
  - { id: 43, class: vgpr_32 }
  - { id: 44, class: vgpr_32 }
  - { id: 45, class: vgpr_32 }
  - { id: 46, class: vgpr_32 }
  - { id: 47, class: vgpr_32 }
  - { id: 48, class: vgpr_32 }
  - { id: 49, class: vgpr_32 }
  - { id: 50, class: vgpr_32 }
  - { id: 51, class: vgpr_32 }
  - { id: 52, class: vgpr_32 }
  - { id: 53, class: vgpr_32 }
  - { id: 54, class: vgpr_32 }
  - { id: 55, class: vgpr_32 }
  - { id: 56, class: vgpr_32 }
  - { id: 57, class: vgpr_32 }
  - { id: 58, class: vgpr_32 }
  - { id: 59, class: vgpr_32 }
  - { id: 60, class: vgpr_32 }
  - { id: 100, class: vgpr_32 }
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31

    %2 = COPY $sgpr30_sgpr31
    %1 = COPY $vgpr2_vgpr3
    %0 = COPY $vgpr0_vgpr1
    %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))

    %5 = S_MOV_B32 65535
    %6 = S_MOV_B32 65535

    ; First group: VOP2 instructions in _e32 form, fed by 16-bit shifts and
    ; 8-bit V_BFE_U32 extracts.
    %11 = V_LSHRREV_B32_e64 16, %3, implicit $exec
    %12 = V_AND_B32_e32 %6, %11, implicit $exec
    %13 = V_LSHLREV_B32_e64 16, %12, implicit $exec
    %14 = V_LSHRREV_B32_e64 16, %13, implicit $exec
    %15 = V_BFE_U32_e64 %13, 8, 8, implicit $exec
    %16 = V_ADD_F32_e32 %14, %15, implicit $mode, implicit $exec
    %17 = V_LSHLREV_B32_e64 16, %16, implicit $exec
    %18 = V_LSHRREV_B32_e64 16, %17, implicit $exec
    %19 = V_BFE_U32_e64 %17, 8, 8, implicit $exec
    %20 = V_SUB_F16_e32 %18, %19, implicit $mode, implicit $exec
    %21 = V_LSHLREV_B32_e64 16, %20, implicit $exec
    %22 = V_BFE_U32_e64 %20, 8, 8, implicit $exec
    %23 = V_FMAC_F32_e32 %21, %22, %22, implicit $mode, implicit $exec
    %24 = V_LSHLREV_B32_e64 16, %23, implicit $exec
    %25 = V_LSHRREV_B32_e64 16, %24, implicit $exec
    %26 = V_BFE_U32_e64 %24, 8, 8, implicit $exec
    %27 = V_FMAC_F16_e32 %25, %26, %26, implicit $mode, implicit $exec
    %28 = V_LSHLREV_B32_e64 16, %27, implicit $exec

    ; Second group: the same opcodes in _e64 form with zero immediates.
    %29 = V_LSHRREV_B32_e64 16, %28, implicit $exec
    %30 = V_AND_B32_e64 23, %29, implicit $exec
    %31 = V_LSHLREV_B32_e64 16, %30, implicit $exec
    %32 = V_LSHRREV_B32_e64 16, %31, implicit $exec
    %33 = V_BFE_U32_e64 %31, 8, 8, implicit $exec
    %34 = V_ADD_F32_e64 0, %32, 0, %33, 0, 0, implicit $mode, implicit $exec
    %35 = V_LSHLREV_B32_e64 16, %34, implicit $exec
    %37 = V_BFE_U32_e64 %35, 8, 8, implicit $exec
    %38 = V_SUB_F16_e64 0, 23, 0, %37, 0, 0, implicit $mode, implicit $exec
    %39 = V_LSHLREV_B32_e64 16, %38, implicit $exec
    %40 = V_BFE_U32_e64 %39, 8, 8, implicit $exec
    %41 = V_FMAC_F32_e64 0, 23, 0, %40, 0, %40, 0, 0, implicit $mode, implicit $exec
    %42 = V_LSHLREV_B32_e64 16, %41, implicit $exec
    %43 = V_LSHRREV_B32_e64 16, %42, implicit $exec
    %44 = V_BFE_U32_e64 %42, 8, 8, implicit $exec
    %45 = V_FMAC_F16_e64 0, %43, 0, %44, 0, %44, 0, 0, implicit $mode, implicit $exec
    %46 = V_LSHLREV_B32_e64 16, %45, implicit $exec

    ; Third group: _e64 forms with non-zero immediate operands.
    %47 = V_LSHRREV_B32_e64 16, %46, implicit $exec
    %48 = V_BFE_U32_e64 %46, 8, 8, implicit $exec
    %49 = V_ADD_F32_e64 0, %47, 1, %48, 0, 0, implicit $mode, implicit $exec
    %50 = V_LSHLREV_B32_e64 16, %49, implicit $exec
    %51 = V_BFE_U32_e64 %50, 8, 8, implicit $exec
    %52 = V_SUB_F16_e64 1, 23, 1, %51, 0, 0, implicit $mode, implicit $exec
    %53 = V_LSHLREV_B32_e64 16, %52, implicit $exec
    %54 = V_BFE_U32_e64 %53, 8, 8, implicit $exec
    %55 = V_FMAC_F32_e64 1, 23, 1, %54, 1, %54, 1, 0, implicit $mode, implicit $exec
    %56 = V_LSHLREV_B32_e64 16, %55, implicit $exec
    %57 = V_LSHRREV_B32_e64 16, %56, implicit $exec
    %58 = V_BFE_U32_e64 %56, 8, 8, implicit $exec
    %59 = V_FMAC_F16_e64 1, %57, 1, %58, 1, %58, 0, 2, implicit $mode, implicit $exec
    %60 = V_LSHLREV_B32_e64 16, %59, implicit $exec

    ; Store the end of the chain so the instructions above stay live.
    %100 = V_MOV_B32_e32 %60, implicit $exec

    FLAT_STORE_DWORD %0, %100, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
    $sgpr30_sgpr31 = COPY %2
    S_SETPC_B64_return $sgpr30_sgpr31

...