; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s

; Test gfx9+ s_lshl[1-4]_add_u32 and v_lshl_add_u32 pattern matching for
; (add (shl x, C), y).

; Scalar case: both operands are inreg. For shift amounts 1 through 4 the
; checks expect a single s_lshlN_add_u32 on gfx900 and gfx1010, and a separate
; s_lshl_b32 + s_add_i32 pair on fiji.
define amdgpu_ps i32 @s_shl1_add_u32(i32 inreg %src0, i32 inreg %src1) {
; GFX9-LABEL: s_shl1_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl1_add_u32 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl1_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
; GFX8-NEXT:    s_add_i32 s0, s0, s1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_shl1_add_u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl1_add_u32 s0, s0, s1
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 1
  %add = add i32 %shl, %src1
  ret i32 %add
}

define amdgpu_ps i32 @s_shl2_add_u32(i32 inreg %src0, i32 inreg %src1) {
; GFX9-LABEL: s_shl2_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl2_add_u32 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl2_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 2
; GFX8-NEXT:    s_add_i32 s0, s0, s1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_shl2_add_u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl2_add_u32 s0, s0, s1
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 2
  %add = add i32 %shl, %src1
  ret i32 %add
}

define amdgpu_ps i32 @s_shl3_add_u32(i32 inreg %src0, i32 inreg %src1) {
; GFX9-LABEL: s_shl3_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl3_add_u32 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl3_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 3
; GFX8-NEXT:    s_add_i32 s0, s0, s1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_shl3_add_u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl3_add_u32 s0, s0, s1
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 3
  %add = add i32 %shl, %src1
  ret i32 %add
}

define amdgpu_ps i32 @s_shl4_add_u32(i32 inreg %src0, i32 inreg %src1) {
; GFX9-LABEL: s_shl4_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl4_add_u32 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl4_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 4
; GFX8-NEXT:    s_add_i32 s0, s0, s1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_shl4_add_u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl4_add_u32 s0, s0, s1
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 4
  %add = add i32 %shl, %src1
  ret i32 %add
}

; Negative scalar case: with a shift amount of 5 the checks expect the unfused
; s_lshl_b32 + s_add_i32 sequence on all three targets.
define amdgpu_ps i32 @s_shl5_add_u32(i32 inreg %src0, i32 inreg %src1) {
; GCN-LABEL: s_shl5_add_u32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshl_b32 s0, s0, 5
; GCN-NEXT:    s_add_i32 s0, s0, s1
; GCN-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 5
  %add = add i32 %shl, %src1
  ret i32 %add
}

; Vector case: the arguments are not inreg and the checks operate on v0/v1.
; gfx900 and gfx1010 are expected to select v_lshl_add_u32 for every shift
; amount tested here (1 through 5); fiji is expected to emit
; v_lshlrev_b32 + v_add_u32.
define i32 @v_shl1_add_u32(i32 %src0, i32 %src1) {
; GFX9-LABEL: v_shl1_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 1, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl1_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_shl1_add_u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 1, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i32 %src0, 1
  %add = add i32 %shl, %src1
  ret i32 %add
}

define i32 @v_shl2_add_u32(i32 %src0, i32 %src1) {
; GFX9-LABEL: v_shl2_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 2, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl2_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_shl2_add_u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 2, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i32 %src0, 2
  %add = add i32 %shl, %src1
  ret i32 %add
}

define i32 @v_shl3_add_u32(i32 %src0, i32 %src1) {
; GFX9-LABEL: v_shl3_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 3, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl3_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_shl3_add_u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 3, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i32 %src0, 3
  %add = add i32 %shl, %src1
  ret i32 %add
}

define i32 @v_shl4_add_u32(i32 %src0, i32 %src1) {
; GFX9-LABEL: v_shl4_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 4, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl4_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_shl4_add_u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 4, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i32 %src0, 4
  %add = add i32 %shl, %src1
  ret i32 %add
}

define i32 @v_shl5_add_u32(i32 %src0, i32 %src1) {
; GFX9-LABEL: v_shl5_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 5, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl5_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_shl5_add_u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 5, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i32 %src0, 5
  %add = add i32 %shl, %src1
  ret i32 %add
}

; FIXME: Use v_lshl_add_u32 in the tests below, where the
; shift is scalar, but add is vector.
; Mixed operands: %src0 is inreg and %src1 is not. The checks currently expect
; a scalar s_lshl_b32 followed by a VALU add (v_add_u32 on gfx900 and fiji,
; v_add_nc_u32 on gfx1010) for every shift amount, i.e. no fused instruction.
define amdgpu_ps float @shl1_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
; GFX9-LABEL: shl1_add_u32_vgpr1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
; GFX9-NEXT:    v_add_u32_e32 v0, s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: shl1_add_u32_vgpr1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: shl1_add_u32_vgpr1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
; GFX10-NEXT:    v_add_nc_u32_e32 v0, s0, v0
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 1
  %add = add i32 %shl, %src1
  %cast = bitcast i32 %add to float
  ret float %cast
}

define amdgpu_ps float @shl2_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
; GFX9-LABEL: shl2_add_u32_vgpr1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl_b32 s0, s0, 2
; GFX9-NEXT:    v_add_u32_e32 v0, s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: shl2_add_u32_vgpr1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 2
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: shl2_add_u32_vgpr1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl_b32 s0, s0, 2
; GFX10-NEXT:    v_add_nc_u32_e32 v0, s0, v0
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 2
  %add = add i32 %shl, %src1
  %cast = bitcast i32 %add to float
  ret float %cast
}

define amdgpu_ps float @shl3_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
; GFX9-LABEL: shl3_add_u32_vgpr1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl_b32 s0, s0, 3
; GFX9-NEXT:    v_add_u32_e32 v0, s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: shl3_add_u32_vgpr1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 3
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: shl3_add_u32_vgpr1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl_b32 s0, s0, 3
; GFX10-NEXT:    v_add_nc_u32_e32 v0, s0, v0
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 3
  %add = add i32 %shl, %src1
  %cast = bitcast i32 %add to float
  ret float %cast
}

define amdgpu_ps float @shl4_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
; GFX9-LABEL: shl4_add_u32_vgpr1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl_b32 s0, s0, 4
; GFX9-NEXT:    v_add_u32_e32 v0, s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: shl4_add_u32_vgpr1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 4
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: shl4_add_u32_vgpr1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl_b32 s0, s0, 4
; GFX10-NEXT:    v_add_nc_u32_e32 v0, s0, v0
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 4
  %add = add i32 %shl, %src1
  %cast = bitcast i32 %add to float
  ret float %cast
}

define amdgpu_ps float @shl5_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
; GFX9-LABEL: shl5_add_u32_vgpr1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl_b32 s0, s0, 5
; GFX9-NEXT:    v_add_u32_e32 v0, s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: shl5_add_u32_vgpr1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 5
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: shl5_add_u32_vgpr1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl_b32 s0, s0, 5
; GFX10-NEXT:    v_add_nc_u32_e32 v0, s0, v0
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 5
  %add = add i32 %shl, %src1
  %cast = bitcast i32 %add to float
  ret float %cast
}

; <2 x i32> variants with inreg operands. The checks expect the operation to be
; handled per element: one s_lshlN_add_u32 per element on gfx900 and gfx1010,
; and two s_lshl_b32 plus two s_add_i32 on fiji.
define amdgpu_ps <2 x i32> @s_shl1_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
; GFX9-LABEL: s_shl1_add_u32_v2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl1_add_u32 s0, s0, s2
; GFX9-NEXT:    s_lshl1_add_u32 s1, s1, s3
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl1_add_u32_v2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
; GFX8-NEXT:    s_lshl_b32 s1, s1, 1
; GFX8-NEXT:    s_add_i32 s0, s0, s2
; GFX8-NEXT:    s_add_i32 s1, s1, s3
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_shl1_add_u32_v2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl1_add_u32 s0, s0, s2
; GFX10-NEXT:    s_lshl1_add_u32 s1, s1, s3
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl <2 x i32> %src0, <i32 1, i32 1>
  %add = add <2 x i32> %shl, %src1
  ret <2 x i32> %add
}

define amdgpu_ps <2 x i32> @s_shl2_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
; GFX9-LABEL: s_shl2_add_u32_v2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl2_add_u32 s0, s0, s2
; GFX9-NEXT:    s_lshl2_add_u32 s1, s1, s3
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl2_add_u32_v2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 2
; GFX8-NEXT:    s_lshl_b32 s1, s1, 2
; GFX8-NEXT:    s_add_i32 s0, s0, s2
; GFX8-NEXT:    s_add_i32 s1, s1, s3
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_shl2_add_u32_v2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl2_add_u32 s0, s0, s2
; GFX10-NEXT:    s_lshl2_add_u32 s1, s1, s3
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl <2 x i32> %src0, <i32 2, i32 2>
  %add = add <2 x i32> %shl, %src1
  ret <2 x i32> %add
}

define amdgpu_ps <2 x i32> @s_shl3_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
; GFX9-LABEL: s_shl3_add_u32_v2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl3_add_u32 s0, s0, s2
; GFX9-NEXT:    s_lshl3_add_u32 s1, s1, s3
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl3_add_u32_v2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 3
; GFX8-NEXT:    s_lshl_b32 s1, s1, 3
; GFX8-NEXT:    s_add_i32 s0, s0, s2
; GFX8-NEXT:    s_add_i32 s1, s1, s3
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_shl3_add_u32_v2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl3_add_u32 s0, s0, s2
; GFX10-NEXT:    s_lshl3_add_u32 s1, s1, s3
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl <2 x i32> %src0, <i32 3, i32 3>
  %add = add <2 x i32> %shl, %src1
  ret <2 x i32> %add
}

define amdgpu_ps <2 x i32> @s_shl4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
; GFX9-LABEL: s_shl4_add_u32_v2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl4_add_u32 s0, s0, s2
; GFX9-NEXT:    s_lshl4_add_u32 s1, s1, s3
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl4_add_u32_v2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 4
; GFX8-NEXT:    s_lshl_b32 s1, s1, 4
; GFX8-NEXT:    s_add_i32 s0, s0, s2
; GFX8-NEXT:    s_add_i32 s1, s1, s3
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_shl4_add_u32_v2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl4_add_u32 s0, s0, s2
; GFX10-NEXT:    s_lshl4_add_u32 s1, s1, s3
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl <2 x i32> %src0, <i32 4, i32 4>
  %add = add <2 x i32> %shl, %src1
  ret <2 x i32> %add
}

; Non-splat shift amounts (2 for element 0, 4 for element 1): on gfx900 and
; gfx1010 each element is expected to use the fused instruction matching its
; own shift amount.
define amdgpu_ps <2 x i32> @s_shl_2_4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
; GFX9-LABEL: s_shl_2_4_add_u32_v2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl2_add_u32 s0, s0, s2
; GFX9-NEXT:    s_lshl4_add_u32 s1, s1, s3
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl_2_4_add_u32_v2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 2
; GFX8-NEXT:    s_lshl_b32 s1, s1, 4
; GFX8-NEXT:    s_add_i32 s0, s0, s2
; GFX8-NEXT:    s_add_i32 s1, s1, s3
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_shl_2_4_add_u32_v2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl2_add_u32 s0, s0, s2
; GFX10-NEXT:    s_lshl4_add_u32 s1, s1, s3
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl <2 x i32> %src0, <i32 2, i32 4>
  %add = add <2 x i32> %shl, %src1
  ret <2 x i32> %add
}

; Multi-use cases: the shl result is returned alongside the add, so the shift
; has a second user. The checks expect the unfused s_lshl_b32 + s_add_i32
; sequence on all three targets.
define amdgpu_ps { i32, i32 } @s_shl4_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) {
; GCN-LABEL: s_shl4_add_u32_multi_use:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshl_b32 s0, s0, 4
; GCN-NEXT:    s_add_i32 s1, s0, s1
; GCN-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 4
  %add = add i32 %shl, %src1
  %insert0 = insertvalue { i32, i32 } undef, i32 %shl, 0
  %insert1 = insertvalue { i32, i32 } %insert0, i32 %add, 1
  ret { i32, i32 } %insert1
}

define amdgpu_ps { i32, i32 } @s_shl3_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) {
; GCN-LABEL: s_shl3_add_u32_multi_use:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshl_b32 s0, s0, 3
; GCN-NEXT:    s_add_i32 s1, s0, s1
; GCN-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 3
  %add = add i32 %shl, %src1
  %insert0 = insertvalue { i32, i32 } undef, i32 %shl, 0
  %insert1 = insertvalue { i32, i32 } %insert0, i32 %add, 1
  ret { i32, i32 } %insert1
}

define amdgpu_ps { i32, i32 } @s_shl2_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) {
; GCN-LABEL: s_shl2_add_u32_multi_use:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshl_b32 s0, s0, 2
; GCN-NEXT:    s_add_i32 s1, s0, s1
; GCN-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 2
  %add = add i32 %shl, %src1
  %insert0 = insertvalue { i32, i32 } undef, i32 %shl, 0
  %insert1 = insertvalue { i32, i32 } %insert0, i32 %add, 1
  ret { i32, i32 } %insert1
}


define amdgpu_ps { i32, i32 } @s_shl1_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) {
; GCN-LABEL: s_shl1_add_u32_multi_use:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshl_b32 s0, s0, 1
; GCN-NEXT:    s_add_i32 s1, s0, s1
; GCN-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 1
  %add = add i32 %shl, %src1
  %insert0 = insertvalue { i32, i32 } undef, i32 %shl, 0
  %insert1 = insertvalue { i32, i32 } %insert0, i32 %add, 1
  ret { i32, i32 } %insert1
}