; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s

; GlobalISel code generation for extractelement on <4 x i16> and <8 x i16>
; vectors, run for gfx900, fiji and hawaii.
;
; Every function loads a vector through %ptr and returns a single i16 element.
; The variants differ in three ways:
;   * the pointer is either an inreg addrspace(4) pointer or a plain
;     addrspace(1) pointer,
;   * the index is either a constant or the %idx argument (inreg or not),
;   * the function is either an amdgpu_ps entry point or uses the default
;     calling convention.
;
; For the %idx variants the checked code picks the 32-bit dword with
; (idx >> 1) and then shifts it right by (idx & 1) * 16.
;
; The assertion comments below are generated output. Regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; ---------------------------------------------------------------------------
; <4 x i16>, index taken from the %idx argument.
; ---------------------------------------------------------------------------

define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(<4 x i16> addrspace(4)* inreg %ptr, i32 inreg %idx) {
; GCN-LABEL: extractelement_sgpr_v4i16_sgpr_idx:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
; GCN-NEXT:    s_lshr_b32 s2, s4, 1
; GCN-NEXT:    s_cmp_eq_u32 s2, 1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_cselect_b32 s0, s1, s0
; GCN-NEXT:    s_and_b32 s1, s4, 1
; GCN-NEXT:    s_lshl_b32 s1, s1, 4
; GCN-NEXT:    s_lshr_b32 s0, s0, s1
; GCN-NEXT:    ; return to shader part epilog
  %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr
  %element = extractelement <4 x i16> %vector, i32 %idx
  ret i16 %element
}

define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(<4 x i16> addrspace(1)* %ptr, i32 inreg %idx) {
; GFX9-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    s_lshr_b32 s0, s2, 1
; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 1
; GFX9-NEXT:    s_and_b32 s1, s2, 1
; GFX9-NEXT:    s_lshl_b32 s0, s1, 4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    s_lshr_b32 s0, s2, 1
; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 1
; GFX8-NEXT:    s_and_b32 s1, s2, 1
; GFX8-NEXT:    s_lshl_b32 s0, s1, 4
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX7-NEXT:    s_lshr_b32 s0, s2, 1
; GFX7-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 1
; GFX7-NEXT:    s_and_b32 s1, s2, 1
; GFX7-NEXT:    s_lshl_b32 s0, s1, 4
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
  %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr
  %element = extractelement <4 x i16> %vector, i32 %idx
  ret i16 %element
}

define i16 @extractelement_vgpr_v4i16_vgpr_idx(<4 x i16> addrspace(1)* %ptr, i32 %idx) {
; GFX9-LABEL: extractelement_vgpr_v4i16_vgpr_idx:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 1, v2
; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v3
; GFX9-NEXT:    v_and_b32_e32 v2, 1, v2
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 4, v2
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i16_vgpr_idx:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 1, v2
; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v3
; GFX8-NEXT:    v_and_b32_e32 v2, 1, v2
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 4, v2
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i16_vgpr_idx:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 1, v2
; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v3
; GFX7-NEXT:    v_and_b32_e32 v2, 1, v2
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 4, v2
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr
  %element = extractelement <4 x i16> %vector, i32 %idx
  ret i16 %element
}

define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(<4 x i16> addrspace(4)* inreg %ptr, i32 %idx) {
; GCN-LABEL: extractelement_sgpr_v4i16_vgpr_idx:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
; GCN-NEXT:    v_lshrrev_b32_e32 v1, 1, v0
; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
; GCN-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v2, s0
; GCN-NEXT:    v_mov_b32_e32 v3, s1
; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
; GCN-NEXT:    v_lshrrev_b32_e32 v0, v0, v1
; GCN-NEXT:    v_readfirstlane_b32 s0, v0
; GCN-NEXT:    ; return to shader part epilog
  %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr
  %element = extractelement <4 x i16> %vector, i32 %idx
  ret i16 %element
}

; ---------------------------------------------------------------------------
; <4 x i16>, constant index, vector loaded through an inreg addrspace(4)
; pointer.
; ---------------------------------------------------------------------------

define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx0(<4 x i16> addrspace(4)* inreg %ptr) {
; GCN-LABEL: extractelement_sgpr_v4i16_idx0:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    ; return to shader part epilog
  %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr
  %element = extractelement <4 x i16> %vector, i32 0
  ret i16 %element
}

define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx1(<4 x i16> addrspace(4)* inreg %ptr) {
; GCN-LABEL: extractelement_sgpr_v4i16_idx1:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_lshr_b32 s0, s0, 16
; GCN-NEXT:    ; return to shader part epilog
  %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr
  %element = extractelement <4 x i16> %vector, i32 1
  ret i16 %element
}

define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx2(<4 x i16> addrspace(4)* inreg %ptr) {
; GCN-LABEL: extractelement_sgpr_v4i16_idx2:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_mov_b32 s0, s1
; GCN-NEXT:    ; return to shader part epilog
  %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr
  %element = extractelement <4 x i16> %vector, i32 2
  ret i16 %element
}

define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx3(<4 x i16> addrspace(4)* inreg %ptr) {
; GCN-LABEL: extractelement_sgpr_v4i16_idx3:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_lshr_b32 s0, s1, 16
; GCN-NEXT:    ; return to shader part epilog
  %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr
  %element = extractelement <4 x i16> %vector, i32 3
  ret i16 %element
}

; ---------------------------------------------------------------------------
; <4 x i16>, constant index, vector loaded through an addrspace(1) pointer.
; ---------------------------------------------------------------------------

define i16 @extractelement_vgpr_v4i16_idx0(<4 x i16> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i16_idx0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i16_idx0:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i16_idx0:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr
  %element = extractelement <4 x i16> %vector, i32 0
  ret i16 %element
}

define i16 @extractelement_vgpr_v4i16_idx1(<4 x i16> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i16_idx1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i16_idx1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i16_idx1:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr
  %element = extractelement <4 x i16> %vector, i32 1
  ret i16 %element
}

define i16 @extractelement_vgpr_v4i16_idx2(<4 x i16> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i16_idx2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i16_idx2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i16_idx2:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr
  %element = extractelement <4 x i16> %vector, i32 2
  ret i16 %element
}

define i16 @extractelement_vgpr_v4i16_idx3(<4 x i16> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i16_idx3:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i16_idx3:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i16_idx3:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr
  %element = extractelement <4 x i16> %vector, i32 3
  ret i16 %element
}

; ---------------------------------------------------------------------------
; <8 x i16>, index taken from the %idx argument.
; ---------------------------------------------------------------------------

define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(<8 x i16> addrspace(4)* inreg %ptr, i32 inreg %idx) {
; GCN-LABEL: extractelement_sgpr_v8i16_sgpr_idx:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
; GCN-NEXT:    s_lshr_b32 s5, s4, 1
; GCN-NEXT:    s_cmp_eq_u32 s5, 1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_cselect_b32 s0, s1, s0
; GCN-NEXT:    s_cmp_eq_u32 s5, 2
; GCN-NEXT:    s_cselect_b32 s0, s2, s0
; GCN-NEXT:    s_cmp_eq_u32 s5, 3
; GCN-NEXT:    s_cselect_b32 s0, s3, s0
; GCN-NEXT:    s_and_b32 s1, s4, 1
; GCN-NEXT:    s_lshl_b32 s1, s1, 4
; GCN-NEXT:    s_lshr_b32 s0, s0, s1
; GCN-NEXT:    ; return to shader part epilog
  %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr
  %element = extractelement <8 x i16> %vector, i32 %idx
  ret i16 %element
}

define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(<8 x i16> addrspace(1)* %ptr, i32 inreg %idx) {
; GFX9-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
; GFX9-NEXT:    s_lshr_b32 s0, s2, 1
; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 1
; GFX9-NEXT:    s_and_b32 s1, s2, 1
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 2
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 3
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX9-NEXT:    s_lshl_b32 s0, s1, 4
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; GFX8-NEXT:    s_lshr_b32 s0, s2, 1
; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 1
; GFX8-NEXT:    s_and_b32 s1, s2, 1
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 2
; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 3
; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX8-NEXT:    s_lshl_b32 s0, s1, 4
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_lshr_b32 s0, s2, 1
; GFX7-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 1
; GFX7-NEXT:    s_and_b32 s1, s2, 1
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX7-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 2
; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX7-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 3
; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX7-NEXT:    s_lshl_b32 s0, s1, 4
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
  %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
  %element = extractelement <8 x i16> %vector, i32 %idx
  ret i16 %element
}

define i16 @extractelement_vgpr_v8i16_vgpr_idx(<8 x i16> addrspace(1)* %ptr, i32 %idx) {
; GFX9-LABEL: extractelement_vgpr_v8i16_vgpr_idx:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx4 v[3:6], v[0:1], off
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 1, v2
; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GFX9-NEXT:    v_and_b32_e32 v1, 1, v2
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 4, v1
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v6, vcc
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i16_vgpr_idx:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx4 v[3:6], v[0:1]
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 1, v2
; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GFX8-NEXT:    v_and_b32_e32 v1, 1, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 4, v1
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v6, vcc
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i16_vgpr_idx:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx4 v[3:6], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 1, v2
; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GFX7-NEXT:    v_and_b32_e32 v1, 1, v2
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 4, v1
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v6, vcc
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
  %element = extractelement <8 x i16> %vector, i32 %idx
  ret i16 %element
}

define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(<8 x i16> addrspace(4)* inreg %ptr, i32 %idx) {
; GCN-LABEL: extractelement_sgpr_v8i16_vgpr_idx:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
; GCN-NEXT:    v_lshrrev_b32_e32 v1, 1, v0
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
; GCN-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v2, s0
; GCN-NEXT:    v_mov_b32_e32 v3, s1
; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
; GCN-NEXT:    v_mov_b32_e32 v4, s2
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v1
; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; GCN-NEXT:    v_mov_b32_e32 v5, s3
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v1
; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v5, vcc
; GCN-NEXT:    v_lshrrev_b32_e32 v0, v0, v1
; GCN-NEXT:    v_readfirstlane_b32 s0, v0
; GCN-NEXT:    ; return to shader part epilog
  %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr
  %element = extractelement <8 x i16> %vector, i32 %idx
  ret i16 %element
}

; ---------------------------------------------------------------------------
; <8 x i16>, constant index, vector loaded through an inreg addrspace(4)
; pointer.
; ---------------------------------------------------------------------------

define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx0(<8 x i16> addrspace(4)* inreg %ptr) {
; GCN-LABEL: extractelement_sgpr_v8i16_idx0:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    ; return to shader part epilog
  %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr
  %element = extractelement <8 x i16> %vector, i32 0
  ret i16 %element
}

define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx1(<8 x i16> addrspace(4)* inreg %ptr) {
; GCN-LABEL: extractelement_sgpr_v8i16_idx1:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_lshr_b32 s0, s0, 16
; GCN-NEXT:    ; return to shader part epilog
  %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr
  %element = extractelement <8 x i16> %vector, i32 1
  ret i16 %element
}

define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx2(<8 x i16> addrspace(4)* inreg %ptr) {
; GCN-LABEL: extractelement_sgpr_v8i16_idx2:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_mov_b32 s0, s1
; GCN-NEXT:    ; return to shader part epilog
  %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr
  %element = extractelement <8 x i16> %vector, i32 2
  ret i16 %element
}

define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx3(<8 x i16> addrspace(4)* inreg %ptr) {
; GCN-LABEL: extractelement_sgpr_v8i16_idx3:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_lshr_b32 s0, s1, 16
; GCN-NEXT:    ; return to shader part epilog
  %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr
  %element = extractelement <8 x i16> %vector, i32 3
  ret i16 %element
}

define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx4(<8 x i16> addrspace(4)* inreg %ptr) {
; GCN-LABEL: extractelement_sgpr_v8i16_idx4:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_mov_b32 s0, s2
; GCN-NEXT:    ; return to shader part epilog
  %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr
  %element = extractelement <8 x i16> %vector, i32 4
  ret i16 %element
}

define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx5(<8 x i16> addrspace(4)* inreg %ptr) {
; GCN-LABEL: extractelement_sgpr_v8i16_idx5:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_lshr_b32 s0, s2, 16
; GCN-NEXT:    ; return to shader part epilog
  %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr
  %element = extractelement <8 x i16> %vector, i32 5
  ret i16 %element
}

define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx6(<8 x i16> addrspace(4)* inreg %ptr) {
; GCN-LABEL: extractelement_sgpr_v8i16_idx6:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_mov_b32 s0, s3
; GCN-NEXT:    ; return to shader part epilog
  %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr
  %element = extractelement <8 x i16> %vector, i32 6
  ret i16 %element
}

define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx7(<8 x i16> addrspace(4)* inreg %ptr) {
; GCN-LABEL: extractelement_sgpr_v8i16_idx7:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_lshr_b32 s0, s3, 16
; GCN-NEXT:    ; return to shader part epilog
  %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr
  %element = extractelement <8 x i16> %vector, i32 7
  ret i16 %element
}

; ---------------------------------------------------------------------------
; <8 x i16>, constant index, vector loaded through an addrspace(1) pointer.
; ---------------------------------------------------------------------------

define i16 @extractelement_vgpr_v8i16_idx0(<8 x i16> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i16_idx0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i16_idx0:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i16_idx0:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
  %element = extractelement <8 x i16> %vector, i32 0
  ret i16 %element
}

define i16 @extractelement_vgpr_v8i16_idx1(<8 x i16> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i16_idx1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i16_idx1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i16_idx1:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
  %element = extractelement <8 x i16> %vector, i32 1
  ret i16 %element
}

define i16 @extractelement_vgpr_v8i16_idx2(<8 x i16> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i16_idx2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i16_idx2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i16_idx2:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
  %element = extractelement <8 x i16> %vector, i32 2
  ret i16 %element
}

define i16 @extractelement_vgpr_v8i16_idx3(<8 x i16> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i16_idx3:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i16_idx3:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i16_idx3:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
  %element = extractelement <8 x i16> %vector, i32 3
  ret i16 %element
}

define i16 @extractelement_vgpr_v8i16_idx4(<8 x i16> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i16_idx4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i16_idx4:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i16_idx4:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, v2
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
  %element = extractelement <8 x i16> %vector, i32 4
  ret i16 %element
}

define i16 @extractelement_vgpr_v8i16_idx5(<8 x i16> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i16_idx5:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i16_idx5:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i16_idx5:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
  %element = extractelement <8 x i16> %vector, i32 5
  ret i16 %element
}

define i16 @extractelement_vgpr_v8i16_idx6(<8 x i16> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i16_idx6:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, v3
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i16_idx6:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, v3
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i16_idx6:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, v3
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
  %element = extractelement <8 x i16> %vector, i32 6
  ret i16 %element
}

define i16 @extractelement_vgpr_v8i16_idx7(<8 x i16> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i16_idx7:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v3
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i16_idx7:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v3
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i16_idx7:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v3
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
  %element = extractelement <8 x i16> %vector, i32 7
  ret i16 %element
}