; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s

; Check-prefix layout used below: the GCN prefix is passed to all three llc
; invocations above, while GFX9, GFX8 and GFX7 are passed only to the
; -mcpu=gfx900, -mcpu=fiji and -mcpu=hawaii invocations respectively.
; Every function loads an i8 vector through %ptr and returns one element of it.

; <4 x i8> loaded through an inreg addrspace(4) pointer, with an inreg dynamic
; index. The expected code masks the index with 3, scales it by 8 (shift left
; by 3) and shifts the repacked dword right by that amount.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_sgpr_idx(<4 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) {
; GCN-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s0, s[2:3], 0x0
; GCN-NEXT: s_movk_i32 s5, 0xff
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_lshr_b32 s1, s0, 8
; GCN-NEXT: s_and_b32 s1, s1, s5
; GCN-NEXT: s_lshr_b32 s2, s0, 16
; GCN-NEXT: s_lshr_b32 s3, s0, 24
; GCN-NEXT: s_and_b32 s0, s0, s5
; GCN-NEXT: s_lshl_b32 s1, s1, 8
; GCN-NEXT: s_or_b32 s0, s0, s1
; GCN-NEXT: s_and_b32 s1, s2, s5
; GCN-NEXT: s_lshl_b32 s1, s1, 16
; GCN-NEXT: s_or_b32 s0, s0, s1
; GCN-NEXT: s_lshl_b32 s1, s3, 24
; GCN-NEXT: s_or_b32 s0, s0, s1
; GCN-NEXT: s_and_b32 s1, s4, 3
; GCN-NEXT: s_lshl_b32 s1, s1, 3
; GCN-NEXT: s_lshr_b32 s0, s0, s1
; GCN-NEXT: ; return to shader part epilog
  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
  %element = extractelement <4 x i8> %vector, i32 %idx
  ret i8 %element
}

; <4 x i8> loaded through a non-inreg addrspace(1) pointer, with an inreg
; dynamic index. The result is computed in a VGPR and moved to s0 with
; v_readfirstlane_b32 before returning.
define amdgpu_ps i8 @extractelement_vgpr_v4i8_sgpr_idx(<4 x i8> addrspace(1)* %ptr, i32 inreg %idx) {
; GFX9-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: global_load_dword v0, v[0:1], off
; GFX9-NEXT: s_mov_b32 s0, 8
; GFX9-NEXT: s_movk_i32 s1, 0xff
; GFX9-NEXT: s_and_b32 s2, s2, 3
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0
; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: v_and_b32_sdwa v3, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 24, v2
; GFX9-NEXT: v_and_or_b32 v0, v0, s1, v1
; GFX9-NEXT: v_or3_b32 v0, v0, v3, v2
; GFX9-NEXT: s_lshl_b32 s0, s2, 3
; GFX9-NEXT: v_lshrrev_b32_e32 v0, s0, v0
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: flat_load_dword v0, v[0:1]
; GFX8-NEXT: s_movk_i32 s0, 0xff
; GFX8-NEXT: v_mov_b32_e32 v1, 8
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: s_and_b32 s0, s2, 3
; GFX8-NEXT: s_lshl_b32 s0, s0, 3
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0
; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v4
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: v_or_b32_e32 v0, v0, v3
; GFX8-NEXT: v_lshrrev_b32_e32 v0, s0, v0
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_movk_i32 s0, 0xff
; GFX7-NEXT: s_and_b32 s1, s2, 3
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX7-NEXT: v_and_b32_e32 v1, s0, v1
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0
; GFX7-NEXT: v_and_b32_e32 v2, s0, v2
; GFX7-NEXT: v_and_b32_e32 v0, s0, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 24, v3
; GFX7-NEXT: v_or_b32_e32 v0, v0, v2
; GFX7-NEXT: v_or_b32_e32 v0, v0, v3
; GFX7-NEXT: s_lshl_b32 s0, s1, 3
; GFX7-NEXT: v_lshrrev_b32_e32 v0, s0, v0
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
  %element = extractelement <4 x i8> %vector, i32 %idx
  ret i8 %element
}

; <4 x i8> loaded through an addrspace(1) pointer with a dynamic index, in a
; function without the amdgpu_ps calling convention: neither argument is inreg
; and the expected code returns with s_setpc_b64.
define i8 @extractelement_vgpr_v4i8_vgpr_idx(<4 x i8> addrspace(1)* %ptr, i32 %idx) {
; GFX9-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dword v0, v[0:1], off
; GFX9-NEXT: v_and_b32_e32 v1, 3, v2
; GFX9-NEXT: s_mov_b32 s4, 8
; GFX9-NEXT: s_movk_i32 s5, 0xff
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0
; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: v_and_b32_sdwa v4, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3
; GFX9-NEXT: v_and_or_b32 v0, v0, s5, v2
; GFX9-NEXT: v_or3_b32 v0, v0, v4, v3
; GFX9-NEXT: v_lshrrev_b32_e32 v0, v1, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_load_dword v0, v[0:1]
; GFX8-NEXT: s_movk_i32 s4, 0xff
; GFX8-NEXT: v_mov_b32_e32 v1, 8
; GFX8-NEXT: v_mov_b32_e32 v3, s4
; GFX8-NEXT: v_and_b32_e32 v2, 3, v2
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v0
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 24, v0
; GFX8-NEXT: v_and_b32_sdwa v3, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v5
; GFX8-NEXT: v_or_b32_e32 v0, v0, v3
; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 3, v2
; GFX8-NEXT: v_lshrrev_b32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: v_and_b32_e32 v1, 3, v2
; GFX7-NEXT: s_movk_i32 s4, 0xff
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 3, v1
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX7-NEXT: v_and_b32_e32 v2, s4, v2
; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v0
; GFX7-NEXT: v_and_b32_e32 v3, s4, v3
; GFX7-NEXT: v_and_b32_e32 v0, s4, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_or_b32_e32 v0, v0, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4
; GFX7-NEXT: v_or_b32_e32 v0, v0, v3
; GFX7-NEXT: v_or_b32_e32 v0, v0, v4
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v1, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
  %element = extractelement <4 x i8> %vector, i32 %idx
  ret i8 %element
}

; <4 x i8> loaded through an inreg addrspace(4) pointer, with a non-inreg
; dynamic index. The dword is repacked with scalar instructions and the final
; shift is a vector instruction; the GFX7 expectation uses v_lshr_b32_e32 where
; GFX9 and GFX8 use v_lshrrev_b32_e64.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_vgpr_idx(<4 x i8> addrspace(4)* inreg %ptr, i32 %idx) {
; GFX9-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0
; GFX9-NEXT: s_movk_i32 s4, 0xff
; GFX9-NEXT: v_and_b32_e32 v0, 3, v0
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_lshr_b32 s1, s0, 8
; GFX9-NEXT: s_and_b32 s1, s1, s4
; GFX9-NEXT: s_lshr_b32 s2, s0, 16
; GFX9-NEXT: s_lshr_b32 s3, s0, 24
; GFX9-NEXT: s_and_b32 s0, s0, s4
; GFX9-NEXT: s_lshl_b32 s1, s1, 8
; GFX9-NEXT: s_or_b32 s0, s0, s1
; GFX9-NEXT: s_and_b32 s1, s2, s4
; GFX9-NEXT: s_lshl_b32 s1, s1, 16
; GFX9-NEXT: s_or_b32 s0, s0, s1
; GFX9-NEXT: s_lshl_b32 s1, s3, 24
; GFX9-NEXT: s_or_b32 s0, s0, s1
; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s0
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dword s0, s[2:3], 0x0
; GFX8-NEXT: s_movk_i32 s4, 0xff
; GFX8-NEXT: v_and_b32_e32 v0, 3, v0
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_lshr_b32 s1, s0, 8
; GFX8-NEXT: s_and_b32 s1, s1, s4
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
; GFX8-NEXT: s_lshr_b32 s3, s0, 24
; GFX8-NEXT: s_and_b32 s0, s0, s4
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: s_and_b32 s1, s2, s4
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: s_lshl_b32 s1, s3, 24
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s0
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dword s0, s[2:3], 0x0
; GFX7-NEXT: s_movk_i32 s4, 0xff
; GFX7-NEXT: v_and_b32_e32 v0, 3, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_lshr_b32 s1, s0, 8
; GFX7-NEXT: s_and_b32 s1, s1, s4
; GFX7-NEXT: s_lshr_b32 s2, s0, 16
; GFX7-NEXT: s_lshr_b32 s3, s0, 24
; GFX7-NEXT: s_and_b32 s0, s0, s4
; GFX7-NEXT: s_lshl_b32 s1, s1, 8
; GFX7-NEXT: s_or_b32 s0, s0, s1
; GFX7-NEXT: s_and_b32 s1, s2, s4
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
; GFX7-NEXT: s_or_b32 s0, s0, s1
; GFX7-NEXT: s_lshl_b32 s1, s3, 24
; GFX7-NEXT: s_or_b32 s0, s0, s1
; GFX7-NEXT: v_lshr_b32_e32 v0, s0, v0
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
  %element = extractelement <4 x i8> %vector, i32 %idx
  ret i8 %element
}

; Constant index 0 on a <4 x i8> loaded through an inreg addrspace(4) pointer.
; The expected code repacks the dword and has no trailing shift.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx0(<4 x i8> addrspace(4)* inreg %ptr) {
; GCN-LABEL: extractelement_sgpr_v4i8_idx0:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s1, s[2:3], 0x0
; GCN-NEXT: s_movk_i32 s0, 0xff
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_lshr_b32 s2, s1, 8
; GCN-NEXT: s_and_b32 s2, s2, s0
; GCN-NEXT: s_lshr_b32 s3, s1, 16
; GCN-NEXT: s_lshr_b32 s4, s1, 24
; GCN-NEXT: s_and_b32 s1, s1, s0
; GCN-NEXT: s_and_b32 s0, s3, s0
; GCN-NEXT: s_lshl_b32 s2, s2, 8
; GCN-NEXT: s_or_b32 s1, s1, s2
; GCN-NEXT: s_lshl_b32 s0, s0, 16
; GCN-NEXT: s_or_b32 s0, s1, s0
; GCN-NEXT: s_lshl_b32 s1, s4, 24
; GCN-NEXT: s_or_b32 s0, s0, s1
; GCN-NEXT: ; return to shader part epilog
  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
  %element = extractelement <4 x i8> %vector, i32 0
  ret i8 %element
}

; Constant index 1 on a <4 x i8> loaded through an inreg addrspace(4) pointer.
; The expected code ends with a scalar right shift by 8.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx1(<4 x i8> addrspace(4)* inreg %ptr) {
; GCN-LABEL: extractelement_sgpr_v4i8_idx1:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s1, s[2:3], 0x0
; GCN-NEXT: s_movk_i32 s0, 0xff
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_lshr_b32 s2, s1, 8
; GCN-NEXT: s_and_b32 s2, s2, s0
; GCN-NEXT: s_lshr_b32 s3, s1, 16
; GCN-NEXT: s_lshr_b32 s4, s1, 24
; GCN-NEXT: s_and_b32 s1, s1, s0
; GCN-NEXT: s_and_b32 s0, s3, s0
; GCN-NEXT: s_lshl_b32 s2, s2, 8
; GCN-NEXT: s_or_b32 s1, s1, s2
; GCN-NEXT: s_lshl_b32 s0, s0, 16
; GCN-NEXT: s_or_b32 s0, s1, s0
; GCN-NEXT: s_lshl_b32 s1, s4, 24
; GCN-NEXT: s_or_b32 s0, s0, s1
; GCN-NEXT: s_lshr_b32 s0, s0, 8
; GCN-NEXT: ; return to shader part epilog
  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
  %element = extractelement <4 x i8> %vector, i32 1
  ret i8 %element
}

; Constant index 2 on a <4 x i8> loaded through an inreg addrspace(4) pointer.
; The expected code ends with a scalar right shift by 16.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx2(<4 x i8> addrspace(4)* inreg %ptr) {
; GCN-LABEL: extractelement_sgpr_v4i8_idx2:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s1, s[2:3], 0x0
; GCN-NEXT: s_movk_i32 s0, 0xff
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_lshr_b32 s2, s1, 8
; GCN-NEXT: s_and_b32 s2, s2, s0
; GCN-NEXT: s_lshr_b32 s3, s1, 16
; GCN-NEXT: s_lshr_b32 s4, s1, 24
; GCN-NEXT: s_and_b32 s1, s1, s0
; GCN-NEXT: s_and_b32 s0, s3, s0
; GCN-NEXT: s_lshl_b32 s2, s2, 8
; GCN-NEXT: s_or_b32 s1, s1, s2
; GCN-NEXT: s_lshl_b32 s0, s0, 16
; GCN-NEXT: s_or_b32 s0, s1, s0
; GCN-NEXT: s_lshl_b32 s1, s4, 24
; GCN-NEXT: s_or_b32 s0, s0, s1
; GCN-NEXT: s_lshr_b32 s0, s0, 16
; GCN-NEXT: ; return to shader part epilog
  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
  %element = extractelement <4 x i8> %vector, i32 2
  ret i8 %element
}

; Constant index 3 on a <4 x i8> loaded through an inreg addrspace(4) pointer.
; The expected code ends with a scalar right shift by 24.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx3(<4 x i8> addrspace(4)* inreg %ptr) {
; GCN-LABEL: extractelement_sgpr_v4i8_idx3:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s1, s[2:3], 0x0
; GCN-NEXT: s_movk_i32 s0, 0xff
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_lshr_b32 s2, s1, 8
; GCN-NEXT: s_and_b32 s2, s2, s0
; GCN-NEXT: s_lshr_b32 s3, s1, 16
; GCN-NEXT: s_lshr_b32 s4, s1, 24
; GCN-NEXT: s_and_b32 s1, s1, s0
; GCN-NEXT: s_and_b32 s0, s3, s0
; GCN-NEXT: s_lshl_b32 s2, s2, 8
; GCN-NEXT: s_or_b32 s1, s1, s2
; GCN-NEXT: s_lshl_b32 s0, s0, 16
; GCN-NEXT: s_or_b32 s0, s1, s0
; GCN-NEXT: s_lshl_b32 s1, s4, 24
; GCN-NEXT: s_or_b32 s0, s0, s1
; GCN-NEXT: s_lshr_b32 s0, s0, 24
; GCN-NEXT: ; return to shader part epilog
  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
  %element = extractelement <4 x i8> %vector, i32 3
  ret i8 %element
}

; Constant index 0 on a <4 x i8> loaded through an addrspace(1) pointer.
; The expected code repacks the dword with vector instructions and has no
; trailing shift.
define i8 @extractelement_vgpr_v4i8_idx0(<4 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i8_idx0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dword v0, v[0:1], off
; GFX9-NEXT: s_mov_b32 s4, 8
; GFX9-NEXT: s_movk_i32 s5, 0xff
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0
; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT: v_and_or_b32 v0, v0, s5, v1
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2
; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i8_idx0:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_load_dword v0, v[0:1]
; GFX8-NEXT: s_movk_i32 s4, 0xff
; GFX8-NEXT: v_mov_b32_e32 v1, 8
; GFX8-NEXT: v_mov_b32_e32 v2, s4
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0
; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i8_idx0:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_movk_i32 s4, 0xff
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX7-NEXT: v_and_b32_e32 v1, s4, v1
; GFX7-NEXT: v_and_b32_e32 v2, s4, v2
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0
; GFX7-NEXT: v_and_b32_e32 v0, s4, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT: v_or_b32_e32 v0, v0, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
  %element = extractelement <4 x i8> %vector, i32 0
  ret i8 %element
}

; Constant index 1 on a <4 x i8> loaded through an addrspace(1) pointer.
; The expected code ends with a vector right shift by 8.
define i8 @extractelement_vgpr_v4i8_idx1(<4 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i8_idx1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dword v0, v[0:1], off
; GFX9-NEXT: s_mov_b32 s4, 8
; GFX9-NEXT: s_movk_i32 s5, 0xff
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0
; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT: v_and_or_b32 v0, v0, s5, v1
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2
; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i8_idx1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_load_dword v0, v[0:1]
; GFX8-NEXT: s_movk_i32 s4, 0xff
; GFX8-NEXT: v_mov_b32_e32 v1, 8
; GFX8-NEXT: v_mov_b32_e32 v2, s4
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0
; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i8_idx1:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_movk_i32 s4, 0xff
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX7-NEXT: v_and_b32_e32 v1, s4, v1
; GFX7-NEXT: v_and_b32_e32 v2, s4, v2
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0
; GFX7-NEXT: v_and_b32_e32 v0, s4, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT: v_or_b32_e32 v0, v0, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
  %element = extractelement <4 x i8> %vector, i32 1
  ret i8 %element
}

; Constant index 2 on a <4 x i8> loaded through an addrspace(1) pointer.
; The expected code ends with a vector right shift by 16.
define i8 @extractelement_vgpr_v4i8_idx2(<4 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i8_idx2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dword v0, v[0:1], off
; GFX9-NEXT: s_mov_b32 s4, 8
; GFX9-NEXT: s_movk_i32 s5, 0xff
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0
; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT: v_and_or_b32 v0, v0, s5, v1
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2
; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i8_idx2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_load_dword v0, v[0:1]
; GFX8-NEXT: s_movk_i32 s4, 0xff
; GFX8-NEXT: v_mov_b32_e32 v1, 8
; GFX8-NEXT: v_mov_b32_e32 v2, s4
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0
; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i8_idx2:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_movk_i32 s4, 0xff
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX7-NEXT: v_and_b32_e32 v1, s4, v1
; GFX7-NEXT: v_and_b32_e32 v2, s4, v2
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0
; GFX7-NEXT: v_and_b32_e32 v0, s4, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT: v_or_b32_e32 v0, v0, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
  %element = extractelement <4 x i8> %vector, i32 2
  ret i8 %element
}

; Constant index 3 on a <4 x i8> loaded through an addrspace(1) pointer.
; The expected code ends with a vector right shift by 24.
define i8 @extractelement_vgpr_v4i8_idx3(<4 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i8_idx3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dword v0, v[0:1], off
; GFX9-NEXT: s_mov_b32 s4, 8
; GFX9-NEXT: s_movk_i32 s5, 0xff
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0
; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT: v_and_or_b32 v0, v0, s5, v1
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2
; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i8_idx3:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_load_dword v0, v[0:1]
; GFX8-NEXT: s_movk_i32 s4, 0xff
; GFX8-NEXT: v_mov_b32_e32 v1, 8
; GFX8-NEXT: v_mov_b32_e32 v2, s4
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0
; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i8_idx3:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_movk_i32 s4, 0xff
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX7-NEXT: v_and_b32_e32 v1, s4, v1
; GFX7-NEXT: v_and_b32_e32 v2, s4, v2
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0
; GFX7-NEXT: v_and_b32_e32 v0, s4, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT: v_or_b32_e32 v0, v0, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
  %element = extractelement <4 x i8> %vector, i32 3
  ret i8 %element
}

; <8 x i8> loaded through an inreg addrspace(4) pointer, with an inreg dynamic
; index. The expected code repacks both loaded dwords, picks one by comparing
; (index >> 2) against 1, then shifts right by (index & 3) << 3.
define amdgpu_ps i8 @extractelement_sgpr_v8i8_sgpr_idx(<8 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) {
; GCN-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; GCN-NEXT: s_movk_i32 s9, 0xff
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_lshr_b32 s2, s0, 8
; GCN-NEXT: s_and_b32 s2, s2, s9
; GCN-NEXT: s_lshr_b32 s3, s0, 16
; GCN-NEXT: s_lshr_b32 s5, s0, 24
; GCN-NEXT: s_and_b32 s0, s0, s9
; GCN-NEXT: s_lshl_b32 s2, s2, 8
; GCN-NEXT: s_or_b32 s0, s0, s2
; GCN-NEXT: s_and_b32 s2, s3, s9
; GCN-NEXT: s_lshl_b32 s2, s2, 16
; GCN-NEXT: s_or_b32 s0, s0, s2
; GCN-NEXT: s_lshl_b32 s2, s5, 24
; GCN-NEXT: s_lshr_b32 s6, s1, 8
; GCN-NEXT: s_or_b32 s0, s0, s2
; GCN-NEXT: s_and_b32 s2, s6, s9
; GCN-NEXT: s_lshr_b32 s7, s1, 16
; GCN-NEXT: s_lshr_b32 s8, s1, 24
; GCN-NEXT: s_and_b32 s1, s1, s9
; GCN-NEXT: s_lshl_b32 s2, s2, 8
; GCN-NEXT: s_or_b32 s1, s1, s2
; GCN-NEXT: s_and_b32 s2, s7, s9
; GCN-NEXT: s_lshl_b32 s2, s2, 16
; GCN-NEXT: s_or_b32 s1, s1, s2
; GCN-NEXT: s_lshl_b32 s2, s8, 24
; GCN-NEXT: s_or_b32 s1, s1, s2
; GCN-NEXT: s_lshr_b32 s2, s4, 2
; GCN-NEXT: s_cmp_eq_u32 s2, 1
; GCN-NEXT: s_cselect_b32 s0, s1, s0
; GCN-NEXT: s_and_b32 s1, s4, 3
; GCN-NEXT: s_lshl_b32 s1, s1, 3
; GCN-NEXT: s_lshr_b32 s0, s0, s1
; GCN-NEXT: ; return to shader part epilog
  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
  %element = extractelement <8 x i8> %vector, i32 %idx
  ret i8 %element
}

; <8 x i8> loaded through a non-inreg addrspace(1) pointer, with an inreg
; dynamic index. The dword selection is a v_cndmask_b32 on a compare of
; (index >> 2) against 1; the result is moved to s0 with v_readfirstlane_b32.
define amdgpu_ps i8 @extractelement_vgpr_v8i8_sgpr_idx(<8 x i8> addrspace(1)* %ptr, i32 inreg %idx) {
; GFX9-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_mov_b32 s0, 8
; GFX9-NEXT: s_movk_i32 s1, 0xff
; GFX9-NEXT: s_lshr_b32 s3, s2, 2
; GFX9-NEXT: s_and_b32 s2, s2, 3
; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s3, 1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v1
; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: v_and_b32_sdwa v6, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT: v_and_b32_sdwa v7, v1, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3
; GFX9-NEXT: v_and_or_b32 v0, v0, s1, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v5, 24, v5
; GFX9-NEXT: v_and_or_b32 v1, v1, s1, v4
; GFX9-NEXT: v_or3_b32 v0, v0, v6, v3
; GFX9-NEXT: v_or3_b32 v1, v1, v7, v5
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX9-NEXT: s_lshl_b32 s0, s2, 3
; GFX9-NEXT: v_lshrrev_b32_e32 v0, s0, v0
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT: s_movk_i32 s0, 0xff
; GFX8-NEXT: v_mov_b32_e32 v2, 8
; GFX8-NEXT: v_mov_b32_e32 v3, 8
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: s_lshr_b32 s0, s2, 2
; GFX8-NEXT: s_and_b32 s1, s2, 3
; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1
; GFX8-NEXT: s_lshl_b32 s0, s1, 3
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v7, 8, v1
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT: v_lshrrev_b32_e32 v6, 24, v0
; GFX8-NEXT: v_and_b32_sdwa v9, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT: v_lshrrev_b32_e32 v8, 24, v1
; GFX8-NEXT: v_and_b32_sdwa v4, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 24, v6
; GFX8-NEXT: v_or_b32_e32 v0, v0, v9
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 24, v8
; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
; GFX8-NEXT: v_or_b32_e32 v0, v0, v5
; GFX8-NEXT: v_or_b32_e32 v1, v1, v6
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX8-NEXT: v_lshrrev_b32_e32 v0, s0, v0
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_movk_i32 s0, 0xff
; GFX7-NEXT: s_lshr_b32 s1, s2, 2
; GFX7-NEXT: s_and_b32 s2, s2, 3
; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s1, 1
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v5, 8, v1
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v1
; GFX7-NEXT: v_and_b32_e32 v2, s0, v2
; GFX7-NEXT: v_and_b32_e32 v5, s0, v5
; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v1
; GFX7-NEXT: v_and_b32_e32 v3, s0, v3
; GFX7-NEXT: v_and_b32_e32 v6, s0, v6
; GFX7-NEXT: v_and_b32_e32 v0, s0, v0
; GFX7-NEXT: v_and_b32_e32 v1, s0, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_or_b32_e32 v0, v0, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX7-NEXT: v_or_b32_e32 v1, v1, v5
; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4
; GFX7-NEXT: v_or_b32_e32 v0, v0, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v7, 24, v7
; GFX7-NEXT: v_or_b32_e32 v1, v1, v6
; GFX7-NEXT: v_or_b32_e32 v0, v0, v4
; GFX7-NEXT: v_or_b32_e32 v1, v1, v7
; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX7-NEXT: s_lshl_b32 s0, s2, 3
; GFX7-NEXT: v_lshrrev_b32_e32 v0, s0, v0
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 %idx
  ret i8 %element
}

; <8 x i8> loaded through an addrspace(1) pointer with a dynamic index, in a
; function without the amdgpu_ps calling convention. The index split
; (index >> 2, index & 3), the compare and the final shift are all vector
; instructions, and the expected code returns with s_setpc_b64.
define i8 @extractelement_vgpr_v8i8_vgpr_idx(<8 x i8> addrspace(1)* %ptr, i32 %idx) {
; GFX9-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_mov_b32 s4, 8
; GFX9-NEXT: s_movk_i32 s5, 0xff
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 2, v2
; GFX9-NEXT: v_and_b32_e32 v2, 3, v2
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 8, v1
; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1
; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: v_lshlrev_b32_sdwa v6, s4, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: v_and_b32_sdwa v8, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT: v_and_b32_sdwa v9, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT: v_lshlrev_b32_e32 v5, 24, v5
; GFX9-NEXT: v_and_or_b32 v0, v0, s5, v4
; GFX9-NEXT: v_lshlrev_b32_e32 v7, 24, v7
; GFX9-NEXT: v_and_or_b32 v1, v1, s5, v6
; GFX9-NEXT: v_or3_b32 v0, v0, v8, v5
; GFX9-NEXT: v_or3_b32 v1, v1, v9, v7
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v2
; GFX9-NEXT: v_lshrrev_b32_e32 v0, v1, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT: s_movk_i32 s4, 0xff
; GFX8-NEXT: v_mov_b32_e32 v3, 8
; GFX8-NEXT: v_mov_b32_e32 v4, 8
; GFX8-NEXT: v_mov_b32_e32 v5, s4
; GFX8-NEXT: v_lshrrev_b32_e32 v6, 2, v2
; GFX8-NEXT: v_and_b32_e32 v2, 3, v2
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v7, 8, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v9, 8, v1
; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v4, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT: v_lshrrev_b32_e32 v8, 24, v0
; GFX8-NEXT: v_and_b32_sdwa v11, v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT: v_lshrrev_b32_e32 v10, 24, v1
; GFX8-NEXT: v_and_b32_sdwa v5, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v7, 24, v8
; GFX8-NEXT: v_or_b32_e32 v0, v0, v11
; GFX8-NEXT: v_lshlrev_b32_e32 v8, 24, v10
; GFX8-NEXT: v_or_b32_e32 v1, v1, v5
; GFX8-NEXT: v_or_b32_e32 v0, v0, v7
; GFX8-NEXT: v_or_b32_e32 v1, v1, v8
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 3, v2
; GFX8-NEXT: v_lshrrev_b32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_movk_i32 s4, 0xff
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 2, v2
; GFX7-NEXT: v_and_b32_e32 v2, 3, v2
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_lshrrev_b32_e32 v4, 8, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v7, 8, v1
; GFX7-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v8, 16, v1
; GFX7-NEXT: v_and_b32_e32 v4, s4, v4
; GFX7-NEXT: v_and_b32_e32 v7, s4, v7
; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v9, 24, v1
; GFX7-NEXT: v_and_b32_e32 v5, s4, v5
; GFX7-NEXT: v_and_b32_e32 v8, s4, v8
; GFX7-NEXT: v_and_b32_e32 v0, s4, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4
; GFX7-NEXT: v_and_b32_e32 v1, s4, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v7, 8, v7
; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX7-NEXT: v_or_b32_e32 v0, v0, v4
; GFX7-NEXT: v_lshlrev_b32_e32 v8, 16, v8
; GFX7-NEXT: v_or_b32_e32 v1, v1, v7
; GFX7-NEXT: v_lshlrev_b32_e32 v6, 24, v6
; GFX7-NEXT: v_or_b32_e32 v0, v0, v5
; GFX7-NEXT: v_lshlrev_b32_e32 v9, 24, v9
; GFX7-NEXT: v_or_b32_e32 v1, v1, v8
; GFX7-NEXT: v_or_b32_e32 v0, v0, v6
; GFX7-NEXT: v_or_b32_e32 v1, v1, v9
; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 3, v2
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v1, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 %idx
  ret i8 %element
}

define amdgpu_ps i8 @extractelement_sgpr_v8i8_vgpr_idx(<8 x i8> addrspace(4)* inreg %ptr, i32 %idx) {
; GCN-LABEL: extractelement_sgpr_v8i8_vgpr_idx:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; GCN-NEXT:
s_movk_i32 s8, 0xff 868; GCN-NEXT: v_lshrrev_b32_e32 v1, 2, v0 869; GCN-NEXT: v_and_b32_e32 v0, 3, v0 870; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 871; GCN-NEXT: s_waitcnt lgkmcnt(0) 872; GCN-NEXT: s_lshr_b32 s2, s0, 8 873; GCN-NEXT: s_and_b32 s2, s2, s8 874; GCN-NEXT: s_lshr_b32 s3, s0, 16 875; GCN-NEXT: s_lshr_b32 s4, s0, 24 876; GCN-NEXT: s_and_b32 s0, s0, s8 877; GCN-NEXT: s_lshl_b32 s2, s2, 8 878; GCN-NEXT: s_or_b32 s0, s0, s2 879; GCN-NEXT: s_and_b32 s2, s3, s8 880; GCN-NEXT: s_lshl_b32 s2, s2, 16 881; GCN-NEXT: s_or_b32 s0, s0, s2 882; GCN-NEXT: s_lshl_b32 s2, s4, 24 883; GCN-NEXT: s_lshr_b32 s5, s1, 8 884; GCN-NEXT: s_or_b32 s0, s0, s2 885; GCN-NEXT: s_and_b32 s2, s5, s8 886; GCN-NEXT: s_lshr_b32 s6, s1, 16 887; GCN-NEXT: s_lshr_b32 s7, s1, 24 888; GCN-NEXT: s_and_b32 s1, s1, s8 889; GCN-NEXT: s_lshl_b32 s2, s2, 8 890; GCN-NEXT: s_or_b32 s1, s1, s2 891; GCN-NEXT: s_and_b32 s2, s6, s8 892; GCN-NEXT: s_lshl_b32 s2, s2, 16 893; GCN-NEXT: s_or_b32 s1, s1, s2 894; GCN-NEXT: s_lshl_b32 s2, s7, 24 895; GCN-NEXT: s_or_b32 s1, s1, s2 896; GCN-NEXT: v_mov_b32_e32 v2, s0 897; GCN-NEXT: v_mov_b32_e32 v3, s1 898; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc 899; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0 900; GCN-NEXT: v_lshrrev_b32_e32 v0, v0, v1 901; GCN-NEXT: v_readfirstlane_b32 s0, v0 902; GCN-NEXT: ; return to shader part epilog 903 %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr 904 %element = extractelement <8 x i8> %vector, i32 %idx 905 ret i8 %element 906} 907 908define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx0(<8 x i8> addrspace(4)* inreg %ptr) { 909; GCN-LABEL: extractelement_sgpr_v8i8_idx0: 910; GCN: ; %bb.0: 911; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 912; GCN-NEXT: s_movk_i32 s4, 0xff 913; GCN-NEXT: s_waitcnt lgkmcnt(0) 914; GCN-NEXT: s_lshr_b32 s1, s0, 8 915; GCN-NEXT: s_and_b32 s1, s1, s4 916; GCN-NEXT: s_lshr_b32 s2, s0, 16 917; GCN-NEXT: s_lshr_b32 s3, s0, 24 918; GCN-NEXT: s_and_b32 s0, s0, s4 919; GCN-NEXT: s_lshl_b32 s1, s1, 8 920; GCN-NEXT: 
s_or_b32 s0, s0, s1 921; GCN-NEXT: s_and_b32 s1, s2, s4 922; GCN-NEXT: s_lshl_b32 s1, s1, 16 923; GCN-NEXT: s_or_b32 s0, s0, s1 924; GCN-NEXT: s_lshl_b32 s1, s3, 24 925; GCN-NEXT: s_or_b32 s0, s0, s1 926; GCN-NEXT: ; return to shader part epilog 927 %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr 928 %element = extractelement <8 x i8> %vector, i32 0 929 ret i8 %element 930} 931 932define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx1(<8 x i8> addrspace(4)* inreg %ptr) { 933; GCN-LABEL: extractelement_sgpr_v8i8_idx1: 934; GCN: ; %bb.0: 935; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 936; GCN-NEXT: s_movk_i32 s4, 0xff 937; GCN-NEXT: s_waitcnt lgkmcnt(0) 938; GCN-NEXT: s_lshr_b32 s1, s0, 8 939; GCN-NEXT: s_and_b32 s1, s1, s4 940; GCN-NEXT: s_lshr_b32 s2, s0, 16 941; GCN-NEXT: s_lshr_b32 s3, s0, 24 942; GCN-NEXT: s_and_b32 s0, s0, s4 943; GCN-NEXT: s_lshl_b32 s1, s1, 8 944; GCN-NEXT: s_or_b32 s0, s0, s1 945; GCN-NEXT: s_and_b32 s1, s2, s4 946; GCN-NEXT: s_lshl_b32 s1, s1, 16 947; GCN-NEXT: s_or_b32 s0, s0, s1 948; GCN-NEXT: s_lshl_b32 s1, s3, 24 949; GCN-NEXT: s_or_b32 s0, s0, s1 950; GCN-NEXT: s_lshr_b32 s0, s0, 8 951; GCN-NEXT: ; return to shader part epilog 952 %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr 953 %element = extractelement <8 x i8> %vector, i32 1 954 ret i8 %element 955} 956 957define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx2(<8 x i8> addrspace(4)* inreg %ptr) { 958; GCN-LABEL: extractelement_sgpr_v8i8_idx2: 959; GCN: ; %bb.0: 960; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 961; GCN-NEXT: s_movk_i32 s4, 0xff 962; GCN-NEXT: s_waitcnt lgkmcnt(0) 963; GCN-NEXT: s_lshr_b32 s1, s0, 8 964; GCN-NEXT: s_and_b32 s1, s1, s4 965; GCN-NEXT: s_lshr_b32 s2, s0, 16 966; GCN-NEXT: s_lshr_b32 s3, s0, 24 967; GCN-NEXT: s_and_b32 s0, s0, s4 968; GCN-NEXT: s_lshl_b32 s1, s1, 8 969; GCN-NEXT: s_or_b32 s0, s0, s1 970; GCN-NEXT: s_and_b32 s1, s2, s4 971; GCN-NEXT: s_lshl_b32 s1, s1, 16 972; GCN-NEXT: s_or_b32 s0, s0, s1 973; GCN-NEXT: s_lshl_b32 s1, s3, 24 974; 
GCN-NEXT: s_or_b32 s0, s0, s1 975; GCN-NEXT: s_lshr_b32 s0, s0, 16 976; GCN-NEXT: ; return to shader part epilog 977 %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr 978 %element = extractelement <8 x i8> %vector, i32 2 979 ret i8 %element 980} 981 982define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx3(<8 x i8> addrspace(4)* inreg %ptr) { 983; GCN-LABEL: extractelement_sgpr_v8i8_idx3: 984; GCN: ; %bb.0: 985; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 986; GCN-NEXT: s_movk_i32 s4, 0xff 987; GCN-NEXT: s_waitcnt lgkmcnt(0) 988; GCN-NEXT: s_lshr_b32 s1, s0, 8 989; GCN-NEXT: s_and_b32 s1, s1, s4 990; GCN-NEXT: s_lshr_b32 s2, s0, 16 991; GCN-NEXT: s_lshr_b32 s3, s0, 24 992; GCN-NEXT: s_and_b32 s0, s0, s4 993; GCN-NEXT: s_lshl_b32 s1, s1, 8 994; GCN-NEXT: s_or_b32 s0, s0, s1 995; GCN-NEXT: s_and_b32 s1, s2, s4 996; GCN-NEXT: s_lshl_b32 s1, s1, 16 997; GCN-NEXT: s_or_b32 s0, s0, s1 998; GCN-NEXT: s_lshl_b32 s1, s3, 24 999; GCN-NEXT: s_or_b32 s0, s0, s1 1000; GCN-NEXT: s_lshr_b32 s0, s0, 24 1001; GCN-NEXT: ; return to shader part epilog 1002 %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr 1003 %element = extractelement <8 x i8> %vector, i32 3 1004 ret i8 %element 1005} 1006 1007define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx4(<8 x i8> addrspace(4)* inreg %ptr) { 1008; GCN-LABEL: extractelement_sgpr_v8i8_idx4: 1009; GCN: ; %bb.0: 1010; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1011; GCN-NEXT: s_movk_i32 s4, 0xff 1012; GCN-NEXT: s_waitcnt lgkmcnt(0) 1013; GCN-NEXT: s_lshr_b32 s0, s1, 8 1014; GCN-NEXT: s_and_b32 s0, s0, s4 1015; GCN-NEXT: s_lshr_b32 s2, s1, 16 1016; GCN-NEXT: s_lshr_b32 s3, s1, 24 1017; GCN-NEXT: s_and_b32 s1, s1, s4 1018; GCN-NEXT: s_lshl_b32 s0, s0, 8 1019; GCN-NEXT: s_or_b32 s0, s1, s0 1020; GCN-NEXT: s_and_b32 s1, s2, s4 1021; GCN-NEXT: s_lshl_b32 s1, s1, 16 1022; GCN-NEXT: s_or_b32 s0, s0, s1 1023; GCN-NEXT: s_lshl_b32 s1, s3, 24 1024; GCN-NEXT: s_or_b32 s0, s0, s1 1025; GCN-NEXT: ; return to shader part epilog 1026 %vector = load <8 x i8>, 
<8 x i8> addrspace(4)* %ptr 1027 %element = extractelement <8 x i8> %vector, i32 4 1028 ret i8 %element 1029} 1030 1031define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx5(<8 x i8> addrspace(4)* inreg %ptr) { 1032; GCN-LABEL: extractelement_sgpr_v8i8_idx5: 1033; GCN: ; %bb.0: 1034; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1035; GCN-NEXT: s_movk_i32 s4, 0xff 1036; GCN-NEXT: s_waitcnt lgkmcnt(0) 1037; GCN-NEXT: s_lshr_b32 s0, s1, 8 1038; GCN-NEXT: s_and_b32 s0, s0, s4 1039; GCN-NEXT: s_lshr_b32 s2, s1, 16 1040; GCN-NEXT: s_lshr_b32 s3, s1, 24 1041; GCN-NEXT: s_and_b32 s1, s1, s4 1042; GCN-NEXT: s_lshl_b32 s0, s0, 8 1043; GCN-NEXT: s_or_b32 s0, s1, s0 1044; GCN-NEXT: s_and_b32 s1, s2, s4 1045; GCN-NEXT: s_lshl_b32 s1, s1, 16 1046; GCN-NEXT: s_or_b32 s0, s0, s1 1047; GCN-NEXT: s_lshl_b32 s1, s3, 24 1048; GCN-NEXT: s_or_b32 s0, s0, s1 1049; GCN-NEXT: s_lshr_b32 s0, s0, 8 1050; GCN-NEXT: ; return to shader part epilog 1051 %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr 1052 %element = extractelement <8 x i8> %vector, i32 5 1053 ret i8 %element 1054} 1055 1056define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx6(<8 x i8> addrspace(4)* inreg %ptr) { 1057; GCN-LABEL: extractelement_sgpr_v8i8_idx6: 1058; GCN: ; %bb.0: 1059; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1060; GCN-NEXT: s_movk_i32 s4, 0xff 1061; GCN-NEXT: s_waitcnt lgkmcnt(0) 1062; GCN-NEXT: s_lshr_b32 s0, s1, 8 1063; GCN-NEXT: s_and_b32 s0, s0, s4 1064; GCN-NEXT: s_lshr_b32 s2, s1, 16 1065; GCN-NEXT: s_lshr_b32 s3, s1, 24 1066; GCN-NEXT: s_and_b32 s1, s1, s4 1067; GCN-NEXT: s_lshl_b32 s0, s0, 8 1068; GCN-NEXT: s_or_b32 s0, s1, s0 1069; GCN-NEXT: s_and_b32 s1, s2, s4 1070; GCN-NEXT: s_lshl_b32 s1, s1, 16 1071; GCN-NEXT: s_or_b32 s0, s0, s1 1072; GCN-NEXT: s_lshl_b32 s1, s3, 24 1073; GCN-NEXT: s_or_b32 s0, s0, s1 1074; GCN-NEXT: s_lshr_b32 s0, s0, 16 1075; GCN-NEXT: ; return to shader part epilog 1076 %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr 1077 %element = extractelement <8 x i8> %vector, i32 6 
ret i8 %element
}

; Constant index 7, inreg addrspace(4) pointer: the expected code repacks the
; second loaded dword (s1) into s0, then shifts right by 24.
define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx7(<8 x i8> addrspace(4)* inreg %ptr) {
; GCN-LABEL: extractelement_sgpr_v8i8_idx7:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
; GCN-NEXT:    s_movk_i32 s4, 0xff
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_lshr_b32 s0, s1, 8
; GCN-NEXT:    s_and_b32 s0, s0, s4
; GCN-NEXT:    s_lshr_b32 s2, s1, 16
; GCN-NEXT:    s_lshr_b32 s3, s1, 24
; GCN-NEXT:    s_and_b32 s1, s1, s4
; GCN-NEXT:    s_lshl_b32 s0, s0, 8
; GCN-NEXT:    s_or_b32 s0, s1, s0
; GCN-NEXT:    s_and_b32 s1, s2, s4
; GCN-NEXT:    s_lshl_b32 s1, s1, 16
; GCN-NEXT:    s_or_b32 s0, s0, s1
; GCN-NEXT:    s_lshl_b32 s1, s3, 24
; GCN-NEXT:    s_or_b32 s0, s0, s1
; GCN-NEXT:    s_lshr_b32 s0, s0, 24
; GCN-NEXT:    ; return to shader part epilog
  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
  %element = extractelement <8 x i8> %vector, i32 7
  ret i8 %element
}

; Constant index 0 with a plain addrspace(1) pointer argument: the expected
; code loads both dwords, repacks the first one (v0) byte by byte and returns
; it with no trailing shift. Checked separately per target because the load
; and repacking instructions differ.
define i8 @extractelement_vgpr_v8i8_idx0(<8 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    s_mov_b32 s4, 8
; GFX9-NEXT:    s_movk_i32 s5, 0xff
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT:    v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT:    v_and_or_b32 v0, v0, s5, v1
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx0:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    s_movk_i32 s4, 0xff
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v1, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, s4
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
; GFX8-NEXT:    v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx0:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_movk_i32 s4, 0xff
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 0
  ret i8 %element
}

; Constant index 1 with a plain addrspace(1) pointer argument: same repacking
; of the first dword (v0), followed by a vector right shift by 8.
define i8 @extractelement_vgpr_v8i8_idx1(<8 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    s_mov_b32 s4, 8
; GFX9-NEXT:    s_movk_i32 s5, 0xff
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT:    v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT:    v_and_or_b32 v0, v0, s5, v1
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    s_movk_i32 s4, 0xff
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v1, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, s4
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
; GFX8-NEXT:    v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx1:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_movk_i32 s4, 0xff
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 1
  ret i8 %element
}

; Constant index 2 with a plain addrspace(1) pointer argument: same repacking
; of the first dword (v0), followed by a vector right shift by 16.
define i8 @extractelement_vgpr_v8i8_idx2(<8 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    s_mov_b32 s4, 8
; GFX9-NEXT:    s_movk_i32 s5, 0xff
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT:    v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT:    v_and_or_b32 v0, v0, s5, v1
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    s_movk_i32 s4, 0xff
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v1, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, s4
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
; GFX8-NEXT:    v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx2:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_movk_i32 s4, 0xff
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 2
  ret i8 %element
}

; Constant index 3 with a plain addrspace(1) pointer argument: the expected
; code repacks the first loaded dword (v0), then shifts right by 24.
define i8 @extractelement_vgpr_v8i8_idx3(<8 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx3:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    s_mov_b32 s4, 8
; GFX9-NEXT:    s_movk_i32 s5, 0xff
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT:    v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT:    v_and_or_b32 v0, v0, s5, v1
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx3:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    s_movk_i32 s4, 0xff
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v1, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, s4
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
; GFX8-NEXT:    v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx3:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_movk_i32 s4, 0xff
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 3
  ret i8 %element
}

; Constant index 4 with a plain addrspace(1) pointer argument: the expected
; code repacks the second loaded dword (v1) into v0 and returns it with no
; trailing shift.
define i8 @extractelement_vgpr_v8i8_idx4(<8 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    s_mov_b32 s4, 8
; GFX9-NEXT:    s_movk_i32 s5, 0xff
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v1
; GFX9-NEXT:    v_and_b32_sdwa v3, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT:    v_and_or_b32 v0, v1, s5, v0
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    s_movk_i32 s4, 0xff
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, s4
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v1
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v1
; GFX8-NEXT:    v_and_b32_sdwa v2, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_movk_i32 s4, 0xff
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 4
  ret i8 %element
}

; Constant index 5 with a plain addrspace(1) pointer argument: same repacking
; of the second dword (v1), followed by a vector right shift by 8.
define i8 @extractelement_vgpr_v8i8_idx5(<8 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx5:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    s_mov_b32 s4, 8
; GFX9-NEXT:    s_movk_i32 s5, 0xff
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v1
; GFX9-NEXT:    v_and_b32_sdwa v3, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT:    v_and_or_b32 v0, v1, s5, v0
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx5:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    s_movk_i32 s4, 0xff
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, s4
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v1
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v1
; GFX8-NEXT:    v_and_b32_sdwa v2, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx5:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_movk_i32 s4, 0xff
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 5
  ret i8 %element
}

; Constant index 6 with a plain addrspace(1) pointer argument: same repacking
; of the second dword (v1), followed by a vector right shift by 16.
define i8 @extractelement_vgpr_v8i8_idx6(<8 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx6:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    s_mov_b32 s4, 8
; GFX9-NEXT:    s_movk_i32 s5, 0xff
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v1
; GFX9-NEXT:    v_and_b32_sdwa v3, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT:    v_and_or_b32 v0, v1, s5, v0
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx6:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    s_movk_i32 s4, 0xff
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, s4
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v1
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v1
; GFX8-NEXT:    v_and_b32_sdwa v2, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx6:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_movk_i32 s4, 0xff
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 6
  ret i8 %element
}

; Constant index 7 with a plain addrspace(1) pointer argument: same repacking
; of the second dword (v1), followed by a vector right shift by 24.
define i8 @extractelement_vgpr_v8i8_idx7(<8 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx7:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    s_mov_b32 s4, 8
; GFX9-NEXT:    s_movk_i32 s5, 0xff
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v1
; GFX9-NEXT:    v_and_b32_sdwa v3, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT:    v_and_or_b32 v0, v1, s5, v0
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx7:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    s_movk_i32 s4, 0xff
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, s4
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v1
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v1
; GFX8-NEXT:    v_and_b32_sdwa v2, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx7:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_movk_i32 s4, 0xff
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 7
  ret i8 %element
}

define amdgpu_ps i8 @extractelement_sgpr_v16i8_sgpr_idx(<16 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) {
; GCN-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
; GCN-NEXT:    s_movk_i32 s17, 0xff
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_lshr_b32 s5, s0, 8
; GCN-NEXT:    s_and_b32 s5, s5, s17
; GCN-NEXT:    s_lshr_b32 s6, s0, 16
; GCN-NEXT:    s_lshr_b32 s7, s0, 24
; GCN-NEXT:    s_and_b32 s0, s0, s17
; GCN-NEXT:    s_lshl_b32 s5, s5, 8
; GCN-NEXT:    s_or_b32 s0, s0, s5
; GCN-NEXT:    s_and_b32 s5, s6, s17
; GCN-NEXT:    s_lshl_b32 s5, s5, 16
; GCN-NEXT:    s_or_b32 s0, s0, s5
; GCN-NEXT:    s_lshl_b32 s5, s7, 24
; GCN-NEXT:    s_lshr_b32 s8, s1, 8
; GCN-NEXT:    s_or_b32 s0, s0, s5
; GCN-NEXT:    s_and_b32 s5, s8, s17
; GCN-NEXT:    s_lshr_b32 s9, s1, 16
; GCN-NEXT:    s_lshr_b32 s10, s1, 24
; GCN-NEXT:    s_and_b32 s1, s1, s17
; GCN-NEXT:    s_lshl_b32 s5, s5, 8
; GCN-NEXT:    s_or_b32 s1, s1, s5
; GCN-NEXT:    s_and_b32 s5, s9, s17
; GCN-NEXT:    s_lshl_b32 s5, s5, 16
; GCN-NEXT:    s_or_b32 s1, s1, s5
; GCN-NEXT:    s_lshl_b32 s5, s10, 24
;
GCN-NEXT: s_lshr_b32 s11, s2, 8 1650; GCN-NEXT: s_or_b32 s1, s1, s5 1651; GCN-NEXT: s_and_b32 s5, s11, s17 1652; GCN-NEXT: s_lshr_b32 s12, s2, 16 1653; GCN-NEXT: s_lshr_b32 s13, s2, 24 1654; GCN-NEXT: s_and_b32 s2, s2, s17 1655; GCN-NEXT: s_lshl_b32 s5, s5, 8 1656; GCN-NEXT: s_or_b32 s2, s2, s5 1657; GCN-NEXT: s_and_b32 s5, s12, s17 1658; GCN-NEXT: s_lshl_b32 s5, s5, 16 1659; GCN-NEXT: s_or_b32 s2, s2, s5 1660; GCN-NEXT: s_lshl_b32 s5, s13, 24 1661; GCN-NEXT: s_lshr_b32 s14, s3, 8 1662; GCN-NEXT: s_or_b32 s2, s2, s5 1663; GCN-NEXT: s_and_b32 s5, s14, s17 1664; GCN-NEXT: s_lshr_b32 s15, s3, 16 1665; GCN-NEXT: s_lshr_b32 s16, s3, 24 1666; GCN-NEXT: s_and_b32 s3, s3, s17 1667; GCN-NEXT: s_lshl_b32 s5, s5, 8 1668; GCN-NEXT: s_or_b32 s3, s3, s5 1669; GCN-NEXT: s_and_b32 s5, s15, s17 1670; GCN-NEXT: s_lshl_b32 s5, s5, 16 1671; GCN-NEXT: s_or_b32 s3, s3, s5 1672; GCN-NEXT: s_lshl_b32 s5, s16, 24 1673; GCN-NEXT: s_or_b32 s3, s3, s5 1674; GCN-NEXT: s_lshr_b32 s5, s4, 2 1675; GCN-NEXT: s_cmp_eq_u32 s5, 1 1676; GCN-NEXT: s_cselect_b32 s0, s1, s0 1677; GCN-NEXT: s_cmp_eq_u32 s5, 2 1678; GCN-NEXT: s_cselect_b32 s0, s2, s0 1679; GCN-NEXT: s_cmp_eq_u32 s5, 3 1680; GCN-NEXT: s_cselect_b32 s0, s3, s0 1681; GCN-NEXT: s_and_b32 s1, s4, 3 1682; GCN-NEXT: s_lshl_b32 s1, s1, 3 1683; GCN-NEXT: s_lshr_b32 s0, s0, s1 1684; GCN-NEXT: ; return to shader part epilog 1685 %vector = load <16 x i8>, <16 x i8> addrspace(4)* %ptr 1686 %element = extractelement <16 x i8> %vector, i32 %idx 1687 ret i8 %element 1688} 1689 1690define amdgpu_ps i8 @extractelement_vgpr_v16i8_sgpr_idx(<16 x i8> addrspace(1)* %ptr, i32 inreg %idx) { 1691; GFX9-LABEL: extractelement_vgpr_v16i8_sgpr_idx: 1692; GFX9: ; %bb.0: 1693; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 1694; GFX9-NEXT: s_mov_b32 s0, 8 1695; GFX9-NEXT: v_mov_b32_e32 v5, 8 1696; GFX9-NEXT: s_movk_i32 s1, 0xff 1697; GFX9-NEXT: s_lshr_b32 s3, s2, 2 1698; GFX9-NEXT: v_mov_b32_e32 v4, 0xff 1699; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s3, 1 1700; GFX9-NEXT: 
s_and_b32 s2, s2, 3 1701; GFX9-NEXT: s_waitcnt vmcnt(0) 1702; GFX9-NEXT: v_lshrrev_b32_e32 v6, 8, v0 1703; GFX9-NEXT: v_lshrrev_b32_e32 v8, 8, v1 1704; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v0 1705; GFX9-NEXT: v_lshrrev_b32_e32 v9, 24, v1 1706; GFX9-NEXT: v_lshrrev_b32_e32 v10, 8, v2 1707; GFX9-NEXT: v_lshlrev_b32_sdwa v6, s0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1708; GFX9-NEXT: v_lshlrev_b32_sdwa v8, v5, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1709; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v2 1710; GFX9-NEXT: v_and_b32_sdwa v14, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1711; GFX9-NEXT: v_and_b32_sdwa v15, v1, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1712; GFX9-NEXT: v_lshrrev_b32_e32 v12, 8, v3 1713; GFX9-NEXT: v_lshlrev_b32_sdwa v10, v5, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1714; GFX9-NEXT: v_lshlrev_b32_e32 v7, 24, v7 1715; GFX9-NEXT: v_and_or_b32 v0, v0, s1, v6 1716; GFX9-NEXT: v_lshlrev_b32_e32 v9, 24, v9 1717; GFX9-NEXT: v_and_or_b32 v1, v1, s1, v8 1718; GFX9-NEXT: v_and_b32_sdwa v16, v2, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1719; GFX9-NEXT: v_lshrrev_b32_e32 v13, 24, v3 1720; GFX9-NEXT: v_lshlrev_b32_sdwa v5, v5, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1721; GFX9-NEXT: v_lshlrev_b32_e32 v11, 24, v11 1722; GFX9-NEXT: v_and_or_b32 v2, v2, s1, v10 1723; GFX9-NEXT: v_or3_b32 v0, v0, v14, v7 1724; GFX9-NEXT: v_or3_b32 v1, v1, v15, v9 1725; GFX9-NEXT: v_and_b32_sdwa v17, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1726; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1727; GFX9-NEXT: v_lshlrev_b32_e32 v12, 24, v13 1728; GFX9-NEXT: v_and_or_b32 v3, v3, v4, v5 1729; GFX9-NEXT: v_or3_b32 v2, v2, v16, v11 1730; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s3, 2 1731; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1732; GFX9-NEXT: 
v_or3_b32 v3, v3, v17, v12 1733; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s3, 3 1734; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1735; GFX9-NEXT: s_lshl_b32 s0, s2, 3 1736; GFX9-NEXT: v_lshrrev_b32_e32 v0, s0, v0 1737; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1738; GFX9-NEXT: ; return to shader part epilog 1739; 1740; GFX8-LABEL: extractelement_vgpr_v16i8_sgpr_idx: 1741; GFX8: ; %bb.0: 1742; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 1743; GFX8-NEXT: s_movk_i32 s0, 0xff 1744; GFX8-NEXT: v_mov_b32_e32 v5, 8 1745; GFX8-NEXT: v_mov_b32_e32 v6, 8 1746; GFX8-NEXT: v_mov_b32_e32 v7, s0 1747; GFX8-NEXT: v_mov_b32_e32 v4, 0xff 1748; GFX8-NEXT: s_lshr_b32 s0, s2, 2 1749; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 1750; GFX8-NEXT: s_and_b32 s1, s2, 3 1751; GFX8-NEXT: s_waitcnt vmcnt(0) 1752; GFX8-NEXT: v_lshrrev_b32_e32 v8, 8, v0 1753; GFX8-NEXT: v_lshrrev_b32_e32 v9, 24, v0 1754; GFX8-NEXT: v_lshrrev_b32_e32 v10, 8, v1 1755; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v5, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1756; GFX8-NEXT: v_lshlrev_b32_e32 v8, 24, v9 1757; GFX8-NEXT: v_lshrrev_b32_e32 v11, 24, v1 1758; GFX8-NEXT: v_lshlrev_b32_sdwa v9, v6, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1759; GFX8-NEXT: v_lshrrev_b32_e32 v12, 8, v2 1760; GFX8-NEXT: v_and_b32_sdwa v16, v0, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1761; GFX8-NEXT: v_or_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1762; GFX8-NEXT: v_and_b32_sdwa v7, v1, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1763; GFX8-NEXT: v_or_b32_sdwa v1, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1764; GFX8-NEXT: v_lshlrev_b32_e32 v10, 24, v11 1765; GFX8-NEXT: v_lshrrev_b32_e32 v14, 8, v3 1766; GFX8-NEXT: v_lshlrev_b32_sdwa v11, v6, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1767; GFX8-NEXT: v_or_b32_e32 v0, v0, v16 1768; GFX8-NEXT: 
v_or_b32_e32 v1, v1, v7 1769; GFX8-NEXT: v_lshrrev_b32_e32 v13, 24, v2 1770; GFX8-NEXT: v_and_b32_sdwa v17, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1771; GFX8-NEXT: v_or_b32_sdwa v2, v2, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1772; GFX8-NEXT: v_lshlrev_b32_sdwa v6, v6, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1773; GFX8-NEXT: v_lshrrev_b32_e32 v15, 24, v3 1774; GFX8-NEXT: v_and_b32_sdwa v4, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1775; GFX8-NEXT: v_or_b32_sdwa v3, v3, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1776; GFX8-NEXT: v_lshlrev_b32_e32 v12, 24, v13 1777; GFX8-NEXT: v_or_b32_e32 v2, v2, v17 1778; GFX8-NEXT: v_or_b32_e32 v0, v0, v8 1779; GFX8-NEXT: v_or_b32_e32 v1, v1, v10 1780; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1781; GFX8-NEXT: v_lshlrev_b32_e32 v13, 24, v15 1782; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 1783; GFX8-NEXT: v_or_b32_e32 v2, v2, v12 1784; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 2 1785; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1786; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 3 1787; GFX8-NEXT: v_or_b32_e32 v3, v3, v13 1788; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1789; GFX8-NEXT: s_lshl_b32 s0, s1, 3 1790; GFX8-NEXT: v_lshrrev_b32_e32 v0, s0, v0 1791; GFX8-NEXT: v_readfirstlane_b32 s0, v0 1792; GFX8-NEXT: ; return to shader part epilog 1793; 1794; GFX7-LABEL: extractelement_vgpr_v16i8_sgpr_idx: 1795; GFX7: ; %bb.0: 1796; GFX7-NEXT: s_mov_b32 s6, 0 1797; GFX7-NEXT: s_mov_b32 s7, 0xf000 1798; GFX7-NEXT: s_mov_b64 s[4:5], 0 1799; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 1800; GFX7-NEXT: s_movk_i32 s0, 0xff 1801; GFX7-NEXT: v_mov_b32_e32 v4, 0xff 1802; GFX7-NEXT: s_lshr_b32 s1, s2, 2 1803; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s1, 1 1804; GFX7-NEXT: s_and_b32 s2, s2, 3 1805; GFX7-NEXT: s_waitcnt vmcnt(0) 1806; GFX7-NEXT: v_lshrrev_b32_e32 v5, 8, v0 1807; GFX7-NEXT: 
v_lshrrev_b32_e32 v8, 8, v1 1808; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v0 1809; GFX7-NEXT: v_lshrrev_b32_e32 v9, 16, v1 1810; GFX7-NEXT: v_lshrrev_b32_e32 v11, 8, v2 1811; GFX7-NEXT: v_and_b32_e32 v5, s0, v5 1812; GFX7-NEXT: v_and_b32_e32 v8, s0, v8 1813; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v0 1814; GFX7-NEXT: v_lshrrev_b32_e32 v10, 24, v1 1815; GFX7-NEXT: v_lshrrev_b32_e32 v12, 16, v2 1816; GFX7-NEXT: v_lshrrev_b32_e32 v14, 8, v3 1817; GFX7-NEXT: v_and_b32_e32 v6, s0, v6 1818; GFX7-NEXT: v_and_b32_e32 v9, s0, v9 1819; GFX7-NEXT: v_and_b32_e32 v11, s0, v11 1820; GFX7-NEXT: v_and_b32_e32 v0, s0, v0 1821; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 1822; GFX7-NEXT: v_and_b32_e32 v1, s0, v1 1823; GFX7-NEXT: v_lshlrev_b32_e32 v8, 8, v8 1824; GFX7-NEXT: v_lshrrev_b32_e32 v13, 24, v2 1825; GFX7-NEXT: v_and_b32_e32 v12, v12, v4 1826; GFX7-NEXT: v_and_b32_e32 v14, v14, v4 1827; GFX7-NEXT: v_and_b32_e32 v2, s0, v2 1828; GFX7-NEXT: v_lshlrev_b32_e32 v11, 8, v11 1829; GFX7-NEXT: v_lshrrev_b32_e32 v15, 16, v3 1830; GFX7-NEXT: v_lshlrev_b32_e32 v6, 16, v6 1831; GFX7-NEXT: v_or_b32_e32 v0, v0, v5 1832; GFX7-NEXT: v_lshlrev_b32_e32 v9, 16, v9 1833; GFX7-NEXT: v_or_b32_e32 v1, v1, v8 1834; GFX7-NEXT: v_lshrrev_b32_e32 v16, 24, v3 1835; GFX7-NEXT: v_and_b32_e32 v3, v3, v4 1836; GFX7-NEXT: v_and_b32_e32 v4, v15, v4 1837; GFX7-NEXT: v_lshlrev_b32_e32 v14, 8, v14 1838; GFX7-NEXT: v_lshlrev_b32_e32 v7, 24, v7 1839; GFX7-NEXT: v_or_b32_e32 v0, v0, v6 1840; GFX7-NEXT: v_lshlrev_b32_e32 v10, 24, v10 1841; GFX7-NEXT: v_or_b32_e32 v1, v1, v9 1842; GFX7-NEXT: v_lshlrev_b32_e32 v12, 16, v12 1843; GFX7-NEXT: v_or_b32_e32 v2, v2, v11 1844; GFX7-NEXT: v_lshlrev_b32_e32 v13, 24, v13 1845; GFX7-NEXT: v_or_b32_e32 v2, v2, v12 1846; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 1847; GFX7-NEXT: v_or_b32_e32 v3, v3, v14 1848; GFX7-NEXT: v_or_b32_e32 v0, v0, v7 1849; GFX7-NEXT: v_or_b32_e32 v1, v1, v10 1850; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1851; GFX7-NEXT: v_lshlrev_b32_e32 v15, 24, v16 1852; 
GFX7-NEXT: v_or_b32_e32 v3, v3, v4 1853; GFX7-NEXT: v_or_b32_e32 v2, v2, v13 1854; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s1, 2 1855; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1856; GFX7-NEXT: v_or_b32_e32 v3, v3, v15 1857; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s1, 3 1858; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1859; GFX7-NEXT: s_lshl_b32 s0, s2, 3 1860; GFX7-NEXT: v_lshrrev_b32_e32 v0, s0, v0 1861; GFX7-NEXT: v_readfirstlane_b32 s0, v0 1862; GFX7-NEXT: ; return to shader part epilog 1863 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 1864 %element = extractelement <16 x i8> %vector, i32 %idx 1865 ret i8 %element 1866} 1867 1868define i8 @extractelement_vgpr_v16i8_vgpr_idx(<16 x i8> addrspace(1)* %ptr, i32 %idx) { 1869; GFX9-LABEL: extractelement_vgpr_v16i8_vgpr_idx: 1870; GFX9: ; %bb.0: 1871; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1872; GFX9-NEXT: global_load_dwordx4 v[3:6], v[0:1], off 1873; GFX9-NEXT: s_mov_b32 s4, 8 1874; GFX9-NEXT: v_mov_b32_e32 v1, 8 1875; GFX9-NEXT: s_movk_i32 s5, 0xff 1876; GFX9-NEXT: v_lshrrev_b32_e32 v7, 2, v2 1877; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 1878; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7 1879; GFX9-NEXT: v_and_b32_e32 v2, 3, v2 1880; GFX9-NEXT: s_waitcnt vmcnt(0) 1881; GFX9-NEXT: v_lshrrev_b32_e32 v8, 8, v3 1882; GFX9-NEXT: v_lshrrev_b32_e32 v10, 8, v4 1883; GFX9-NEXT: v_lshrrev_b32_e32 v9, 24, v3 1884; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v4 1885; GFX9-NEXT: v_lshrrev_b32_e32 v12, 8, v5 1886; GFX9-NEXT: v_lshrrev_b32_e32 v14, 8, v6 1887; GFX9-NEXT: v_lshlrev_b32_sdwa v8, s4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1888; GFX9-NEXT: v_lshlrev_b32_sdwa v10, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1889; GFX9-NEXT: v_lshrrev_b32_e32 v13, 24, v5 1890; GFX9-NEXT: v_and_b32_sdwa v16, v3, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1891; GFX9-NEXT: v_and_b32_sdwa v17, v4, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 
src1_sel:DWORD 1892; GFX9-NEXT: v_lshlrev_b32_sdwa v12, v1, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1893; GFX9-NEXT: v_lshlrev_b32_e32 v9, 24, v9 1894; GFX9-NEXT: v_lshlrev_b32_e32 v11, 24, v11 1895; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1896; GFX9-NEXT: v_and_or_b32 v3, v3, s5, v8 1897; GFX9-NEXT: v_and_or_b32 v4, v4, s5, v10 1898; GFX9-NEXT: v_lshrrev_b32_e32 v15, 24, v6 1899; GFX9-NEXT: v_and_b32_sdwa v18, v5, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1900; GFX9-NEXT: v_and_b32_sdwa v19, v6, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1901; GFX9-NEXT: v_and_or_b32 v0, v6, v0, v1 1902; GFX9-NEXT: v_or3_b32 v1, v3, v16, v9 1903; GFX9-NEXT: v_or3_b32 v3, v4, v17, v11 1904; GFX9-NEXT: v_lshlrev_b32_e32 v13, 24, v13 1905; GFX9-NEXT: v_and_or_b32 v5, v5, s5, v12 1906; GFX9-NEXT: v_lshlrev_b32_e32 v14, 24, v15 1907; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1908; GFX9-NEXT: v_or3_b32 v4, v5, v18, v13 1909; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 2, v7 1910; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 1911; GFX9-NEXT: v_or3_b32 v0, v0, v19, v14 1912; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 3, v7 1913; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 1914; GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v2 1915; GFX9-NEXT: v_lshrrev_b32_e32 v0, v1, v0 1916; GFX9-NEXT: s_setpc_b64 s[30:31] 1917; 1918; GFX8-LABEL: extractelement_vgpr_v16i8_vgpr_idx: 1919; GFX8: ; %bb.0: 1920; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1921; GFX8-NEXT: flat_load_dwordx4 v[3:6], v[0:1] 1922; GFX8-NEXT: s_movk_i32 s4, 0xff 1923; GFX8-NEXT: v_mov_b32_e32 v1, 8 1924; GFX8-NEXT: v_mov_b32_e32 v7, 8 1925; GFX8-NEXT: v_mov_b32_e32 v8, s4 1926; GFX8-NEXT: v_mov_b32_e32 v0, 0xff 1927; GFX8-NEXT: v_lshrrev_b32_e32 v9, 2, v2 1928; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v9 1929; GFX8-NEXT: v_and_b32_e32 v2, 3, v2 1930; GFX8-NEXT: s_waitcnt vmcnt(0) 1931; GFX8-NEXT: 
v_lshrrev_b32_e32 v10, 8, v3 1932; GFX8-NEXT: v_lshrrev_b32_e32 v11, 24, v3 1933; GFX8-NEXT: v_lshrrev_b32_e32 v12, 8, v4 1934; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1935; GFX8-NEXT: v_lshlrev_b32_e32 v10, 24, v11 1936; GFX8-NEXT: v_lshrrev_b32_e32 v13, 24, v4 1937; GFX8-NEXT: v_lshlrev_b32_sdwa v11, v7, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1938; GFX8-NEXT: v_lshrrev_b32_e32 v14, 8, v5 1939; GFX8-NEXT: v_and_b32_sdwa v18, v3, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1940; GFX8-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1941; GFX8-NEXT: v_lshlrev_b32_e32 v12, 24, v13 1942; GFX8-NEXT: v_lshrrev_b32_e32 v16, 8, v6 1943; GFX8-NEXT: v_lshlrev_b32_sdwa v13, v7, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1944; GFX8-NEXT: v_and_b32_sdwa v8, v4, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1945; GFX8-NEXT: v_or_b32_sdwa v3, v4, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1946; GFX8-NEXT: v_or_b32_e32 v1, v1, v18 1947; GFX8-NEXT: v_or_b32_e32 v3, v3, v8 1948; GFX8-NEXT: v_lshrrev_b32_e32 v15, 24, v5 1949; GFX8-NEXT: v_and_b32_sdwa v19, v5, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1950; GFX8-NEXT: v_or_b32_sdwa v4, v5, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1951; GFX8-NEXT: v_lshlrev_b32_sdwa v7, v7, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1952; GFX8-NEXT: v_lshrrev_b32_e32 v17, 24, v6 1953; GFX8-NEXT: v_lshlrev_b32_e32 v14, 24, v15 1954; GFX8-NEXT: v_or_b32_e32 v4, v4, v19 1955; GFX8-NEXT: v_and_b32_sdwa v0, v6, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1956; GFX8-NEXT: v_or_b32_sdwa v5, v6, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1957; GFX8-NEXT: v_or_b32_e32 
v1, v1, v10 1958; GFX8-NEXT: v_or_b32_e32 v3, v3, v12 1959; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1960; GFX8-NEXT: v_lshlrev_b32_e32 v15, 24, v17 1961; GFX8-NEXT: v_or_b32_e32 v0, v5, v0 1962; GFX8-NEXT: v_or_b32_e32 v4, v4, v14 1963; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 2, v9 1964; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 1965; GFX8-NEXT: v_or_b32_e32 v0, v0, v15 1966; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 3, v9 1967; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 1968; GFX8-NEXT: v_lshlrev_b32_e32 v1, 3, v2 1969; GFX8-NEXT: v_lshrrev_b32_e32 v0, v1, v0 1970; GFX8-NEXT: s_setpc_b64 s[30:31] 1971; 1972; GFX7-LABEL: extractelement_vgpr_v16i8_vgpr_idx: 1973; GFX7: ; %bb.0: 1974; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1975; GFX7-NEXT: s_mov_b32 s6, 0 1976; GFX7-NEXT: s_mov_b32 s7, 0xf000 1977; GFX7-NEXT: s_mov_b64 s[4:5], 0 1978; GFX7-NEXT: buffer_load_dwordx4 v[3:6], v[0:1], s[4:7], 0 addr64 1979; GFX7-NEXT: s_movk_i32 s4, 0xff 1980; GFX7-NEXT: v_mov_b32_e32 v0, 0xff 1981; GFX7-NEXT: v_lshrrev_b32_e32 v18, 2, v2 1982; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v18 1983; GFX7-NEXT: v_and_b32_e32 v2, 3, v2 1984; GFX7-NEXT: s_waitcnt vmcnt(0) 1985; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v3 1986; GFX7-NEXT: v_lshrrev_b32_e32 v9, 8, v4 1987; GFX7-NEXT: v_lshrrev_b32_e32 v7, 16, v3 1988; GFX7-NEXT: v_lshrrev_b32_e32 v10, 16, v4 1989; GFX7-NEXT: v_lshrrev_b32_e32 v12, 8, v5 1990; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 1991; GFX7-NEXT: v_and_b32_e32 v9, s4, v9 1992; GFX7-NEXT: v_lshrrev_b32_e32 v8, 24, v3 1993; GFX7-NEXT: v_lshrrev_b32_e32 v11, 24, v4 1994; GFX7-NEXT: v_lshrrev_b32_e32 v13, 16, v5 1995; GFX7-NEXT: v_lshrrev_b32_e32 v15, 8, v6 1996; GFX7-NEXT: v_and_b32_e32 v7, s4, v7 1997; GFX7-NEXT: v_and_b32_e32 v10, s4, v10 1998; GFX7-NEXT: v_and_b32_e32 v12, s4, v12 1999; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 2000; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 2001; GFX7-NEXT: v_and_b32_e32 v4, s4, v4 2002; GFX7-NEXT: v_lshlrev_b32_e32 v9, 8, v9 2003; GFX7-NEXT: v_lshrrev_b32_e32 
v14, 24, v5 2004; GFX7-NEXT: v_and_b32_e32 v13, v13, v0 2005; GFX7-NEXT: v_and_b32_e32 v15, v15, v0 2006; GFX7-NEXT: v_or_b32_e32 v1, v3, v1 2007; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v7 2008; GFX7-NEXT: v_lshrrev_b32_e32 v16, 16, v6 2009; GFX7-NEXT: v_and_b32_e32 v5, s4, v5 2010; GFX7-NEXT: v_lshlrev_b32_e32 v10, 16, v10 2011; GFX7-NEXT: v_or_b32_e32 v3, v4, v9 2012; GFX7-NEXT: v_lshlrev_b32_e32 v12, 8, v12 2013; GFX7-NEXT: v_lshrrev_b32_e32 v17, 24, v6 2014; GFX7-NEXT: v_and_b32_e32 v6, v6, v0 2015; GFX7-NEXT: v_and_b32_e32 v0, v16, v0 2016; GFX7-NEXT: v_lshlrev_b32_e32 v8, 24, v8 2017; GFX7-NEXT: v_or_b32_e32 v1, v1, v7 2018; GFX7-NEXT: v_lshlrev_b32_e32 v11, 24, v11 2019; GFX7-NEXT: v_or_b32_e32 v3, v3, v10 2020; GFX7-NEXT: v_lshlrev_b32_e32 v13, 16, v13 2021; GFX7-NEXT: v_or_b32_e32 v4, v5, v12 2022; GFX7-NEXT: v_lshlrev_b32_e32 v15, 8, v15 2023; GFX7-NEXT: v_lshlrev_b32_e32 v14, 24, v14 2024; GFX7-NEXT: v_or_b32_e32 v4, v4, v13 2025; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 2026; GFX7-NEXT: v_or_b32_e32 v5, v6, v15 2027; GFX7-NEXT: v_or_b32_e32 v1, v1, v8 2028; GFX7-NEXT: v_or_b32_e32 v3, v3, v11 2029; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 2030; GFX7-NEXT: v_lshlrev_b32_e32 v16, 24, v17 2031; GFX7-NEXT: v_or_b32_e32 v0, v5, v0 2032; GFX7-NEXT: v_or_b32_e32 v4, v4, v14 2033; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 2, v18 2034; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 2035; GFX7-NEXT: v_or_b32_e32 v0, v0, v16 2036; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 3, v18 2037; GFX7-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 2038; GFX7-NEXT: v_lshlrev_b32_e32 v1, 3, v2 2039; GFX7-NEXT: v_lshrrev_b32_e32 v0, v1, v0 2040; GFX7-NEXT: s_setpc_b64 s[30:31] 2041 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 2042 %element = extractelement <16 x i8> %vector, i32 %idx 2043 ret i8 %element 2044} 2045 2046define amdgpu_ps i8 @extractelement_sgpr_v16i8_vgpr_idx(<16 x i8> addrspace(4)* inreg %ptr, i32 %idx) { 2047; GCN-LABEL: extractelement_sgpr_v16i8_vgpr_idx: 2048; GCN: ; %bb.0: 
2049; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 2050; GCN-NEXT: s_movk_i32 s16, 0xff 2051; GCN-NEXT: v_lshrrev_b32_e32 v1, 2, v0 2052; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 2053; GCN-NEXT: v_and_b32_e32 v0, 3, v0 2054; GCN-NEXT: s_waitcnt lgkmcnt(0) 2055; GCN-NEXT: s_lshr_b32 s4, s0, 8 2056; GCN-NEXT: s_and_b32 s4, s4, s16 2057; GCN-NEXT: s_lshr_b32 s5, s0, 16 2058; GCN-NEXT: s_lshr_b32 s6, s0, 24 2059; GCN-NEXT: s_and_b32 s0, s0, s16 2060; GCN-NEXT: s_lshl_b32 s4, s4, 8 2061; GCN-NEXT: s_or_b32 s0, s0, s4 2062; GCN-NEXT: s_and_b32 s4, s5, s16 2063; GCN-NEXT: s_lshl_b32 s4, s4, 16 2064; GCN-NEXT: s_or_b32 s0, s0, s4 2065; GCN-NEXT: s_lshl_b32 s4, s6, 24 2066; GCN-NEXT: s_lshr_b32 s7, s1, 8 2067; GCN-NEXT: s_or_b32 s0, s0, s4 2068; GCN-NEXT: s_and_b32 s4, s7, s16 2069; GCN-NEXT: s_lshr_b32 s8, s1, 16 2070; GCN-NEXT: s_lshr_b32 s9, s1, 24 2071; GCN-NEXT: s_and_b32 s1, s1, s16 2072; GCN-NEXT: s_lshl_b32 s4, s4, 8 2073; GCN-NEXT: s_or_b32 s1, s1, s4 2074; GCN-NEXT: s_and_b32 s4, s8, s16 2075; GCN-NEXT: s_lshl_b32 s4, s4, 16 2076; GCN-NEXT: s_or_b32 s1, s1, s4 2077; GCN-NEXT: s_lshl_b32 s4, s9, 24 2078; GCN-NEXT: s_lshr_b32 s10, s2, 8 2079; GCN-NEXT: s_or_b32 s1, s1, s4 2080; GCN-NEXT: s_and_b32 s4, s10, s16 2081; GCN-NEXT: s_lshr_b32 s11, s2, 16 2082; GCN-NEXT: s_lshr_b32 s12, s2, 24 2083; GCN-NEXT: s_and_b32 s2, s2, s16 2084; GCN-NEXT: s_lshl_b32 s4, s4, 8 2085; GCN-NEXT: s_or_b32 s2, s2, s4 2086; GCN-NEXT: s_and_b32 s4, s11, s16 2087; GCN-NEXT: s_lshl_b32 s4, s4, 16 2088; GCN-NEXT: s_or_b32 s2, s2, s4 2089; GCN-NEXT: s_lshl_b32 s4, s12, 24 2090; GCN-NEXT: s_lshr_b32 s13, s3, 8 2091; GCN-NEXT: s_or_b32 s2, s2, s4 2092; GCN-NEXT: s_and_b32 s4, s13, s16 2093; GCN-NEXT: s_lshr_b32 s14, s3, 16 2094; GCN-NEXT: s_lshr_b32 s15, s3, 24 2095; GCN-NEXT: s_and_b32 s3, s3, s16 2096; GCN-NEXT: s_lshl_b32 s4, s4, 8 2097; GCN-NEXT: s_or_b32 s3, s3, s4 2098; GCN-NEXT: s_and_b32 s4, s14, s16 2099; GCN-NEXT: s_lshl_b32 s4, s4, 16 2100; GCN-NEXT: s_or_b32 s3, s3, s4 2101; GCN-NEXT: 
s_lshl_b32 s4, s15, 24 2102; GCN-NEXT: v_mov_b32_e32 v2, s0 2103; GCN-NEXT: v_mov_b32_e32 v3, s1 2104; GCN-NEXT: s_or_b32 s3, s3, s4 2105; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 2106; GCN-NEXT: v_mov_b32_e32 v4, s2 2107; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1 2108; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2109; GCN-NEXT: v_mov_b32_e32 v5, s3 2110; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1 2111; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v5, vcc 2112; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0 2113; GCN-NEXT: v_lshrrev_b32_e32 v0, v0, v1 2114; GCN-NEXT: v_readfirstlane_b32 s0, v0 2115; GCN-NEXT: ; return to shader part epilog 2116 %vector = load <16 x i8>, <16 x i8> addrspace(4)* %ptr 2117 %element = extractelement <16 x i8> %vector, i32 %idx 2118 ret i8 %element 2119} 2120 2121define i8 @extractelement_vgpr_v16i8_idx0(<16 x i8> addrspace(1)* %ptr) { 2122; GFX9-LABEL: extractelement_vgpr_v16i8_idx0: 2123; GFX9: ; %bb.0: 2124; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2125; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 2126; GFX9-NEXT: s_mov_b32 s4, 8 2127; GFX9-NEXT: s_movk_i32 s5, 0xff 2128; GFX9-NEXT: s_waitcnt vmcnt(0) 2129; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 2130; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 2131; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 2132; GFX9-NEXT: v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2133; GFX9-NEXT: v_and_or_b32 v0, v0, s5, v1 2134; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 2135; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 2136; GFX9-NEXT: s_setpc_b64 s[30:31] 2137; 2138; GFX8-LABEL: extractelement_vgpr_v16i8_idx0: 2139; GFX8: ; %bb.0: 2140; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2141; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2142; GFX8-NEXT: s_movk_i32 s4, 0xff 2143; GFX8-NEXT: s_waitcnt vmcnt(0) 2144; GFX8-NEXT: v_mov_b32_e32 v1, 8 2145; GFX8-NEXT: v_mov_b32_e32 v2, s4 2146; GFX8-NEXT: v_lshrrev_b32_e32 
v3, 8, v0 2147; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 2148; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 2149; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2150; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2151; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 2152; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 2153; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2154; GFX8-NEXT: s_setpc_b64 s[30:31] 2155; 2156; GFX7-LABEL: extractelement_vgpr_v16i8_idx0: 2157; GFX7: ; %bb.0: 2158; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2159; GFX7-NEXT: s_mov_b32 s6, 0 2160; GFX7-NEXT: s_mov_b32 s7, 0xf000 2161; GFX7-NEXT: s_mov_b64 s[4:5], 0 2162; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 2163; GFX7-NEXT: s_movk_i32 s4, 0xff 2164; GFX7-NEXT: s_waitcnt vmcnt(0) 2165; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 2166; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 2167; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 2168; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 2169; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 2170; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 2171; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 2172; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2173; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 2174; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 2175; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2176; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2177; GFX7-NEXT: s_setpc_b64 s[30:31] 2178 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 2179 %element = extractelement <16 x i8> %vector, i32 0 2180 ret i8 %element 2181} 2182 2183define i8 @extractelement_vgpr_v16i8_idx1(<16 x i8> addrspace(1)* %ptr) { 2184; GFX9-LABEL: extractelement_vgpr_v16i8_idx1: 2185; GFX9: ; %bb.0: 2186; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2187; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 2188; GFX9-NEXT: s_mov_b32 s4, 8 2189; GFX9-NEXT: s_movk_i32 s5, 0xff 2190; GFX9-NEXT: 
s_waitcnt vmcnt(0) 2191; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 2192; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 2193; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 2194; GFX9-NEXT: v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2195; GFX9-NEXT: v_and_or_b32 v0, v0, s5, v1 2196; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 2197; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 2198; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0 2199; GFX9-NEXT: s_setpc_b64 s[30:31] 2200; 2201; GFX8-LABEL: extractelement_vgpr_v16i8_idx1: 2202; GFX8: ; %bb.0: 2203; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2204; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2205; GFX8-NEXT: s_movk_i32 s4, 0xff 2206; GFX8-NEXT: s_waitcnt vmcnt(0) 2207; GFX8-NEXT: v_mov_b32_e32 v1, 8 2208; GFX8-NEXT: v_mov_b32_e32 v2, s4 2209; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 2210; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 2211; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 2212; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2213; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2214; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 2215; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 2216; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2217; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0 2218; GFX8-NEXT: s_setpc_b64 s[30:31] 2219; 2220; GFX7-LABEL: extractelement_vgpr_v16i8_idx1: 2221; GFX7: ; %bb.0: 2222; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2223; GFX7-NEXT: s_mov_b32 s6, 0 2224; GFX7-NEXT: s_mov_b32 s7, 0xf000 2225; GFX7-NEXT: s_mov_b64 s[4:5], 0 2226; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 2227; GFX7-NEXT: s_movk_i32 s4, 0xff 2228; GFX7-NEXT: s_waitcnt vmcnt(0) 2229; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 2230; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 
2231; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 2232; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 2233; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 2234; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 2235; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 2236; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2237; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 2238; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 2239; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2240; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2241; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 2242; GFX7-NEXT: s_setpc_b64 s[30:31] 2243 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 2244 %element = extractelement <16 x i8> %vector, i32 1 2245 ret i8 %element 2246} 2247 2248define i8 @extractelement_vgpr_v16i8_idx2(<16 x i8> addrspace(1)* %ptr) { 2249; GFX9-LABEL: extractelement_vgpr_v16i8_idx2: 2250; GFX9: ; %bb.0: 2251; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2252; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 2253; GFX9-NEXT: s_mov_b32 s4, 8 2254; GFX9-NEXT: s_movk_i32 s5, 0xff 2255; GFX9-NEXT: s_waitcnt vmcnt(0) 2256; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 2257; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 2258; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 2259; GFX9-NEXT: v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2260; GFX9-NEXT: v_and_or_b32 v0, v0, s5, v1 2261; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 2262; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 2263; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2264; GFX9-NEXT: s_setpc_b64 s[30:31] 2265; 2266; GFX8-LABEL: extractelement_vgpr_v16i8_idx2: 2267; GFX8: ; %bb.0: 2268; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2269; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2270; GFX8-NEXT: s_movk_i32 s4, 0xff 2271; GFX8-NEXT: s_waitcnt vmcnt(0) 2272; GFX8-NEXT: v_mov_b32_e32 v1, 8 2273; GFX8-NEXT: v_mov_b32_e32 v2, s4 2274; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 2275; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 2276; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 2277; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2278; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2279; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 2280; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 2281; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2282; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2283; GFX8-NEXT: s_setpc_b64 s[30:31] 2284; 2285; GFX7-LABEL: extractelement_vgpr_v16i8_idx2: 2286; GFX7: ; %bb.0: 2287; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2288; GFX7-NEXT: s_mov_b32 s6, 0 2289; GFX7-NEXT: s_mov_b32 s7, 0xf000 2290; GFX7-NEXT: s_mov_b64 s[4:5], 0 2291; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 2292; GFX7-NEXT: s_movk_i32 s4, 0xff 2293; GFX7-NEXT: s_waitcnt vmcnt(0) 2294; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 2295; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 2296; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 2297; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 2298; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 2299; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 2300; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 2301; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2302; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 2303; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 2304; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2305; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2306; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2307; GFX7-NEXT: s_setpc_b64 s[30:31] 2308 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 2309 %element = extractelement <16 x i8> %vector, i32 2 2310 ret i8 %element 2311} 2312 2313define i8 @extractelement_vgpr_v16i8_idx3(<16 x i8> addrspace(1)* %ptr) { 2314; GFX9-LABEL: extractelement_vgpr_v16i8_idx3: 2315; GFX9: ; %bb.0: 2316; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2317; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 2318; GFX9-NEXT: s_mov_b32 s4, 8 2319; GFX9-NEXT: 
s_movk_i32 s5, 0xff 2320; GFX9-NEXT: s_waitcnt vmcnt(0) 2321; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 2322; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 2323; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 2324; GFX9-NEXT: v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2325; GFX9-NEXT: v_and_or_b32 v0, v0, s5, v1 2326; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 2327; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 2328; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0 2329; GFX9-NEXT: s_setpc_b64 s[30:31] 2330; 2331; GFX8-LABEL: extractelement_vgpr_v16i8_idx3: 2332; GFX8: ; %bb.0: 2333; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2334; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2335; GFX8-NEXT: s_movk_i32 s4, 0xff 2336; GFX8-NEXT: s_waitcnt vmcnt(0) 2337; GFX8-NEXT: v_mov_b32_e32 v1, 8 2338; GFX8-NEXT: v_mov_b32_e32 v2, s4 2339; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 2340; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 2341; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 2342; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2343; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2344; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 2345; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 2346; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2347; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 2348; GFX8-NEXT: s_setpc_b64 s[30:31] 2349; 2350; GFX7-LABEL: extractelement_vgpr_v16i8_idx3: 2351; GFX7: ; %bb.0: 2352; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2353; GFX7-NEXT: s_mov_b32 s6, 0 2354; GFX7-NEXT: s_mov_b32 s7, 0xf000 2355; GFX7-NEXT: s_mov_b64 s[4:5], 0 2356; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 2357; GFX7-NEXT: s_movk_i32 s4, 0xff 2358; GFX7-NEXT: s_waitcnt vmcnt(0) 2359; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 2360; 
GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 2361; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 2362; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 2363; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 2364; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 2365; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 2366; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2367; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 2368; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 2369; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2370; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2371; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 2372; GFX7-NEXT: s_setpc_b64 s[30:31] 2373 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 2374 %element = extractelement <16 x i8> %vector, i32 3 2375 ret i8 %element 2376} 2377 2378define i8 @extractelement_vgpr_v16i8_idx4(<16 x i8> addrspace(1)* %ptr) { 2379; GFX9-LABEL: extractelement_vgpr_v16i8_idx4: 2380; GFX9: ; %bb.0: 2381; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2382; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 2383; GFX9-NEXT: s_mov_b32 s4, 8 2384; GFX9-NEXT: s_movk_i32 s5, 0xff 2385; GFX9-NEXT: s_waitcnt vmcnt(0) 2386; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v1 2387; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 2388; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v1 2389; GFX9-NEXT: v_and_b32_sdwa v3, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2390; GFX9-NEXT: v_and_or_b32 v0, v1, s5, v0 2391; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 2392; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 2393; GFX9-NEXT: s_setpc_b64 s[30:31] 2394; 2395; GFX8-LABEL: extractelement_vgpr_v16i8_idx4: 2396; GFX8: ; %bb.0: 2397; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2398; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2399; GFX8-NEXT: s_movk_i32 s4, 0xff 2400; GFX8-NEXT: s_waitcnt vmcnt(0) 2401; GFX8-NEXT: v_mov_b32_e32 v0, 8 2402; GFX8-NEXT: v_mov_b32_e32 v2, s4 2403; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v1 2404; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 2405; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v1 2406; GFX8-NEXT: v_and_b32_sdwa v2, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2407; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2408; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 2409; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 2410; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2411; GFX8-NEXT: s_setpc_b64 s[30:31] 2412; 2413; GFX7-LABEL: extractelement_vgpr_v16i8_idx4: 2414; GFX7: ; %bb.0: 2415; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2416; GFX7-NEXT: s_mov_b32 s6, 0 2417; GFX7-NEXT: s_mov_b32 s7, 0xf000 2418; GFX7-NEXT: s_mov_b64 s[4:5], 0 2419; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 2420; GFX7-NEXT: s_movk_i32 s4, 0xff 2421; GFX7-NEXT: s_waitcnt vmcnt(0) 2422; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v1 2423; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v1 2424; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 2425; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 2426; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 2427; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 2428; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 2429; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 2430; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 2431; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 2432; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2433; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2434; GFX7-NEXT: s_setpc_b64 s[30:31] 2435 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 2436 %element = extractelement <16 x i8> %vector, i32 4 2437 ret i8 %element 2438} 2439 2440define i8 @extractelement_vgpr_v16i8_idx5(<16 x i8> addrspace(1)* %ptr) { 2441; GFX9-LABEL: extractelement_vgpr_v16i8_idx5: 2442; GFX9: ; %bb.0: 2443; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2444; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 2445; GFX9-NEXT: s_mov_b32 s4, 8 2446; GFX9-NEXT: s_movk_i32 s5, 0xff 2447; GFX9-NEXT: s_waitcnt vmcnt(0) 2448; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v1 2449; 
GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 2450; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v1 2451; GFX9-NEXT: v_and_b32_sdwa v3, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2452; GFX9-NEXT: v_and_or_b32 v0, v1, s5, v0 2453; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 2454; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 2455; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0 2456; GFX9-NEXT: s_setpc_b64 s[30:31] 2457; 2458; GFX8-LABEL: extractelement_vgpr_v16i8_idx5: 2459; GFX8: ; %bb.0: 2460; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2461; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2462; GFX8-NEXT: s_movk_i32 s4, 0xff 2463; GFX8-NEXT: s_waitcnt vmcnt(0) 2464; GFX8-NEXT: v_mov_b32_e32 v0, 8 2465; GFX8-NEXT: v_mov_b32_e32 v2, s4 2466; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v1 2467; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 2468; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v1 2469; GFX8-NEXT: v_and_b32_sdwa v2, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2470; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2471; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 2472; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 2473; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2474; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0 2475; GFX8-NEXT: s_setpc_b64 s[30:31] 2476; 2477; GFX7-LABEL: extractelement_vgpr_v16i8_idx5: 2478; GFX7: ; %bb.0: 2479; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2480; GFX7-NEXT: s_mov_b32 s6, 0 2481; GFX7-NEXT: s_mov_b32 s7, 0xf000 2482; GFX7-NEXT: s_mov_b64 s[4:5], 0 2483; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 2484; GFX7-NEXT: s_movk_i32 s4, 0xff 2485; GFX7-NEXT: s_waitcnt vmcnt(0) 2486; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v1 2487; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v1 2488; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 2489; GFX7-NEXT: v_and_b32_e32 
v2, s4, v2 2490; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 2491; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 2492; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 2493; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 2494; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 2495; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 2496; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2497; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2498; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 2499; GFX7-NEXT: s_setpc_b64 s[30:31] 2500 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 2501 %element = extractelement <16 x i8> %vector, i32 5 2502 ret i8 %element 2503} 2504 2505define i8 @extractelement_vgpr_v16i8_idx6(<16 x i8> addrspace(1)* %ptr) { 2506; GFX9-LABEL: extractelement_vgpr_v16i8_idx6: 2507; GFX9: ; %bb.0: 2508; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2509; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 2510; GFX9-NEXT: s_mov_b32 s4, 8 2511; GFX9-NEXT: s_movk_i32 s5, 0xff 2512; GFX9-NEXT: s_waitcnt vmcnt(0) 2513; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v1 2514; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 2515; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v1 2516; GFX9-NEXT: v_and_b32_sdwa v3, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2517; GFX9-NEXT: v_and_or_b32 v0, v1, s5, v0 2518; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 2519; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 2520; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2521; GFX9-NEXT: s_setpc_b64 s[30:31] 2522; 2523; GFX8-LABEL: extractelement_vgpr_v16i8_idx6: 2524; GFX8: ; %bb.0: 2525; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2526; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2527; GFX8-NEXT: s_movk_i32 s4, 0xff 2528; GFX8-NEXT: s_waitcnt vmcnt(0) 2529; GFX8-NEXT: v_mov_b32_e32 v0, 8 2530; GFX8-NEXT: v_mov_b32_e32 v2, s4 2531; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v1 2532; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 2533; GFX8-NEXT: 
v_lshrrev_b32_e32 v4, 24, v1 2534; GFX8-NEXT: v_and_b32_sdwa v2, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2535; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2536; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 2537; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 2538; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2539; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2540; GFX8-NEXT: s_setpc_b64 s[30:31] 2541; 2542; GFX7-LABEL: extractelement_vgpr_v16i8_idx6: 2543; GFX7: ; %bb.0: 2544; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2545; GFX7-NEXT: s_mov_b32 s6, 0 2546; GFX7-NEXT: s_mov_b32 s7, 0xf000 2547; GFX7-NEXT: s_mov_b64 s[4:5], 0 2548; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 2549; GFX7-NEXT: s_movk_i32 s4, 0xff 2550; GFX7-NEXT: s_waitcnt vmcnt(0) 2551; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v1 2552; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v1 2553; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 2554; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 2555; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 2556; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 2557; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 2558; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 2559; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 2560; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 2561; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2562; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2563; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2564; GFX7-NEXT: s_setpc_b64 s[30:31] 2565 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 2566 %element = extractelement <16 x i8> %vector, i32 6 2567 ret i8 %element 2568} 2569 2570define i8 @extractelement_vgpr_v16i8_idx7(<16 x i8> addrspace(1)* %ptr) { 2571; GFX9-LABEL: extractelement_vgpr_v16i8_idx7: 2572; GFX9: ; %bb.0: 2573; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2574; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 2575; GFX9-NEXT: s_mov_b32 s4, 8 2576; GFX9-NEXT: s_movk_i32 s5, 0xff 2577; GFX9-NEXT: s_waitcnt vmcnt(0) 2578; GFX9-NEXT: 
v_lshrrev_b32_e32 v0, 8, v1 2579; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 2580; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v1 2581; GFX9-NEXT: v_and_b32_sdwa v3, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2582; GFX9-NEXT: v_and_or_b32 v0, v1, s5, v0 2583; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 2584; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 2585; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0 2586; GFX9-NEXT: s_setpc_b64 s[30:31] 2587; 2588; GFX8-LABEL: extractelement_vgpr_v16i8_idx7: 2589; GFX8: ; %bb.0: 2590; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2591; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2592; GFX8-NEXT: s_movk_i32 s4, 0xff 2593; GFX8-NEXT: s_waitcnt vmcnt(0) 2594; GFX8-NEXT: v_mov_b32_e32 v0, 8 2595; GFX8-NEXT: v_mov_b32_e32 v2, s4 2596; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v1 2597; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 2598; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v1 2599; GFX8-NEXT: v_and_b32_sdwa v2, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2600; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2601; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 2602; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 2603; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2604; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 2605; GFX8-NEXT: s_setpc_b64 s[30:31] 2606; 2607; GFX7-LABEL: extractelement_vgpr_v16i8_idx7: 2608; GFX7: ; %bb.0: 2609; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2610; GFX7-NEXT: s_mov_b32 s6, 0 2611; GFX7-NEXT: s_mov_b32 s7, 0xf000 2612; GFX7-NEXT: s_mov_b64 s[4:5], 0 2613; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 2614; GFX7-NEXT: s_movk_i32 s4, 0xff 2615; GFX7-NEXT: s_waitcnt vmcnt(0) 2616; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v1 2617; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v1 2618; GFX7-NEXT: v_and_b32_e32 v0, s4, 
v0 2619; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 2620; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 2621; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 2622; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 2623; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 2624; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 2625; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 2626; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2627; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2628; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 2629; GFX7-NEXT: s_setpc_b64 s[30:31] 2630 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 2631 %element = extractelement <16 x i8> %vector, i32 7 2632 ret i8 %element 2633} 2634 2635define i8 @extractelement_vgpr_v16i8_idx8(<16 x i8> addrspace(1)* %ptr) { 2636; GFX9-LABEL: extractelement_vgpr_v16i8_idx8: 2637; GFX9: ; %bb.0: 2638; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2639; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 2640; GFX9-NEXT: s_mov_b32 s4, 8 2641; GFX9-NEXT: s_movk_i32 s5, 0xff 2642; GFX9-NEXT: s_waitcnt vmcnt(0) 2643; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v2 2644; GFX9-NEXT: v_lshrrev_b32_e32 v1, 24, v2 2645; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 2646; GFX9-NEXT: v_and_b32_sdwa v3, v2, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2647; GFX9-NEXT: v_and_or_b32 v0, v2, s5, v0 2648; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v1 2649; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 2650; GFX9-NEXT: s_setpc_b64 s[30:31] 2651; 2652; GFX8-LABEL: extractelement_vgpr_v16i8_idx8: 2653; GFX8: ; %bb.0: 2654; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2655; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2656; GFX8-NEXT: s_movk_i32 s4, 0xff 2657; GFX8-NEXT: s_waitcnt vmcnt(0) 2658; GFX8-NEXT: v_mov_b32_e32 v0, 8 2659; GFX8-NEXT: v_mov_b32_e32 v1, s4 2660; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v2 2661; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 2662; GFX8-NEXT: 
v_lshrrev_b32_e32 v4, 24, v2 2663; GFX8-NEXT: v_and_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2664; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2665; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2666; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 2667; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2668; GFX8-NEXT: s_setpc_b64 s[30:31] 2669; 2670; GFX7-LABEL: extractelement_vgpr_v16i8_idx8: 2671; GFX7: ; %bb.0: 2672; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2673; GFX7-NEXT: s_mov_b32 s6, 0 2674; GFX7-NEXT: s_mov_b32 s7, 0xf000 2675; GFX7-NEXT: s_mov_b64 s[4:5], 0 2676; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 2677; GFX7-NEXT: s_movk_i32 s4, 0xff 2678; GFX7-NEXT: s_waitcnt vmcnt(0) 2679; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v2 2680; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v2 2681; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 2682; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 2683; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v2 2684; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 2685; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 2686; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 2687; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 2688; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2689; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2690; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2691; GFX7-NEXT: s_setpc_b64 s[30:31] 2692 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 2693 %element = extractelement <16 x i8> %vector, i32 8 2694 ret i8 %element 2695} 2696 2697define i8 @extractelement_vgpr_v16i8_idx9(<16 x i8> addrspace(1)* %ptr) { 2698; GFX9-LABEL: extractelement_vgpr_v16i8_idx9: 2699; GFX9: ; %bb.0: 2700; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2701; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 2702; GFX9-NEXT: s_mov_b32 s4, 8 2703; GFX9-NEXT: s_movk_i32 s5, 0xff 2704; GFX9-NEXT: s_waitcnt vmcnt(0) 2705; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v2 2706; GFX9-NEXT: v_lshrrev_b32_e32 v1, 24, v2 2707; GFX9-NEXT: 
v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 2708; GFX9-NEXT: v_and_b32_sdwa v3, v2, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2709; GFX9-NEXT: v_and_or_b32 v0, v2, s5, v0 2710; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v1 2711; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 2712; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0 2713; GFX9-NEXT: s_setpc_b64 s[30:31] 2714; 2715; GFX8-LABEL: extractelement_vgpr_v16i8_idx9: 2716; GFX8: ; %bb.0: 2717; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2718; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2719; GFX8-NEXT: s_movk_i32 s4, 0xff 2720; GFX8-NEXT: s_waitcnt vmcnt(0) 2721; GFX8-NEXT: v_mov_b32_e32 v0, 8 2722; GFX8-NEXT: v_mov_b32_e32 v1, s4 2723; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v2 2724; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 2725; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v2 2726; GFX8-NEXT: v_and_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2727; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2728; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2729; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 2730; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2731; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0 2732; GFX8-NEXT: s_setpc_b64 s[30:31] 2733; 2734; GFX7-LABEL: extractelement_vgpr_v16i8_idx9: 2735; GFX7: ; %bb.0: 2736; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2737; GFX7-NEXT: s_mov_b32 s6, 0 2738; GFX7-NEXT: s_mov_b32 s7, 0xf000 2739; GFX7-NEXT: s_mov_b64 s[4:5], 0 2740; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 2741; GFX7-NEXT: s_movk_i32 s4, 0xff 2742; GFX7-NEXT: s_waitcnt vmcnt(0) 2743; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v2 2744; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v2 2745; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 2746; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 2747; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v2 
2748; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 2749; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 2750; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 2751; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 2752; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2753; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2754; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2755; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 2756; GFX7-NEXT: s_setpc_b64 s[30:31] 2757 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 2758 %element = extractelement <16 x i8> %vector, i32 9 2759 ret i8 %element 2760} 2761 2762define i8 @extractelement_vgpr_v16i8_idx10(<16 x i8> addrspace(1)* %ptr) { 2763; GFX9-LABEL: extractelement_vgpr_v16i8_idx10: 2764; GFX9: ; %bb.0: 2765; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2766; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 2767; GFX9-NEXT: s_mov_b32 s4, 8 2768; GFX9-NEXT: s_movk_i32 s5, 0xff 2769; GFX9-NEXT: s_waitcnt vmcnt(0) 2770; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v2 2771; GFX9-NEXT: v_lshrrev_b32_e32 v1, 24, v2 2772; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 2773; GFX9-NEXT: v_and_b32_sdwa v3, v2, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2774; GFX9-NEXT: v_and_or_b32 v0, v2, s5, v0 2775; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v1 2776; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 2777; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2778; GFX9-NEXT: s_setpc_b64 s[30:31] 2779; 2780; GFX8-LABEL: extractelement_vgpr_v16i8_idx10: 2781; GFX8: ; %bb.0: 2782; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2783; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2784; GFX8-NEXT: s_movk_i32 s4, 0xff 2785; GFX8-NEXT: s_waitcnt vmcnt(0) 2786; GFX8-NEXT: v_mov_b32_e32 v0, 8 2787; GFX8-NEXT: v_mov_b32_e32 v1, s4 2788; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v2 2789; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 2790; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v2 2791; GFX8-NEXT: 
v_and_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2792; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2793; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2794; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 2795; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2796; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2797; GFX8-NEXT: s_setpc_b64 s[30:31] 2798; 2799; GFX7-LABEL: extractelement_vgpr_v16i8_idx10: 2800; GFX7: ; %bb.0: 2801; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2802; GFX7-NEXT: s_mov_b32 s6, 0 2803; GFX7-NEXT: s_mov_b32 s7, 0xf000 2804; GFX7-NEXT: s_mov_b64 s[4:5], 0 2805; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 2806; GFX7-NEXT: s_movk_i32 s4, 0xff 2807; GFX7-NEXT: s_waitcnt vmcnt(0) 2808; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v2 2809; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v2 2810; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 2811; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 2812; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v2 2813; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 2814; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 2815; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 2816; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 2817; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2818; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2819; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2820; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2821; GFX7-NEXT: s_setpc_b64 s[30:31] 2822 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 2823 %element = extractelement <16 x i8> %vector, i32 10 2824 ret i8 %element 2825} 2826 2827define i8 @extractelement_vgpr_v16i8_idx11(<16 x i8> addrspace(1)* %ptr) { 2828; GFX9-LABEL: extractelement_vgpr_v16i8_idx11: 2829; GFX9: ; %bb.0: 2830; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2831; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 2832; GFX9-NEXT: s_mov_b32 s4, 8 2833; GFX9-NEXT: s_movk_i32 s5, 0xff 2834; GFX9-NEXT: s_waitcnt vmcnt(0) 2835; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v2 2836; GFX9-NEXT: 
v_lshrrev_b32_e32 v1, 24, v2 2837; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 2838; GFX9-NEXT: v_and_b32_sdwa v3, v2, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2839; GFX9-NEXT: v_and_or_b32 v0, v2, s5, v0 2840; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v1 2841; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 2842; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0 2843; GFX9-NEXT: s_setpc_b64 s[30:31] 2844; 2845; GFX8-LABEL: extractelement_vgpr_v16i8_idx11: 2846; GFX8: ; %bb.0: 2847; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2848; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2849; GFX8-NEXT: s_movk_i32 s4, 0xff 2850; GFX8-NEXT: s_waitcnt vmcnt(0) 2851; GFX8-NEXT: v_mov_b32_e32 v0, 8 2852; GFX8-NEXT: v_mov_b32_e32 v1, s4 2853; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v2 2854; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 2855; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v2 2856; GFX8-NEXT: v_and_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2857; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2858; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2859; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 2860; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2861; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 2862; GFX8-NEXT: s_setpc_b64 s[30:31] 2863; 2864; GFX7-LABEL: extractelement_vgpr_v16i8_idx11: 2865; GFX7: ; %bb.0: 2866; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2867; GFX7-NEXT: s_mov_b32 s6, 0 2868; GFX7-NEXT: s_mov_b32 s7, 0xf000 2869; GFX7-NEXT: s_mov_b64 s[4:5], 0 2870; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 2871; GFX7-NEXT: s_movk_i32 s4, 0xff 2872; GFX7-NEXT: s_waitcnt vmcnt(0) 2873; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v2 2874; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v2 2875; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 2876; GFX7-NEXT: v_and_b32_e32 v1, s4, 
v1 2877; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v2 2878; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 2879; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 2880; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 2881; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 2882; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2883; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2884; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2885; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 2886; GFX7-NEXT: s_setpc_b64 s[30:31] 2887 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 2888 %element = extractelement <16 x i8> %vector, i32 11 2889 ret i8 %element 2890} 2891 2892define i8 @extractelement_vgpr_v16i8_idx12(<16 x i8> addrspace(1)* %ptr) { 2893; GFX9-LABEL: extractelement_vgpr_v16i8_idx12: 2894; GFX9: ; %bb.0: 2895; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2896; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 2897; GFX9-NEXT: s_mov_b32 s4, 8 2898; GFX9-NEXT: s_movk_i32 s5, 0xff 2899; GFX9-NEXT: s_waitcnt vmcnt(0) 2900; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v3 2901; GFX9-NEXT: v_lshrrev_b32_e32 v1, 24, v3 2902; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 2903; GFX9-NEXT: v_and_b32_sdwa v2, v3, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2904; GFX9-NEXT: v_and_or_b32 v0, v3, s5, v0 2905; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v1 2906; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 2907; GFX9-NEXT: s_setpc_b64 s[30:31] 2908; 2909; GFX8-LABEL: extractelement_vgpr_v16i8_idx12: 2910; GFX8: ; %bb.0: 2911; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2912; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2913; GFX8-NEXT: s_movk_i32 s4, 0xff 2914; GFX8-NEXT: s_waitcnt vmcnt(0) 2915; GFX8-NEXT: v_mov_b32_e32 v0, 8 2916; GFX8-NEXT: v_mov_b32_e32 v1, s4 2917; GFX8-NEXT: v_lshrrev_b32_e32 v2, 8, v3 2918; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 2919; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v3 2920; GFX8-NEXT: 
v_and_b32_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx12:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_movk_i32 s4, 0xff
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v3
; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
; GFX7-NEXT:    v_and_b32_e32 v3, s4, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT:    v_or_b32_e32 v0, v3, v0
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
  %element = extractelement <16 x i8> %vector, i32 12
  ret i8 %element
}

; Constant-index extract of byte 13 from a <16 x i8> loaded through an
; addrspace(1) pointer. In the expected code for all three targets only the
; fourth loaded dword (v3) is consumed, and the sequence finishes with a
; logical right shift by 8 bits before returning.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
define i8 @extractelement_vgpr_v16i8_idx13(<16 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx13:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
; GFX9-NEXT:    s_mov_b32 s4, 8
; GFX9-NEXT:    s_movk_i32 s5, 0xff
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v3
; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 24, v3
; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT:    v_and_b32_sdwa v2, v3, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT:    v_and_or_b32 v0, v3, s5, v0
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
; GFX9-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx13:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; GFX8-NEXT:    s_movk_i32 s4, 0xff
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, 8
; GFX8-NEXT:    v_mov_b32_e32 v1, s4
; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 8, v3
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v3
; GFX8-NEXT:    v_and_b32_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx13:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_movk_i32 s4, 0xff
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v3
; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
; GFX7-NEXT:    v_and_b32_e32 v3, s4, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT:    v_or_b32_e32 v0, v3, v0
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %vec = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
  %elt = extractelement <16 x i8> %vec, i32 13
  ret i8 %elt
}

; Constant-index extract of byte 14 from a <16 x i8> loaded through an
; addrspace(1) pointer. In the expected code for all three targets only the
; fourth loaded dword (v3) is consumed, and the sequence finishes with a
; logical right shift by 16 bits before returning.
define i8 @extractelement_vgpr_v16i8_idx14(<16 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx14:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
; GFX9-NEXT:    s_mov_b32 s4, 8
; GFX9-NEXT:    s_movk_i32 s5, 0xff
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v3
; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 24, v3
; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT:    v_and_b32_sdwa v2, v3, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT:    v_and_or_b32 v0, v3, s5, v0
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
; GFX9-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx14:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; GFX8-NEXT:    s_movk_i32 s4, 0xff
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, 8
; GFX8-NEXT:    v_mov_b32_e32 v1, s4
; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 8, v3
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v3
; GFX8-NEXT:    v_and_b32_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx14:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_movk_i32 s4, 0xff
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v3
; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
; GFX7-NEXT:    v_and_b32_e32 v3, s4, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT:    v_or_b32_e32 v0, v3, v0
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %vec = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
  %elt = extractelement <16 x i8> %vec, i32 14
  ret i8 %elt
}

; Constant-index extract of byte 15 (the last lane) from a <16 x i8> loaded
; through an addrspace(1) pointer. In the expected code for all three targets
; only the fourth loaded dword (v3) is consumed, and the sequence finishes
; with a logical right shift by 24 bits before returning.
define i8 @extractelement_vgpr_v16i8_idx15(<16 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx15:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
; GFX9-NEXT:    s_mov_b32 s4, 8
; GFX9-NEXT:    s_movk_i32 s5, 0xff
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v3
; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 24, v3
; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT:    v_and_b32_sdwa v2, v3, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT:    v_and_or_b32 v0, v3, s5, v0
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
; GFX9-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx15:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; GFX8-NEXT:    s_movk_i32 s4, 0xff
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, 8
; GFX8-NEXT:    v_mov_b32_e32 v1, s4
; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 8, v3
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v3
; GFX8-NEXT:    v_and_b32_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx15:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_movk_i32 s4, 0xff
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v3
; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
; GFX7-NEXT:    v_and_b32_e32 v3, s4, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT:    v_or_b32_e32 v0, v3, v0
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %vec = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
  %elt = extractelement <16 x i8> %vec, i32 15
  ret i8 %elt
}