; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s

; The CHECK lines in this file are generated output; regenerate them with the
; script named in the NOTE line rather than editing them by hand.

; <4 x i8> loaded through an inreg addrspace(4) pointer, extracted with an
; inreg variable index. The checks expect a scalar dword load, a byte-wise
; repack of the four elements, and a scalar right shift by (idx & 3) * 8.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_sgpr_idx(<4 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) {
; GCN-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s0, s[2:3], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
; GCN-NEXT:    s_lshr_b32 s1, s0, 24
; GCN-NEXT:    s_and_b32 s2, s0, 0xff
; GCN-NEXT:    s_lshl_b32 s3, s3, 8
; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
; GCN-NEXT:    s_or_b32 s2, s2, s3
; GCN-NEXT:    s_lshl_b32 s0, s0, 16
; GCN-NEXT:    s_or_b32 s0, s2, s0
; GCN-NEXT:    s_lshl_b32 s1, s1, 24
; GCN-NEXT:    s_or_b32 s0, s0, s1
; GCN-NEXT:    s_and_b32 s1, s4, 3
; GCN-NEXT:    s_lshl_b32 s1, s1, 3
; GCN-NEXT:    s_lshr_b32 s0, s0, s1
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dword s0, s[2:3], 0x0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80008
; GFX10-NEXT:    s_lshr_b32 s1, s0, 24
; GFX10-NEXT:    s_and_b32 s2, s0, 0xff
; GFX10-NEXT:    s_bfe_u32 s0, s0, 0x80010
; GFX10-NEXT:    s_lshl_b32 s3, s3, 8
; GFX10-NEXT:    s_lshl_b32 s0, s0, 16
; GFX10-NEXT:    s_or_b32 s2, s2, s3
; GFX10-NEXT:    s_lshl_b32 s1, s1, 24
; GFX10-NEXT:    s_or_b32 s0, s2, s0
; GFX10-NEXT:    s_and_b32 s2, s4, 3
; GFX10-NEXT:    s_or_b32 s0, s0, s1
; GFX10-NEXT:    s_lshl_b32 s1, s2, 3
; GFX10-NEXT:    s_lshr_b32 s0, s0, s1
; GFX10-NEXT:    ; return to shader part epilog
  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
  %element = extractelement <4 x i8> %vector, i32 %idx
  ret i8 %element
}

; <4 x i8> loaded through a non-inreg addrspace(1) pointer, extracted with an
; inreg variable index. The checks expect a vector-memory load, a shift amount
; computed with scalar ops, a vector shift, and v_readfirstlane_b32 to return
; the result in s0.
define amdgpu_ps i8 @extractelement_vgpr_v4i8_sgpr_idx(<4 x i8> addrspace(1)* %ptr, i32 inreg %idx) {
; GFX9-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    global_load_dword v0, v[0:1], off
; GFX9-NEXT:    v_mov_b32_e32 v2, 8
; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
; GFX9-NEXT:    v_mov_b32_e32 v3, 16
; GFX9-NEXT:    s_and_b32 s0, s2, 3
; GFX9-NEXT:    s_lshl_b32 s0, s0, 3
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    flat_load_dword v0, v[0:1]
; GFX8-NEXT:    v_mov_b32_e32 v1, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    s_and_b32 s0, s2, 3
; GFX8-NEXT:    s_lshl_b32 s0, s0, 3
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_and_b32 s0, s2, 3
; GFX7-NEXT:    s_lshl_b32 s0, s0, 3
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    global_load_dword v0, v[0:1], off
; GFX10-NEXT:    v_mov_b32_e32 v1, 8
; GFX10-NEXT:    v_mov_b32_e32 v2, 16
; GFX10-NEXT:    s_and_b32 s0, s2, 3
; GFX10-NEXT:    s_lshl_b32 s0, s0, 3
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX10-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; GFX10-NEXT:    ; return to shader part epilog
  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
  %element = extractelement <4 x i8> %vector, i32 %idx
  ret i8 %element
}

; Same extract with neither argument marked inreg and no amdgpu_ps calling
; convention. The checks expect the shift amount to be computed with vector
; ops (v_and_b32 / v_lshlrev_b32) and the function to end in s_setpc_b64.
define i8 @extractelement_vgpr_v4i8_vgpr_idx(<4 x i8> addrspace(1)* %ptr, i32 %idx) {
; GFX9-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v0, v[0:1], off
; GFX9-NEXT:    v_mov_b32_e32 v3, 8
; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
; GFX9-NEXT:    v_mov_b32_e32 v4, 16
; GFX9-NEXT:    v_and_b32_e32 v2, 3, v2
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 24, v0
; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v3
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v5
; GFX9-NEXT:    v_or3_b32 v0, v0, v4, v1
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dword v0, v[0:1]
; GFX8-NEXT:    v_mov_b32_e32 v1, 8
; GFX8-NEXT:    v_mov_b32_e32 v3, 16
; GFX8-NEXT:    v_and_b32_e32 v2, 3, v2
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
; GFX8-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    v_and_b32_e32 v1, 3, v2
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 3, v1
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_bfe_u32 v4, v0, 8, 8
; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
; GFX7-NEXT:    v_and_b32_e32 v3, 0xff, v0
; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    v_or_b32_e32 v3, v3, v4
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
; GFX7-NEXT:    v_or_b32_e32 v0, v3, v0
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dword v0, v[0:1], off
; GFX10-NEXT:    v_mov_b32_e32 v1, 8
; GFX10-NEXT:    v_mov_b32_e32 v3, 16
; GFX10-NEXT:    v_and_b32_e32 v2, 3, v2
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
; GFX10-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
; GFX10-NEXT:    v_or3_b32 v0, v0, v3, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
; GFX10-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
  %element = extractelement <4 x i8> %vector, i32 %idx
  ret i8 %element
}

define amdgpu_ps i8
@extractelement_sgpr_v4i8_vgpr_idx(<4 x i8> addrspace(4)* inreg %ptr, i32 %idx) {
; <4 x i8> loaded through an inreg addrspace(4) pointer, extracted with a
; non-inreg variable index. The checks expect a scalar load and repack, a
; vector shift by (idx & 3) * 8 (v_lshr_b32_e32 on GFX7, v_lshrrev_b32_e64
; elsewhere), and v_readfirstlane_b32 to return the result in s0.
; GFX9-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s0, s[2:3], 0x0
; GFX9-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_bfe_u32 s3, s0, 0x80008
; GFX9-NEXT:    s_lshr_b32 s1, s0, 24
; GFX9-NEXT:    s_and_b32 s2, s0, 0xff
; GFX9-NEXT:    s_lshl_b32 s3, s3, 8
; GFX9-NEXT:    s_bfe_u32 s0, s0, 0x80010
; GFX9-NEXT:    s_or_b32 s2, s2, s3
; GFX9-NEXT:    s_lshl_b32 s0, s0, 16
; GFX9-NEXT:    s_or_b32 s0, s2, s0
; GFX9-NEXT:    s_lshl_b32 s1, s1, 24
; GFX9-NEXT:    s_or_b32 s0, s0, s1
; GFX9-NEXT:    v_lshrrev_b32_e64 v0, v0, s0
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_load_dword s0, s[2:3], 0x0
; GFX8-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_bfe_u32 s3, s0, 0x80008
; GFX8-NEXT:    s_lshr_b32 s1, s0, 24
; GFX8-NEXT:    s_and_b32 s2, s0, 0xff
; GFX8-NEXT:    s_lshl_b32 s3, s3, 8
; GFX8-NEXT:    s_bfe_u32 s0, s0, 0x80010
; GFX8-NEXT:    s_or_b32 s2, s2, s3
; GFX8-NEXT:    s_lshl_b32 s0, s0, 16
; GFX8-NEXT:    s_or_b32 s0, s2, s0
; GFX8-NEXT:    s_lshl_b32 s1, s1, 24
; GFX8-NEXT:    s_or_b32 s0, s0, s1
; GFX8-NEXT:    v_lshrrev_b32_e64 v0, v0, s0
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_load_dword s0, s[2:3], 0x0
; GFX7-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_bfe_u32 s3, s0, 0x80008
; GFX7-NEXT:    s_lshr_b32 s1, s0, 24
; GFX7-NEXT:    s_and_b32 s2, s0, 0xff
; GFX7-NEXT:    s_lshl_b32 s3, s3, 8
; GFX7-NEXT:    s_bfe_u32 s0, s0, 0x80010
; GFX7-NEXT:    s_or_b32 s2, s2, s3
; GFX7-NEXT:    s_lshl_b32 s0, s0, 16
; GFX7-NEXT:    s_or_b32 s0, s2, s0
; GFX7-NEXT:    s_lshl_b32 s1, s1, 24
; GFX7-NEXT:    s_or_b32 s0, s0, s1
; GFX7-NEXT:    v_lshr_b32_e32 v0, s0, v0
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dword s0, s[2:3], 0x0
; GFX10-NEXT:    v_and_b32_e32 v0, 3, v0
; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
; GFX10-NEXT:    s_or_b32 s1, s1, s2
; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
; GFX10-NEXT:    s_or_b32 s1, s1, s3
; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
; GFX10-NEXT:    s_or_b32 s0, s1, s0
; GFX10-NEXT:    v_lshrrev_b32_e64 v0, v0, s0
; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; GFX10-NEXT:    ; return to shader part epilog
  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
  %element = extractelement <4 x i8> %vector, i32 %idx
  ret i8 %element
}

; Constant index 0 on the scalar-load path: the checks expect only the load
; and byte repack, with no trailing shift.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx0(<4 x i8> addrspace(4)* inreg %ptr) {
; GCN-LABEL: extractelement_sgpr_v4i8_idx0:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s0, s[2:3], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
; GCN-NEXT:    s_lshr_b32 s1, s0, 24
; GCN-NEXT:    s_and_b32 s2, s0, 0xff
; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
; GCN-NEXT:    s_lshl_b32 s3, s3, 8
; GCN-NEXT:    s_or_b32 s2, s2, s3
; GCN-NEXT:    s_lshl_b32 s0, s0, 16
; GCN-NEXT:    s_or_b32 s0, s2, s0
; GCN-NEXT:    s_lshl_b32 s1, s1, 24
; GCN-NEXT:    s_or_b32 s0, s0, s1
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i8_idx0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dword s0, s[2:3], 0x0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
; GFX10-NEXT:    s_or_b32 s1, s1, s2
; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
; GFX10-NEXT:    s_or_b32 s1, s1, s3
; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
; GFX10-NEXT:    s_or_b32 s0, s1, s0
; GFX10-NEXT:    ; return to shader part epilog
  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
  %element = extractelement <4 x i8> %vector, i32 0
  ret i8 %element
}

; Constant index 1 on the scalar-load path: the repack is followed by a
; scalar right shift by 8.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx1(<4 x i8> addrspace(4)* inreg %ptr) {
; GCN-LABEL: extractelement_sgpr_v4i8_idx1:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s0, s[2:3], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
; GCN-NEXT:    s_lshr_b32 s1, s0, 24
; GCN-NEXT:    s_and_b32 s2, s0, 0xff
; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
; GCN-NEXT:    s_lshl_b32 s3, s3, 8
; GCN-NEXT:    s_or_b32 s2, s2, s3
; GCN-NEXT:    s_lshl_b32 s0, s0, 16
; GCN-NEXT:    s_or_b32 s0, s2, s0
; GCN-NEXT:    s_lshl_b32 s1, s1, 24
; GCN-NEXT:    s_or_b32 s0, s0, s1
; GCN-NEXT:    s_lshr_b32 s0, s0, 8
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i8_idx1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dword s0, s[2:3], 0x0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
; GFX10-NEXT:    s_or_b32 s1, s1, s2
; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
; GFX10-NEXT:    s_or_b32 s1, s1, s3
; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
; GFX10-NEXT:    s_or_b32 s0, s1, s0
; GFX10-NEXT:    s_lshr_b32 s0, s0, 8
; GFX10-NEXT:    ; return to shader part epilog
  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
  %element = extractelement <4 x i8> %vector, i32 1
  ret i8 %element
}

; Constant index 2 on the scalar-load path: the repack is followed by a
; scalar right shift by 16.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx2(<4 x i8> addrspace(4)* inreg %ptr) {
; GCN-LABEL: extractelement_sgpr_v4i8_idx2:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s0, s[2:3], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
; GCN-NEXT:    s_lshr_b32 s1, s0, 24
; GCN-NEXT:    s_and_b32 s2, s0, 0xff
; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
; GCN-NEXT:    s_lshl_b32 s3, s3, 8
; GCN-NEXT:    s_or_b32 s2, s2, s3
; GCN-NEXT:    s_lshl_b32 s0, s0, 16
; GCN-NEXT:    s_or_b32 s0, s2, s0
; GCN-NEXT:    s_lshl_b32 s1, s1, 24
; GCN-NEXT:    s_or_b32 s0, s0, s1
; GCN-NEXT:    s_lshr_b32 s0, s0, 16
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i8_idx2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dword s0, s[2:3], 0x0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
; GFX10-NEXT:    s_or_b32 s1, s1, s2
; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
; GFX10-NEXT:    s_or_b32 s1, s1, s3
; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
; GFX10-NEXT:    s_or_b32 s0, s1, s0
; GFX10-NEXT:    s_lshr_b32 s0, s0, 16
; GFX10-NEXT:    ; return to shader part epilog
  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
  %element = extractelement <4 x i8> %vector, i32 2
  ret i8 %element
}

; Constant index 3 on the scalar-load path: the repack is followed by a
; scalar right shift by 24.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx3(<4 x i8> addrspace(4)* inreg %ptr) {
; GCN-LABEL: extractelement_sgpr_v4i8_idx3:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s0, s[2:3], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
; GCN-NEXT:    s_lshr_b32 s1, s0, 24
; GCN-NEXT:    s_and_b32 s2, s0, 0xff
; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
; GCN-NEXT:    s_lshl_b32 s3, s3, 8
; GCN-NEXT:    s_or_b32 s2, s2, s3
; GCN-NEXT:    s_lshl_b32 s0, s0, 16
; GCN-NEXT:    s_or_b32 s0, s2, s0
; GCN-NEXT:    s_lshl_b32 s1, s1, 24
; GCN-NEXT:    s_or_b32 s0, s0, s1
; GCN-NEXT:    s_lshr_b32 s0, s0, 24
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i8_idx3:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dword s0, s[2:3], 0x0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
; GFX10-NEXT:    s_or_b32 s1, s1, s2
; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
; GFX10-NEXT:    s_or_b32 s1, s1, s3
; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
; GFX10-NEXT:    s_or_b32 s0, s1, s0
; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
; GFX10-NEXT:    ; return to shader part epilog
  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
  %element = extractelement <4 x i8> %vector, i32 3
  ret i8 %element
}

; Constant index 0 on the vector-load path (addrspace(1) pointer, default
; calling convention): load and byte repack only, no trailing shift.
define i8 @extractelement_vgpr_v4i8_idx0(<4 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i8_idx0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v0, v[0:1], off
; GFX9-NEXT:    v_mov_b32_e32 v2, 8
; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
; GFX9-NEXT:    v_mov_b32_e32 v3, 16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i8_idx0:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dword v0, v[0:1]
; GFX8-NEXT:    v_mov_b32_e32 v1, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i8_idx0:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v4i8_idx0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dword v0, v[0:1], off
; GFX10-NEXT:    v_mov_b32_e32 v1, 8
; GFX10-NEXT:    v_mov_b32_e32 v2, 16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
  %element = extractelement <4 x i8> %vector, i32 0
  ret i8 %element
}

; Constant index 1 on the vector-load path: the repack is followed by a
; vector right shift by 8.
define i8 @extractelement_vgpr_v4i8_idx1(<4 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i8_idx1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v0, v[0:1], off
; GFX9-NEXT:    s_mov_b32 s4, 8
; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
; GFX9-NEXT:    v_mov_b32_e32 v2, 16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v4
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX9-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i8_idx1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dword v0, v[0:1]
; GFX8-NEXT:    v_mov_b32_e32 v1, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i8_idx1:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v4i8_idx1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dword v0, v[0:1], off
; GFX10-NEXT:    s_mov_b32 s4, 8
; GFX10-NEXT:    v_mov_b32_e32 v1, 16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v2
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
; GFX10-NEXT:    v_or3_b32 v0, v0, v1, v2
; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
  %element = extractelement <4 x i8> %vector, i32 1
  ret i8 %element
}

; Constant index 2 on the vector-load path: the repack is followed by a
; vector right shift by 16.
define i8 @extractelement_vgpr_v4i8_idx2(<4 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i8_idx2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v0, v[0:1], off
; GFX9-NEXT:    v_mov_b32_e32 v2, 8
; GFX9-NEXT:    s_mov_b32 s4, 16
; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX9-NEXT:    v_or3_b32 v0, v0, v4, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i8_idx2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dword v0, v[0:1]
; GFX8-NEXT:    v_mov_b32_e32 v1, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i8_idx2:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v4i8_idx2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dword v0, v[0:1], off
; GFX10-NEXT:    v_mov_b32_e32 v1, 8
; GFX10-NEXT:    s_mov_b32 s4, 16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
; GFX10-NEXT:    v_lshlrev_b32_sdwa v3, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
; GFX10-NEXT:    v_or3_b32 v0, v0, v3, v1
; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
  %element = extractelement <4 x i8> %vector, i32 2
  ret i8 %element
}

; Constant index 3 on the vector-load path: the repack is followed by a
; vector right shift by 24.
define i8 @extractelement_vgpr_v4i8_idx3(<4 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i8_idx3:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v0, v[0:1], off
; GFX9-NEXT:    v_mov_b32_e32 v2, 8
; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
; GFX9-NEXT:    v_mov_b32_e32 v3, 16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i8_idx3:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dword v0, v[0:1]
; GFX8-NEXT:    v_mov_b32_e32 v1, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i8_idx3:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v4i8_idx3:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dword v0, v[0:1], off
; GFX10-NEXT:    v_mov_b32_e32 v1, 8
; GFX10-NEXT:    v_mov_b32_e32 v2, 16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
  %element = extractelement <4 x i8> %vector, i32 3
  ret i8 %element
}

; <8 x i8> loaded through an inreg addrspace(4) pointer, extracted with an
; inreg variable index. The checks expect a two-dword scalar load, a repack of
; each dword, a select between the two dwords on (idx >> 2) == 1, and a scalar
; right shift by (idx & 3) * 8.
define amdgpu_ps i8 @extractelement_sgpr_v8i8_sgpr_idx(<8 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) {
; GCN-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
; GCN-NEXT:    s_mov_b32 s7, 0x80008
; GCN-NEXT:    s_movk_i32 s5, 0xff
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_bfe_u32 s8, s0, s7
; GCN-NEXT:    s_and_b32 s6, s0, s5
; GCN-NEXT:    s_lshl_b32 s8, s8, 8
; GCN-NEXT:    s_or_b32 s6, s6, s8
; GCN-NEXT:    s_mov_b32 s8, 0x80010
; GCN-NEXT:    s_lshr_b32 s2, s0, 24
; GCN-NEXT:    s_bfe_u32 s0, s0, s8
; GCN-NEXT:    s_lshl_b32 s0, s0, 16
; GCN-NEXT:    s_or_b32 s0, s6, s0
; GCN-NEXT:    s_lshl_b32 s2, s2, 24
; GCN-NEXT:    s_or_b32 s0, s0, s2
; GCN-NEXT:    s_and_b32 s2, s1, s5
; GCN-NEXT:    s_bfe_u32 s5, s1, s7
; GCN-NEXT:    s_lshr_b32 s3, s1, 24
; GCN-NEXT:    s_lshl_b32 s5, s5, 8
; GCN-NEXT:    s_bfe_u32 s1, s1, s8
; GCN-NEXT:    s_or_b32 s2, s2, s5
; GCN-NEXT:    s_lshl_b32 s1, s1, 16
; GCN-NEXT:    s_or_b32 s1, s2, s1
; GCN-NEXT:    s_lshl_b32 s2, s3, 24
; GCN-NEXT:    s_or_b32 s1, s1, s2
; GCN-NEXT:    s_lshr_b32 s2, s4, 2
; GCN-NEXT:    s_cmp_eq_u32 s2, 1
; GCN-NEXT:    s_cselect_b32 s0, s1, s0
; GCN-NEXT:    s_and_b32 s1, s4, 3
; GCN-NEXT:    s_lshl_b32 s1, s1, 3
; GCN-NEXT:    s_lshr_b32 s0, s0, s1
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
; GFX10-NEXT:    s_mov_b32 s3, 0x80008
; GFX10-NEXT:    s_movk_i32 s2, 0xff
; GFX10-NEXT:    s_mov_b32 s5, 0x80010
; GFX10-NEXT:    s_lshr_b32 s6, s4, 2
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_bfe_u32 s10, s0, s3
; GFX10-NEXT:    s_bfe_u32 s3, s1, s3
; GFX10-NEXT:    s_lshr_b32 s7, s0, 24
; GFX10-NEXT:    s_lshr_b32 s8, s1, 24
; GFX10-NEXT:    s_and_b32 s9, s0, s2
; GFX10-NEXT:    s_bfe_u32 s0, s0, s5
; GFX10-NEXT:    s_and_b32 s2, s1, s2
; GFX10-NEXT:    s_bfe_u32 s1, s1, s5
; GFX10-NEXT:    s_lshl_b32 s5, s10, 8
; GFX10-NEXT:    s_lshl_b32 s3, s3, 8
; GFX10-NEXT:    s_lshl_b32 s0, s0, 16
; GFX10-NEXT:    s_lshl_b32 s1, s1, 16
; GFX10-NEXT:    s_or_b32 s5, s9, s5
; GFX10-NEXT:    s_or_b32 s2, s2, s3
; GFX10-NEXT:    s_lshl_b32 s7, s7, 24
; GFX10-NEXT:    s_lshl_b32 s8, s8, 24
; GFX10-NEXT:    s_or_b32 s0, s5, s0
; GFX10-NEXT:    s_or_b32 s1, s2, s1
; GFX10-NEXT:    s_or_b32 s0, s0, s7
; GFX10-NEXT:    s_or_b32 s1, s1, s8
; GFX10-NEXT:    s_cmp_eq_u32 s6, 1
; GFX10-NEXT:    s_cselect_b32 s0, s1, s0
; GFX10-NEXT:    s_and_b32 s1, s4, 3
; GFX10-NEXT:    s_lshl_b32 s1, s1, 3
; GFX10-NEXT:    s_lshr_b32 s0, s0, s1
; GFX10-NEXT:    ; return to shader part epilog
  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
  %element = extractelement <8 x i8> %vector, i32 %idx
  ret i8 %element
}

define amdgpu_ps i8
@extractelement_vgpr_v8i8_sgpr_idx(<8 x i8> addrspace(1)* %ptr, i32 inreg %idx) { 838; GFX9-LABEL: extractelement_vgpr_v8i8_sgpr_idx: 839; GFX9: ; %bb.0: 840; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 841; GFX9-NEXT: s_mov_b32 s0, 8 842; GFX9-NEXT: s_mov_b32 s1, 16 843; GFX9-NEXT: s_movk_i32 s3, 0xff 844; GFX9-NEXT: s_lshr_b32 s4, s2, 2 845; GFX9-NEXT: s_and_b32 s2, s2, 3 846; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s4, 1 847; GFX9-NEXT: s_waitcnt vmcnt(0) 848; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 849; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v1 850; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 851; GFX9-NEXT: v_lshlrev_b32_sdwa v6, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 852; GFX9-NEXT: v_lshlrev_b32_sdwa v5, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 853; GFX9-NEXT: v_lshlrev_b32_sdwa v7, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 854; GFX9-NEXT: v_and_or_b32 v0, v0, s3, v4 855; GFX9-NEXT: v_lshlrev_b32_e32 v2, 24, v2 856; GFX9-NEXT: v_and_or_b32 v1, v1, s3, v6 857; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3 858; GFX9-NEXT: v_or3_b32 v0, v0, v5, v2 859; GFX9-NEXT: v_or3_b32 v1, v1, v7, v3 860; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 861; GFX9-NEXT: s_lshl_b32 s0, s2, 3 862; GFX9-NEXT: v_lshrrev_b32_e32 v0, s0, v0 863; GFX9-NEXT: v_readfirstlane_b32 s0, v0 864; GFX9-NEXT: ; return to shader part epilog 865; 866; GFX8-LABEL: extractelement_vgpr_v8i8_sgpr_idx: 867; GFX8: ; %bb.0: 868; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] 869; GFX8-NEXT: v_mov_b32_e32 v2, 8 870; GFX8-NEXT: v_mov_b32_e32 v3, 16 871; GFX8-NEXT: s_lshr_b32 s0, s2, 2 872; GFX8-NEXT: s_and_b32 s1, s2, 3 873; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 874; GFX8-NEXT: s_lshl_b32 s0, s1, 3 875; GFX8-NEXT: s_waitcnt vmcnt(0) 876; GFX8-NEXT: v_lshlrev_b32_sdwa v6, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 877; 
GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 878; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 879; GFX8-NEXT: v_lshrrev_b32_e32 v5, 24, v1 880; GFX8-NEXT: v_lshlrev_b32_sdwa v7, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 881; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 882; GFX8-NEXT: v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 883; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 884; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v4 885; GFX8-NEXT: v_lshlrev_b32_e32 v2, 24, v5 886; GFX8-NEXT: v_or_b32_e32 v0, v0, v7 887; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 888; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 889; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 890; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 891; GFX8-NEXT: v_lshrrev_b32_e32 v0, s0, v0 892; GFX8-NEXT: v_readfirstlane_b32 s0, v0 893; GFX8-NEXT: ; return to shader part epilog 894; 895; GFX7-LABEL: extractelement_vgpr_v8i8_sgpr_idx: 896; GFX7: ; %bb.0: 897; GFX7-NEXT: s_mov_b32 s6, 0 898; GFX7-NEXT: s_mov_b32 s7, 0xf000 899; GFX7-NEXT: s_mov_b64 s[4:5], 0 900; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 901; GFX7-NEXT: s_movk_i32 s0, 0xff 902; GFX7-NEXT: s_lshr_b32 s1, s2, 2 903; GFX7-NEXT: s_and_b32 s2, s2, 3 904; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s1, 1 905; GFX7-NEXT: s_waitcnt vmcnt(0) 906; GFX7-NEXT: v_bfe_u32 v5, v0, 8, 8 907; GFX7-NEXT: v_bfe_u32 v7, v1, 8, 8 908; GFX7-NEXT: v_lshrrev_b32_e32 v2, 24, v0 909; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 910; GFX7-NEXT: v_and_b32_e32 v4, s0, v0 911; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 912; GFX7-NEXT: v_and_b32_e32 v6, s0, v1 913; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 914; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 915; GFX7-NEXT: v_lshlrev_b32_e32 v7, 8, v7 916; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 917; GFX7-NEXT: 
v_lshlrev_b32_e32 v1, 16, v1 918; GFX7-NEXT: v_or_b32_e32 v4, v4, v5 919; GFX7-NEXT: v_or_b32_e32 v5, v6, v7 920; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v2 921; GFX7-NEXT: v_lshlrev_b32_e32 v3, 24, v3 922; GFX7-NEXT: v_or_b32_e32 v0, v4, v0 923; GFX7-NEXT: v_or_b32_e32 v1, v5, v1 924; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 925; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 926; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 927; GFX7-NEXT: s_lshl_b32 s0, s2, 3 928; GFX7-NEXT: v_lshrrev_b32_e32 v0, s0, v0 929; GFX7-NEXT: v_readfirstlane_b32 s0, v0 930; GFX7-NEXT: ; return to shader part epilog 931; 932; GFX10-LABEL: extractelement_vgpr_v8i8_sgpr_idx: 933; GFX10: ; %bb.0: 934; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 935; GFX10-NEXT: s_mov_b32 s0, 8 936; GFX10-NEXT: s_mov_b32 s1, 16 937; GFX10-NEXT: s_movk_i32 s3, 0xff 938; GFX10-NEXT: s_waitcnt vmcnt(0) 939; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 940; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 941; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v1 942; GFX10-NEXT: v_lshlrev_b32_sdwa v5, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 943; GFX10-NEXT: v_lshlrev_b32_sdwa v6, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 944; GFX10-NEXT: v_lshlrev_b32_sdwa v7, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 945; GFX10-NEXT: v_and_or_b32 v0, v0, s3, v3 946; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v2 947; GFX10-NEXT: v_and_or_b32 v1, v1, s3, v5 948; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v4 949; GFX10-NEXT: s_lshr_b32 s0, s2, 2 950; GFX10-NEXT: v_or3_b32 v0, v0, v6, v2 951; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 1 952; GFX10-NEXT: v_or3_b32 v1, v1, v7, v3 953; GFX10-NEXT: s_and_b32 s0, s2, 3 954; GFX10-NEXT: s_lshl_b32 s0, s0, 3 955; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 956; GFX10-NEXT: v_lshrrev_b32_e32 v0, s0, v0 957; GFX10-NEXT: v_readfirstlane_b32 s0, v0 958; GFX10-NEXT: ; 
return to shader part epilog 959 %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr 960 %element = extractelement <8 x i8> %vector, i32 %idx 961 ret i8 %element 962} 963 964define i8 @extractelement_vgpr_v8i8_vgpr_idx(<8 x i8> addrspace(1)* %ptr, i32 %idx) { 965; GFX9-LABEL: extractelement_vgpr_v8i8_vgpr_idx: 966; GFX9: ; %bb.0: 967; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 968; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 969; GFX9-NEXT: s_mov_b32 s4, 8 970; GFX9-NEXT: s_mov_b32 s5, 16 971; GFX9-NEXT: s_movk_i32 s6, 0xff 972; GFX9-NEXT: v_lshrrev_b32_e32 v3, 2, v2 973; GFX9-NEXT: v_and_b32_e32 v2, 3, v2 974; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3 975; GFX9-NEXT: s_waitcnt vmcnt(0) 976; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 977; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v1 978; GFX9-NEXT: v_lshlrev_b32_sdwa v6, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 979; GFX9-NEXT: v_lshlrev_b32_sdwa v8, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 980; GFX9-NEXT: v_lshlrev_b32_sdwa v7, s5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 981; GFX9-NEXT: v_lshlrev_b32_sdwa v9, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 982; GFX9-NEXT: v_and_or_b32 v0, v0, s6, v6 983; GFX9-NEXT: v_lshlrev_b32_e32 v4, 24, v4 984; GFX9-NEXT: v_and_or_b32 v1, v1, s6, v8 985; GFX9-NEXT: v_lshlrev_b32_e32 v5, 24, v5 986; GFX9-NEXT: v_or3_b32 v0, v0, v7, v4 987; GFX9-NEXT: v_or3_b32 v1, v1, v9, v5 988; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 989; GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v2 990; GFX9-NEXT: v_lshrrev_b32_e32 v0, v1, v0 991; GFX9-NEXT: s_setpc_b64 s[30:31] 992; 993; GFX8-LABEL: extractelement_vgpr_v8i8_vgpr_idx: 994; GFX8: ; %bb.0: 995; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 996; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] 997; GFX8-NEXT: v_mov_b32_e32 v3, 8 998; GFX8-NEXT: v_mov_b32_e32 v4, 16 999; GFX8-NEXT: v_lshrrev_b32_e32 v5, 2, v2 1000; 
GFX8-NEXT: v_and_b32_e32 v2, 3, v2 1001; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v5 1002; GFX8-NEXT: s_waitcnt vmcnt(0) 1003; GFX8-NEXT: v_lshlrev_b32_sdwa v8, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 1004; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 1005; GFX8-NEXT: v_lshrrev_b32_e32 v6, 24, v0 1006; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v1 1007; GFX8-NEXT: v_lshlrev_b32_sdwa v9, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 1008; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 1009; GFX8-NEXT: v_or_b32_sdwa v0, v0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1010; GFX8-NEXT: v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1011; GFX8-NEXT: v_lshlrev_b32_e32 v6, 24, v6 1012; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v7 1013; GFX8-NEXT: v_or_b32_e32 v0, v0, v9 1014; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 1015; GFX8-NEXT: v_or_b32_e32 v0, v0, v6 1016; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 1017; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1018; GFX8-NEXT: v_lshlrev_b32_e32 v1, 3, v2 1019; GFX8-NEXT: v_lshrrev_b32_e32 v0, v1, v0 1020; GFX8-NEXT: s_setpc_b64 s[30:31] 1021; 1022; GFX7-LABEL: extractelement_vgpr_v8i8_vgpr_idx: 1023; GFX7: ; %bb.0: 1024; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1025; GFX7-NEXT: s_mov_b32 s6, 0 1026; GFX7-NEXT: s_mov_b32 s7, 0xf000 1027; GFX7-NEXT: s_mov_b64 s[4:5], 0 1028; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 1029; GFX7-NEXT: s_movk_i32 s4, 0xff 1030; GFX7-NEXT: v_lshrrev_b32_e32 v3, 2, v2 1031; GFX7-NEXT: v_and_b32_e32 v2, 3, v2 1032; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3 1033; GFX7-NEXT: s_waitcnt vmcnt(0) 1034; GFX7-NEXT: v_bfe_u32 v7, v0, 8, 8 1035; GFX7-NEXT: v_bfe_u32 v9, v1, 8, 8 1036; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v0 1037; GFX7-NEXT: v_lshrrev_b32_e32 
v5, 24, v1 1038; GFX7-NEXT: v_and_b32_e32 v6, s4, v0 1039; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 1040; GFX7-NEXT: v_and_b32_e32 v8, s4, v1 1041; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 1042; GFX7-NEXT: v_lshlrev_b32_e32 v7, 8, v7 1043; GFX7-NEXT: v_lshlrev_b32_e32 v9, 8, v9 1044; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1045; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1046; GFX7-NEXT: v_or_b32_e32 v6, v6, v7 1047; GFX7-NEXT: v_or_b32_e32 v7, v8, v9 1048; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 1049; GFX7-NEXT: v_lshlrev_b32_e32 v5, 24, v5 1050; GFX7-NEXT: v_or_b32_e32 v0, v6, v0 1051; GFX7-NEXT: v_or_b32_e32 v1, v7, v1 1052; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 1053; GFX7-NEXT: v_or_b32_e32 v1, v1, v5 1054; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1055; GFX7-NEXT: v_lshlrev_b32_e32 v1, 3, v2 1056; GFX7-NEXT: v_lshrrev_b32_e32 v0, v1, v0 1057; GFX7-NEXT: s_setpc_b64 s[30:31] 1058; 1059; GFX10-LABEL: extractelement_vgpr_v8i8_vgpr_idx: 1060; GFX10: ; %bb.0: 1061; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1062; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1063; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 1064; GFX10-NEXT: s_mov_b32 s4, 8 1065; GFX10-NEXT: s_mov_b32 s5, 16 1066; GFX10-NEXT: s_movk_i32 s6, 0xff 1067; GFX10-NEXT: s_waitcnt vmcnt(0) 1068; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 1069; GFX10-NEXT: v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 1070; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 1071; GFX10-NEXT: v_lshlrev_b32_sdwa v6, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 1072; GFX10-NEXT: v_lshlrev_b32_sdwa v7, s5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 1073; GFX10-NEXT: v_lshlrev_b32_sdwa v8, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 1074; GFX10-NEXT: v_and_or_b32 v0, v0, s6, v4 1075; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v3 1076; GFX10-NEXT: v_and_or_b32 v1, v1, s6, v6 1077; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, 
v5 1078; GFX10-NEXT: v_lshrrev_b32_e32 v5, 2, v2 1079; GFX10-NEXT: v_and_b32_e32 v2, 3, v2 1080; GFX10-NEXT: v_or3_b32 v0, v0, v7, v3 1081; GFX10-NEXT: v_or3_b32 v1, v1, v8, v4 1082; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5 1083; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 1084; GFX10-NEXT: v_lshlrev_b32_e32 v1, 3, v2 1085; GFX10-NEXT: v_lshrrev_b32_e32 v0, v1, v0 1086; GFX10-NEXT: s_setpc_b64 s[30:31] 1087 %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr 1088 %element = extractelement <8 x i8> %vector, i32 %idx 1089 ret i8 %element 1090} 1091 1092define amdgpu_ps i8 @extractelement_sgpr_v8i8_vgpr_idx(<8 x i8> addrspace(4)* inreg %ptr, i32 %idx) { 1093; GCN-LABEL: extractelement_sgpr_v8i8_vgpr_idx: 1094; GCN: ; %bb.0: 1095; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1096; GCN-NEXT: s_mov_b32 s6, 0x80008 1097; GCN-NEXT: s_movk_i32 s4, 0xff 1098; GCN-NEXT: v_lshrrev_b32_e32 v1, 2, v0 1099; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 1100; GCN-NEXT: s_waitcnt lgkmcnt(0) 1101; GCN-NEXT: s_bfe_u32 s7, s0, s6 1102; GCN-NEXT: s_and_b32 s5, s0, s4 1103; GCN-NEXT: s_lshl_b32 s7, s7, 8 1104; GCN-NEXT: s_or_b32 s5, s5, s7 1105; GCN-NEXT: s_mov_b32 s7, 0x80010 1106; GCN-NEXT: s_lshr_b32 s2, s0, 24 1107; GCN-NEXT: s_bfe_u32 s0, s0, s7 1108; GCN-NEXT: s_lshl_b32 s0, s0, 16 1109; GCN-NEXT: s_or_b32 s0, s5, s0 1110; GCN-NEXT: s_lshl_b32 s2, s2, 24 1111; GCN-NEXT: s_or_b32 s0, s0, s2 1112; GCN-NEXT: s_and_b32 s2, s1, s4 1113; GCN-NEXT: s_bfe_u32 s4, s1, s6 1114; GCN-NEXT: s_lshr_b32 s3, s1, 24 1115; GCN-NEXT: s_lshl_b32 s4, s4, 8 1116; GCN-NEXT: s_bfe_u32 s1, s1, s7 1117; GCN-NEXT: s_or_b32 s2, s2, s4 1118; GCN-NEXT: s_lshl_b32 s1, s1, 16 1119; GCN-NEXT: s_or_b32 s1, s2, s1 1120; GCN-NEXT: s_lshl_b32 s2, s3, 24 1121; GCN-NEXT: s_or_b32 s1, s1, s2 1122; GCN-NEXT: v_mov_b32_e32 v2, s0 1123; GCN-NEXT: v_mov_b32_e32 v3, s1 1124; GCN-NEXT: v_and_b32_e32 v0, 3, v0 1125; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc 1126; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0 1127; GCN-NEXT: 
v_lshrrev_b32_e32 v0, v0, v1 1128; GCN-NEXT: v_readfirstlane_b32 s0, v0 1129; GCN-NEXT: ; return to shader part epilog 1130; 1131; GFX10-LABEL: extractelement_sgpr_v8i8_vgpr_idx: 1132; GFX10: ; %bb.0: 1133; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1134; GFX10-NEXT: s_mov_b32 s3, 0x80008 1135; GFX10-NEXT: s_movk_i32 s2, 0xff 1136; GFX10-NEXT: s_mov_b32 s4, 0x80010 1137; GFX10-NEXT: v_lshrrev_b32_e32 v1, 2, v0 1138; GFX10-NEXT: v_and_b32_e32 v0, 3, v0 1139; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 1140; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0 1141; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1142; GFX10-NEXT: s_bfe_u32 s8, s0, s3 1143; GFX10-NEXT: s_bfe_u32 s3, s1, s3 1144; GFX10-NEXT: s_lshr_b32 s6, s1, 24 1145; GFX10-NEXT: s_and_b32 s7, s0, s2 1146; GFX10-NEXT: s_and_b32 s2, s1, s2 1147; GFX10-NEXT: s_bfe_u32 s1, s1, s4 1148; GFX10-NEXT: s_lshl_b32 s3, s3, 8 1149; GFX10-NEXT: s_lshl_b32 s1, s1, 16 1150; GFX10-NEXT: s_or_b32 s2, s2, s3 1151; GFX10-NEXT: s_lshl_b32 s3, s6, 24 1152; GFX10-NEXT: s_or_b32 s1, s2, s1 1153; GFX10-NEXT: s_lshr_b32 s5, s0, 24 1154; GFX10-NEXT: s_bfe_u32 s0, s0, s4 1155; GFX10-NEXT: s_lshl_b32 s4, s8, 8 1156; GFX10-NEXT: s_or_b32 s1, s1, s3 1157; GFX10-NEXT: s_lshl_b32 s0, s0, 16 1158; GFX10-NEXT: s_or_b32 s3, s7, s4 1159; GFX10-NEXT: v_mov_b32_e32 v2, s1 1160; GFX10-NEXT: s_lshl_b32 s2, s5, 24 1161; GFX10-NEXT: s_or_b32 s0, s3, s0 1162; GFX10-NEXT: s_or_b32 s0, s0, s2 1163; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v2, vcc_lo 1164; GFX10-NEXT: v_lshrrev_b32_e32 v0, v0, v1 1165; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1166; GFX10-NEXT: ; return to shader part epilog 1167 %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr 1168 %element = extractelement <8 x i8> %vector, i32 %idx 1169 ret i8 %element 1170} 1171 1172define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx0(<8 x i8> addrspace(4)* inreg %ptr) { 1173; GCN-LABEL: extractelement_sgpr_v8i8_idx0: 1174; GCN: ; %bb.0: 1175; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1176; GCN-NEXT: s_waitcnt 
lgkmcnt(0) 1177; GCN-NEXT: s_bfe_u32 s3, s0, 0x80008 1178; GCN-NEXT: s_lshr_b32 s1, s0, 24 1179; GCN-NEXT: s_and_b32 s2, s0, 0xff 1180; GCN-NEXT: s_bfe_u32 s0, s0, 0x80010 1181; GCN-NEXT: s_lshl_b32 s3, s3, 8 1182; GCN-NEXT: s_or_b32 s2, s2, s3 1183; GCN-NEXT: s_lshl_b32 s0, s0, 16 1184; GCN-NEXT: s_or_b32 s0, s2, s0 1185; GCN-NEXT: s_lshl_b32 s1, s1, 24 1186; GCN-NEXT: s_or_b32 s0, s0, s1 1187; GCN-NEXT: ; return to shader part epilog 1188; 1189; GFX10-LABEL: extractelement_sgpr_v8i8_idx0: 1190; GFX10: ; %bb.0: 1191; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1192; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1193; GFX10-NEXT: s_bfe_u32 s2, s0, 0x80008 1194; GFX10-NEXT: s_and_b32 s1, s0, 0xff 1195; GFX10-NEXT: s_bfe_u32 s3, s0, 0x80010 1196; GFX10-NEXT: s_lshl_b32 s2, s2, 8 1197; GFX10-NEXT: s_lshl_b32 s3, s3, 16 1198; GFX10-NEXT: s_or_b32 s1, s1, s2 1199; GFX10-NEXT: s_lshr_b32 s0, s0, 24 1200; GFX10-NEXT: s_or_b32 s1, s1, s3 1201; GFX10-NEXT: s_lshl_b32 s0, s0, 24 1202; GFX10-NEXT: s_or_b32 s0, s1, s0 1203; GFX10-NEXT: ; return to shader part epilog 1204 %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr 1205 %element = extractelement <8 x i8> %vector, i32 0 1206 ret i8 %element 1207} 1208 1209define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx1(<8 x i8> addrspace(4)* inreg %ptr) { 1210; GCN-LABEL: extractelement_sgpr_v8i8_idx1: 1211; GCN: ; %bb.0: 1212; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1213; GCN-NEXT: s_waitcnt lgkmcnt(0) 1214; GCN-NEXT: s_bfe_u32 s3, s0, 0x80008 1215; GCN-NEXT: s_lshr_b32 s1, s0, 24 1216; GCN-NEXT: s_and_b32 s2, s0, 0xff 1217; GCN-NEXT: s_bfe_u32 s0, s0, 0x80010 1218; GCN-NEXT: s_lshl_b32 s3, s3, 8 1219; GCN-NEXT: s_or_b32 s2, s2, s3 1220; GCN-NEXT: s_lshl_b32 s0, s0, 16 1221; GCN-NEXT: s_or_b32 s0, s2, s0 1222; GCN-NEXT: s_lshl_b32 s1, s1, 24 1223; GCN-NEXT: s_or_b32 s0, s0, s1 1224; GCN-NEXT: s_lshr_b32 s0, s0, 8 1225; GCN-NEXT: ; return to shader part epilog 1226; 1227; GFX10-LABEL: extractelement_sgpr_v8i8_idx1: 1228; GFX10: ; %bb.0: 
1229; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1230; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1231; GFX10-NEXT: s_bfe_u32 s2, s0, 0x80008 1232; GFX10-NEXT: s_and_b32 s1, s0, 0xff 1233; GFX10-NEXT: s_bfe_u32 s3, s0, 0x80010 1234; GFX10-NEXT: s_lshl_b32 s2, s2, 8 1235; GFX10-NEXT: s_lshl_b32 s3, s3, 16 1236; GFX10-NEXT: s_or_b32 s1, s1, s2 1237; GFX10-NEXT: s_lshr_b32 s0, s0, 24 1238; GFX10-NEXT: s_or_b32 s1, s1, s3 1239; GFX10-NEXT: s_lshl_b32 s0, s0, 24 1240; GFX10-NEXT: s_or_b32 s0, s1, s0 1241; GFX10-NEXT: s_lshr_b32 s0, s0, 8 1242; GFX10-NEXT: ; return to shader part epilog 1243 %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr 1244 %element = extractelement <8 x i8> %vector, i32 1 1245 ret i8 %element 1246} 1247 1248define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx2(<8 x i8> addrspace(4)* inreg %ptr) { 1249; GCN-LABEL: extractelement_sgpr_v8i8_idx2: 1250; GCN: ; %bb.0: 1251; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1252; GCN-NEXT: s_waitcnt lgkmcnt(0) 1253; GCN-NEXT: s_bfe_u32 s3, s0, 0x80008 1254; GCN-NEXT: s_lshr_b32 s1, s0, 24 1255; GCN-NEXT: s_and_b32 s2, s0, 0xff 1256; GCN-NEXT: s_bfe_u32 s0, s0, 0x80010 1257; GCN-NEXT: s_lshl_b32 s3, s3, 8 1258; GCN-NEXT: s_or_b32 s2, s2, s3 1259; GCN-NEXT: s_lshl_b32 s0, s0, 16 1260; GCN-NEXT: s_or_b32 s0, s2, s0 1261; GCN-NEXT: s_lshl_b32 s1, s1, 24 1262; GCN-NEXT: s_or_b32 s0, s0, s1 1263; GCN-NEXT: s_lshr_b32 s0, s0, 16 1264; GCN-NEXT: ; return to shader part epilog 1265; 1266; GFX10-LABEL: extractelement_sgpr_v8i8_idx2: 1267; GFX10: ; %bb.0: 1268; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1269; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1270; GFX10-NEXT: s_bfe_u32 s2, s0, 0x80008 1271; GFX10-NEXT: s_and_b32 s1, s0, 0xff 1272; GFX10-NEXT: s_bfe_u32 s3, s0, 0x80010 1273; GFX10-NEXT: s_lshl_b32 s2, s2, 8 1274; GFX10-NEXT: s_lshl_b32 s3, s3, 16 1275; GFX10-NEXT: s_or_b32 s1, s1, s2 1276; GFX10-NEXT: s_lshr_b32 s0, s0, 24 1277; GFX10-NEXT: s_or_b32 s1, s1, s3 1278; GFX10-NEXT: s_lshl_b32 s0, s0, 24 1279; GFX10-NEXT: 
s_or_b32 s0, s1, s0 1280; GFX10-NEXT: s_lshr_b32 s0, s0, 16 1281; GFX10-NEXT: ; return to shader part epilog 1282 %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr 1283 %element = extractelement <8 x i8> %vector, i32 2 1284 ret i8 %element 1285} 1286 1287define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx3(<8 x i8> addrspace(4)* inreg %ptr) { 1288; GCN-LABEL: extractelement_sgpr_v8i8_idx3: 1289; GCN: ; %bb.0: 1290; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1291; GCN-NEXT: s_waitcnt lgkmcnt(0) 1292; GCN-NEXT: s_bfe_u32 s3, s0, 0x80008 1293; GCN-NEXT: s_lshr_b32 s1, s0, 24 1294; GCN-NEXT: s_and_b32 s2, s0, 0xff 1295; GCN-NEXT: s_bfe_u32 s0, s0, 0x80010 1296; GCN-NEXT: s_lshl_b32 s3, s3, 8 1297; GCN-NEXT: s_or_b32 s2, s2, s3 1298; GCN-NEXT: s_lshl_b32 s0, s0, 16 1299; GCN-NEXT: s_or_b32 s0, s2, s0 1300; GCN-NEXT: s_lshl_b32 s1, s1, 24 1301; GCN-NEXT: s_or_b32 s0, s0, s1 1302; GCN-NEXT: s_lshr_b32 s0, s0, 24 1303; GCN-NEXT: ; return to shader part epilog 1304; 1305; GFX10-LABEL: extractelement_sgpr_v8i8_idx3: 1306; GFX10: ; %bb.0: 1307; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1308; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1309; GFX10-NEXT: s_bfe_u32 s2, s0, 0x80008 1310; GFX10-NEXT: s_and_b32 s1, s0, 0xff 1311; GFX10-NEXT: s_bfe_u32 s3, s0, 0x80010 1312; GFX10-NEXT: s_lshl_b32 s2, s2, 8 1313; GFX10-NEXT: s_lshl_b32 s3, s3, 16 1314; GFX10-NEXT: s_or_b32 s1, s1, s2 1315; GFX10-NEXT: s_lshr_b32 s0, s0, 24 1316; GFX10-NEXT: s_or_b32 s1, s1, s3 1317; GFX10-NEXT: s_lshl_b32 s0, s0, 24 1318; GFX10-NEXT: s_or_b32 s0, s1, s0 1319; GFX10-NEXT: s_lshr_b32 s0, s0, 24 1320; GFX10-NEXT: ; return to shader part epilog 1321 %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr 1322 %element = extractelement <8 x i8> %vector, i32 3 1323 ret i8 %element 1324} 1325 1326define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx4(<8 x i8> addrspace(4)* inreg %ptr) { 1327; GCN-LABEL: extractelement_sgpr_v8i8_idx4: 1328; GCN: ; %bb.0: 1329; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1330; 
GCN-NEXT: s_waitcnt lgkmcnt(0) 1331; GCN-NEXT: s_bfe_u32 s3, s1, 0x80008 1332; GCN-NEXT: s_lshr_b32 s0, s1, 24 1333; GCN-NEXT: s_and_b32 s2, s1, 0xff 1334; GCN-NEXT: s_bfe_u32 s1, s1, 0x80010 1335; GCN-NEXT: s_lshl_b32 s3, s3, 8 1336; GCN-NEXT: s_or_b32 s2, s2, s3 1337; GCN-NEXT: s_lshl_b32 s1, s1, 16 1338; GCN-NEXT: s_or_b32 s1, s2, s1 1339; GCN-NEXT: s_lshl_b32 s0, s0, 24 1340; GCN-NEXT: s_or_b32 s0, s1, s0 1341; GCN-NEXT: ; return to shader part epilog 1342; 1343; GFX10-LABEL: extractelement_sgpr_v8i8_idx4: 1344; GFX10: ; %bb.0: 1345; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1346; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1347; GFX10-NEXT: s_bfe_u32 s2, s1, 0x80008 1348; GFX10-NEXT: s_and_b32 s0, s1, 0xff 1349; GFX10-NEXT: s_bfe_u32 s3, s1, 0x80010 1350; GFX10-NEXT: s_lshl_b32 s2, s2, 8 1351; GFX10-NEXT: s_lshl_b32 s3, s3, 16 1352; GFX10-NEXT: s_or_b32 s0, s0, s2 1353; GFX10-NEXT: s_lshr_b32 s1, s1, 24 1354; GFX10-NEXT: s_or_b32 s0, s0, s3 1355; GFX10-NEXT: s_lshl_b32 s1, s1, 24 1356; GFX10-NEXT: s_or_b32 s0, s0, s1 1357; GFX10-NEXT: ; return to shader part epilog 1358 %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr 1359 %element = extractelement <8 x i8> %vector, i32 4 1360 ret i8 %element 1361} 1362 1363define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx5(<8 x i8> addrspace(4)* inreg %ptr) { 1364; GCN-LABEL: extractelement_sgpr_v8i8_idx5: 1365; GCN: ; %bb.0: 1366; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1367; GCN-NEXT: s_waitcnt lgkmcnt(0) 1368; GCN-NEXT: s_bfe_u32 s3, s1, 0x80008 1369; GCN-NEXT: s_lshr_b32 s0, s1, 24 1370; GCN-NEXT: s_and_b32 s2, s1, 0xff 1371; GCN-NEXT: s_bfe_u32 s1, s1, 0x80010 1372; GCN-NEXT: s_lshl_b32 s3, s3, 8 1373; GCN-NEXT: s_or_b32 s2, s2, s3 1374; GCN-NEXT: s_lshl_b32 s1, s1, 16 1375; GCN-NEXT: s_or_b32 s1, s2, s1 1376; GCN-NEXT: s_lshl_b32 s0, s0, 24 1377; GCN-NEXT: s_or_b32 s0, s1, s0 1378; GCN-NEXT: s_lshr_b32 s0, s0, 8 1379; GCN-NEXT: ; return to shader part epilog 1380; 1381; GFX10-LABEL: extractelement_sgpr_v8i8_idx5: 
1382; GFX10: ; %bb.0: 1383; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1384; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1385; GFX10-NEXT: s_bfe_u32 s2, s1, 0x80008 1386; GFX10-NEXT: s_and_b32 s0, s1, 0xff 1387; GFX10-NEXT: s_bfe_u32 s3, s1, 0x80010 1388; GFX10-NEXT: s_lshl_b32 s2, s2, 8 1389; GFX10-NEXT: s_lshl_b32 s3, s3, 16 1390; GFX10-NEXT: s_or_b32 s0, s0, s2 1391; GFX10-NEXT: s_lshr_b32 s1, s1, 24 1392; GFX10-NEXT: s_or_b32 s0, s0, s3 1393; GFX10-NEXT: s_lshl_b32 s1, s1, 24 1394; GFX10-NEXT: s_or_b32 s0, s0, s1 1395; GFX10-NEXT: s_lshr_b32 s0, s0, 8 1396; GFX10-NEXT: ; return to shader part epilog 1397 %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr 1398 %element = extractelement <8 x i8> %vector, i32 5 1399 ret i8 %element 1400} 1401 1402define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx6(<8 x i8> addrspace(4)* inreg %ptr) { 1403; GCN-LABEL: extractelement_sgpr_v8i8_idx6: 1404; GCN: ; %bb.0: 1405; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1406; GCN-NEXT: s_waitcnt lgkmcnt(0) 1407; GCN-NEXT: s_bfe_u32 s3, s1, 0x80008 1408; GCN-NEXT: s_lshr_b32 s0, s1, 24 1409; GCN-NEXT: s_and_b32 s2, s1, 0xff 1410; GCN-NEXT: s_bfe_u32 s1, s1, 0x80010 1411; GCN-NEXT: s_lshl_b32 s3, s3, 8 1412; GCN-NEXT: s_or_b32 s2, s2, s3 1413; GCN-NEXT: s_lshl_b32 s1, s1, 16 1414; GCN-NEXT: s_or_b32 s1, s2, s1 1415; GCN-NEXT: s_lshl_b32 s0, s0, 24 1416; GCN-NEXT: s_or_b32 s0, s1, s0 1417; GCN-NEXT: s_lshr_b32 s0, s0, 16 1418; GCN-NEXT: ; return to shader part epilog 1419; 1420; GFX10-LABEL: extractelement_sgpr_v8i8_idx6: 1421; GFX10: ; %bb.0: 1422; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1423; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1424; GFX10-NEXT: s_bfe_u32 s2, s1, 0x80008 1425; GFX10-NEXT: s_and_b32 s0, s1, 0xff 1426; GFX10-NEXT: s_bfe_u32 s3, s1, 0x80010 1427; GFX10-NEXT: s_lshl_b32 s2, s2, 8 1428; GFX10-NEXT: s_lshl_b32 s3, s3, 16 1429; GFX10-NEXT: s_or_b32 s0, s0, s2 1430; GFX10-NEXT: s_lshr_b32 s1, s1, 24 1431; GFX10-NEXT: s_or_b32 s0, s0, s3 1432; GFX10-NEXT: s_lshl_b32 s1, s1, 24 
1433; GFX10-NEXT: s_or_b32 s0, s0, s1 1434; GFX10-NEXT: s_lshr_b32 s0, s0, 16 1435; GFX10-NEXT: ; return to shader part epilog 1436 %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr 1437 %element = extractelement <8 x i8> %vector, i32 6 1438 ret i8 %element 1439} 1440 1441define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx7(<8 x i8> addrspace(4)* inreg %ptr) { 1442; GCN-LABEL: extractelement_sgpr_v8i8_idx7: 1443; GCN: ; %bb.0: 1444; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1445; GCN-NEXT: s_waitcnt lgkmcnt(0) 1446; GCN-NEXT: s_bfe_u32 s3, s1, 0x80008 1447; GCN-NEXT: s_lshr_b32 s0, s1, 24 1448; GCN-NEXT: s_and_b32 s2, s1, 0xff 1449; GCN-NEXT: s_bfe_u32 s1, s1, 0x80010 1450; GCN-NEXT: s_lshl_b32 s3, s3, 8 1451; GCN-NEXT: s_or_b32 s2, s2, s3 1452; GCN-NEXT: s_lshl_b32 s1, s1, 16 1453; GCN-NEXT: s_or_b32 s1, s2, s1 1454; GCN-NEXT: s_lshl_b32 s0, s0, 24 1455; GCN-NEXT: s_or_b32 s0, s1, s0 1456; GCN-NEXT: s_lshr_b32 s0, s0, 24 1457; GCN-NEXT: ; return to shader part epilog 1458; 1459; GFX10-LABEL: extractelement_sgpr_v8i8_idx7: 1460; GFX10: ; %bb.0: 1461; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1462; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1463; GFX10-NEXT: s_bfe_u32 s2, s1, 0x80008 1464; GFX10-NEXT: s_and_b32 s0, s1, 0xff 1465; GFX10-NEXT: s_bfe_u32 s3, s1, 0x80010 1466; GFX10-NEXT: s_lshl_b32 s2, s2, 8 1467; GFX10-NEXT: s_lshl_b32 s3, s3, 16 1468; GFX10-NEXT: s_or_b32 s0, s0, s2 1469; GFX10-NEXT: s_lshr_b32 s1, s1, 24 1470; GFX10-NEXT: s_or_b32 s0, s0, s3 1471; GFX10-NEXT: s_lshl_b32 s1, s1, 24 1472; GFX10-NEXT: s_or_b32 s0, s0, s1 1473; GFX10-NEXT: s_lshr_b32 s0, s0, 24 1474; GFX10-NEXT: ; return to shader part epilog 1475 %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr 1476 %element = extractelement <8 x i8> %vector, i32 7 1477 ret i8 %element 1478} 1479 1480define i8 @extractelement_vgpr_v8i8_idx0(<8 x i8> addrspace(1)* %ptr) { 1481; GFX9-LABEL: extractelement_vgpr_v8i8_idx0: 1482; GFX9: ; %bb.0: 1483; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
1484; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 1485; GFX9-NEXT: v_mov_b32_e32 v2, 8 1486; GFX9-NEXT: s_waitcnt vmcnt(0) 1487; GFX9-NEXT: v_mov_b32_e32 v1, 0xff 1488; GFX9-NEXT: v_mov_b32_e32 v3, 16 1489; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 1490; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 1491; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 1492; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v2 1493; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 1494; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 1495; GFX9-NEXT: s_setpc_b64 s[30:31] 1496; 1497; GFX8-LABEL: extractelement_vgpr_v8i8_idx0: 1498; GFX8: ; %bb.0: 1499; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1500; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] 1501; GFX8-NEXT: s_waitcnt vmcnt(0) 1502; GFX8-NEXT: v_mov_b32_e32 v1, 8 1503; GFX8-NEXT: v_mov_b32_e32 v2, 16 1504; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 1505; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 1506; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 1507; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1508; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 1509; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 1510; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 1511; GFX8-NEXT: s_setpc_b64 s[30:31] 1512; 1513; GFX7-LABEL: extractelement_vgpr_v8i8_idx0: 1514; GFX7: ; %bb.0: 1515; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1516; GFX7-NEXT: s_mov_b32 s6, 0 1517; GFX7-NEXT: s_mov_b32 s7, 0xf000 1518; GFX7-NEXT: s_mov_b64 s[4:5], 0 1519; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 1520; GFX7-NEXT: s_waitcnt vmcnt(0) 1521; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 1522; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 1523; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 1524; GFX7-NEXT: v_bfe_u32 
v0, v0, 16, 8
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
; GFX7-NEXT: v_or_b32_e32 v0, v2, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v1, 8
; GFX10-NEXT: v_mov_b32_e32 v2, 16
; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0
; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3
; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 0
  ret i8 %element
}

; Constant index 1 into an <8 x i8> loaded through an addrspace(1) pointer.
; Byte 1 sits in the low loaded dword (v0): every target's expected code
; repacks that dword from its four bytes and then shifts it right by 8.
define i8 @extractelement_vgpr_v8i8_idx1(<8 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_mov_b32 s4, 8
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, 0xff
; GFX9-NEXT: v_mov_b32_e32 v2, 16
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0
; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v4
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3
; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v1, 8
; GFX8-NEXT: v_mov_b32_e32 v2, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx1:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8
; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0
; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0
; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
; GFX7-NEXT: v_or_b32_e32 v0, v2, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_mov_b32 s4, 8
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v1, 16
; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0
; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3
; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 1
  ret i8 %element
}

; Constant index 2 into an <8 x i8> loaded through an addrspace(1) pointer.
; Byte 2 sits in the low loaded dword (v0): the expected code repacks that
; dword and then shifts it right by 16.
define i8 @extractelement_vgpr_v8i8_idx2(<8 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: v_mov_b32_e32 v2, 8
; GFX9-NEXT: s_mov_b32 s4, 16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, 0xff
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0
; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3
; GFX9-NEXT: v_or3_b32 v0, v0, v4, v1
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v1, 8
; GFX8-NEXT: v_mov_b32_e32 v2, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx2:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8
; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0
; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0
; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
; GFX7-NEXT: v_or_b32_e32 v0, v2, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v1, 8
; GFX10-NEXT: s_mov_b32 s4, 16
; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0
; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2
; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 2
  ret i8 %element
}

; Constant index 3 into an <8 x i8> loaded through an addrspace(1) pointer.
; Byte 3 sits in the low loaded dword (v0): the expected code repacks that
; dword and then shifts it right by 24.
define i8 @extractelement_vgpr_v8i8_idx3(<8 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: v_mov_b32_e32 v2, 8
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, 0xff
; GFX9-NEXT: v_mov_b32_e32 v3, 16
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0
; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4
; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx3:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v1, 8
; GFX8-NEXT: v_mov_b32_e32 v2, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx3:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8
; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0
; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0
; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
; GFX7-NEXT: v_or_b32_e32 v0, v2, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v1, 8
; GFX10-NEXT: v_mov_b32_e32 v2, 16
; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0
; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3
; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 3
  ret i8 %element
}

; Constant index 4 into an <8 x i8> loaded through an addrspace(1) pointer.
; Element 4 is byte 0 of the high loaded dword (v1): the expected code
; repacks v1 and returns it with no trailing shift.
define i8 @extractelement_vgpr_v8i8_idx4(<8 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: v_mov_b32_e32 v2, 8
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, 0xff
; GFX9-NEXT: v_mov_b32_e32 v3, 16
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v1
; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT: v_and_or_b32 v0, v1, v0, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4
; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, 8
; GFX8-NEXT: v_mov_b32_e32 v2, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_bfe_u32 v3, v1, 8, 8
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1
; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v1
; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
; GFX7-NEXT: v_or_b32_e32 v1, v2, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0
; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, 8
; GFX10-NEXT: v_mov_b32_e32 v2, 16
; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v1
; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT: v_and_or_b32 v0, 0xff, v1, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3
; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 4
  ret i8 %element
}

; Constant index 5 into an <8 x i8> loaded through an addrspace(1) pointer.
; Element 5 is byte 1 of the high loaded dword (v1): the expected code
; repacks v1 and then shifts it right by 8.
define i8 @extractelement_vgpr_v8i8_idx5(<8 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_mov_b32 s4, 8
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, 0xff
; GFX9-NEXT: v_mov_b32_e32 v2, 16
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v1
; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT: v_and_or_b32 v0, v1, v0, v4
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3
; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx5:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, 8
; GFX8-NEXT: v_mov_b32_e32 v2, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx5:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_bfe_u32 v3, v1, 8, 8
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1
; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v1
; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
; GFX7-NEXT: v_or_b32_e32 v1, v2, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0
; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx5:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_mov_b32 s4, 8
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, 16
; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v1
; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT: v_and_or_b32 v1, 0xff, v1, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3
; GFX10-NEXT: v_or3_b32 v0, v1, v0, v2
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 5
  ret i8 %element
}

; Constant index 6 into an <8 x i8> loaded through an addrspace(1) pointer.
; Element 6 is byte 2 of the high loaded dword (v1): the expected code
; repacks v1 and then shifts it right by 16.
define i8 @extractelement_vgpr_v8i8_idx6(<8 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx6:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: v_mov_b32_e32 v2, 8
; GFX9-NEXT: s_mov_b32 s4, 16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, 0xff
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v1
; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT: v_and_or_b32 v0, v1, v0, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3
; GFX9-NEXT: v_or3_b32 v0, v0, v4, v1
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx6:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, 8
; GFX8-NEXT: v_mov_b32_e32 v2, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx6:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_bfe_u32 v3, v1, 8, 8
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1
; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v1
; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
; GFX7-NEXT: v_or_b32_e32 v1, v2, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0
; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx6:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, 8
; GFX10-NEXT: s_mov_b32 s4, 16
; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v1
; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT: v_and_or_b32 v0, 0xff, v1, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2
; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 6
  ret i8 %element
}

; Constant index 7 into an <8 x i8> loaded through an addrspace(1) pointer.
; Element 7 is byte 3 of the high loaded dword (v1): the expected code
; repacks v1 and then shifts it right by 24.
define i8 @extractelement_vgpr_v8i8_idx7(<8 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx7:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: v_mov_b32_e32 v2, 8
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, 0xff
; GFX9-NEXT: v_mov_b32_e32 v3, 16
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v1
; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT: v_and_or_b32 v0, v1, v0, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4
; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx7:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, 8
; GFX8-NEXT: v_mov_b32_e32 v2, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx7:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_bfe_u32 v3, v1, 8, 8
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1
; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v1
; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
; GFX7-NEXT: v_or_b32_e32 v1, v2, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0
; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx7:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, 8
; GFX10-NEXT: v_mov_b32_e32 v2, 16
; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v1
; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT: v_and_or_b32 v0, 0xff, v1, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3
; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 7
  ret i8 %element
}

; Variable index (passed inreg) into a <16 x i8> loaded through an inreg
; addrspace(4) pointer, in an amdgpu_ps function. The expected scalar code
; repacks each of the four loaded dwords, picks one with a compare/cselect
; chain keyed on s4 >> 2, then shifts the chosen dword right by (s4 & 3) * 8.
; s4 is presumably the register carrying %idx -- confirm against the calling
; convention if it matters.
define amdgpu_ps i8 @extractelement_sgpr_v16i8_sgpr_idx(<16 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) {
; GCN-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
; GCN-NEXT: s_mov_b32 s11, 0x80008
; GCN-NEXT: s_movk_i32 s9, 0xff
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_bfe_u32 s12, s0, s11
; GCN-NEXT: s_and_b32 s10, s0, s9
; GCN-NEXT: s_lshl_b32 s12, s12, 8
; GCN-NEXT: s_or_b32 s10, s10, s12
; GCN-NEXT: s_mov_b32 s12, 0x80010
; GCN-NEXT: s_lshr_b32 s5, s0, 24
; GCN-NEXT: s_bfe_u32 s0, s0, s12
; GCN-NEXT: s_lshl_b32 s0, s0, 16
; GCN-NEXT: s_or_b32 s0, s10, s0
; GCN-NEXT: s_lshl_b32 s5, s5, 24
; GCN-NEXT: s_bfe_u32 s10, s1, s11
; GCN-NEXT: s_lshr_b32 s6, s1, 24
; GCN-NEXT: s_or_b32 s0, s0, s5
; GCN-NEXT: s_and_b32 s5, s1, s9
; GCN-NEXT: s_lshl_b32 s10, s10, 8
; GCN-NEXT: s_bfe_u32 s1, s1, s12
; GCN-NEXT: s_or_b32 s5, s5, s10
; GCN-NEXT: s_lshl_b32 s1, s1, 16
; GCN-NEXT: s_or_b32 s1, s5, s1
; GCN-NEXT: s_lshl_b32 s5, s6, 24
; GCN-NEXT: s_bfe_u32 s6, s2, s11
; GCN-NEXT: s_lshr_b32 s7, s2, 24
; GCN-NEXT: s_or_b32 s1, s1, s5
; GCN-NEXT: s_and_b32 s5, s2, s9
; GCN-NEXT: s_lshl_b32 s6, s6, 8
; GCN-NEXT: s_bfe_u32 s2, s2, s12
; GCN-NEXT: s_or_b32 s5, s5, s6
; GCN-NEXT: s_lshl_b32 s2, s2, 16
; GCN-NEXT: s_or_b32 s2, s5, s2
; GCN-NEXT: s_lshl_b32 s5, s7, 24
; GCN-NEXT: s_bfe_u32 s6, s3, s11
; GCN-NEXT: s_lshr_b32 s8, s3, 24
; GCN-NEXT: s_or_b32 s2, s2, s5
; GCN-NEXT: s_and_b32 s5, s3, s9
; GCN-NEXT: s_lshl_b32 s6, s6, 8
; GCN-NEXT: s_bfe_u32 s3, s3, s12
; GCN-NEXT: s_or_b32 s5, s5, s6
; GCN-NEXT: s_lshl_b32 s3, s3, 16
; GCN-NEXT: s_or_b32 s3, s5, s3
; GCN-NEXT: s_lshl_b32 s5, s8, 24
; GCN-NEXT: s_or_b32 s3, s3, s5
; GCN-NEXT: s_lshr_b32 s5, s4, 2
; GCN-NEXT: s_cmp_eq_u32 s5, 1
; GCN-NEXT: s_cselect_b32 s0, s1, s0
; GCN-NEXT: s_cmp_eq_u32 s5, 2
; GCN-NEXT: s_cselect_b32 s0, s2, s0
; GCN-NEXT: s_cmp_eq_u32 s5, 3
; GCN-NEXT: s_cselect_b32 s0, s3, s0
; GCN-NEXT: s_and_b32 s1, s4, 3
; GCN-NEXT: s_lshl_b32 s1, s1, 3
; GCN-NEXT: s_lshr_b32 s0, s0, s1
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
; GFX10-NEXT: s_mov_b32 s6, 0x80008
; GFX10-NEXT: s_movk_i32 s5, 0xff
; GFX10-NEXT: s_mov_b32 s7, 0x80010
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_bfe_u32 s13, s0, s6
; GFX10-NEXT: s_lshr_b32 s8, s0, 24
; GFX10-NEXT: s_and_b32 s12, s0, s5
; GFX10-NEXT: s_bfe_u32 s0, s0, s7
; GFX10-NEXT: s_lshl_b32 s13, s13, 8
; GFX10-NEXT: s_bfe_u32 s15, s1, s6
; GFX10-NEXT: s_bfe_u32 s17, s2, s6
; GFX10-NEXT: s_lshl_b32 s0, s0, 16
; GFX10-NEXT: s_or_b32 s12, s12, s13
; GFX10-NEXT: s_bfe_u32 s6, s3, s6
; GFX10-NEXT: s_lshr_b32 s9, s1, 24
; GFX10-NEXT: s_lshr_b32 s10, s2, 24
; GFX10-NEXT: s_lshr_b32 s11, s3, 24
; GFX10-NEXT: s_and_b32 s14, s1, s5
; GFX10-NEXT: s_bfe_u32 s1, s1, s7
; GFX10-NEXT: s_and_b32 s16, s2, s5
; GFX10-NEXT: s_lshl_b32 s8, s8, 24
; GFX10-NEXT: s_lshl_b32 s15, s15, 8
; GFX10-NEXT: s_lshl_b32 s17, s17, 8
; GFX10-NEXT: s_or_b32 s0, s12, s0
; GFX10-NEXT: s_bfe_u32 s2, s2, s7
; GFX10-NEXT: s_and_b32 s5, s3, s5
; GFX10-NEXT: s_lshl_b32 s6, s6, 8
; GFX10-NEXT: s_bfe_u32 s3, s3, s7
; GFX10-NEXT: s_lshl_b32 s1, s1, 16
; GFX10-NEXT: s_or_b32 s13, s14, s15
; GFX10-NEXT: s_or_b32 s0, s0, s8
; GFX10-NEXT: s_or_b32 s8, s16, s17
; GFX10-NEXT: s_lshl_b32 s2, s2, 16
; GFX10-NEXT: s_or_b32 s5, s5, s6
; GFX10-NEXT: s_lshl_b32 s3, s3, 16
; GFX10-NEXT: s_lshl_b32 s9, s9, 24
; GFX10-NEXT: s_or_b32 s1, s13, s1
; GFX10-NEXT: s_or_b32 s2, s8, s2
; GFX10-NEXT: s_lshl_b32 s8, s10, 24
; GFX10-NEXT: s_or_b32 s3, s5, s3
; GFX10-NEXT: s_lshl_b32 s5, s11, 24
; GFX10-NEXT: s_lshr_b32 s6, s4, 2
; GFX10-NEXT: s_or_b32 s1, s1, s9
; GFX10-NEXT: s_or_b32 s2, s2, s8
; GFX10-NEXT: s_or_b32 s3, s3, s5
; GFX10-NEXT: s_cmp_eq_u32 s6, 1
; GFX10-NEXT: s_cselect_b32 s0, s1, s0
; GFX10-NEXT: s_cmp_eq_u32 s6, 2
; GFX10-NEXT: s_cselect_b32 s0, s2, s0
; GFX10-NEXT: s_cmp_eq_u32 s6, 3
; GFX10-NEXT: s_cselect_b32 s0, s3, s0
; GFX10-NEXT: s_and_b32 s1, s4, 3
; GFX10-NEXT: s_lshl_b32 s1, s1, 3
; GFX10-NEXT: s_lshr_b32 s0, s0, s1
; GFX10-NEXT: ; return to shader part epilog
  %vector = load <16 x i8>, <16 x i8> addrspace(4)* %ptr
  %element = extractelement <16 x i8> %vector, i32 %idx
  ret i8 %element
}

define amdgpu_ps i8 @extractelement_vgpr_v16i8_sgpr_idx(<16 x i8> addrspace(1)* %ptr, i32 inreg %idx) {
; GFX9-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX9-NEXT: s_mov_b32 s0, 8
; GFX9-NEXT: s_mov_b32 s1, 16
; GFX9-NEXT: s_movk_i32 s3, 0xff
; GFX9-NEXT: v_mov_b32_e32 v5, 8
; GFX9-NEXT: v_mov_b32_e32 v4, 0xff
; GFX9-NEXT: v_mov_b32_e32 v6, 16
; GFX9-NEXT: s_lshr_b32 s4, s2, 2
; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s4, 1
;
GFX9-NEXT: s_and_b32 s2, s2, 3 2224; GFX9-NEXT: s_waitcnt vmcnt(0) 2225; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v0 2226; GFX9-NEXT: v_lshrrev_b32_e32 v8, 24, v1 2227; GFX9-NEXT: v_lshlrev_b32_sdwa v11, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2228; GFX9-NEXT: v_lshlrev_b32_sdwa v13, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2229; GFX9-NEXT: v_lshrrev_b32_e32 v9, 24, v2 2230; GFX9-NEXT: v_lshlrev_b32_sdwa v12, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2231; GFX9-NEXT: v_lshlrev_b32_sdwa v14, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2232; GFX9-NEXT: v_lshlrev_b32_sdwa v15, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2233; GFX9-NEXT: v_and_or_b32 v0, v0, s3, v11 2234; GFX9-NEXT: v_lshlrev_b32_e32 v7, 24, v7 2235; GFX9-NEXT: v_and_or_b32 v1, v1, s3, v13 2236; GFX9-NEXT: v_lshlrev_b32_e32 v8, 24, v8 2237; GFX9-NEXT: v_lshrrev_b32_e32 v10, 24, v3 2238; GFX9-NEXT: v_lshlrev_b32_sdwa v16, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2239; GFX9-NEXT: v_lshlrev_b32_sdwa v5, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2240; GFX9-NEXT: v_and_or_b32 v2, v2, v4, v15 2241; GFX9-NEXT: v_lshlrev_b32_e32 v9, 24, v9 2242; GFX9-NEXT: v_or3_b32 v0, v0, v12, v7 2243; GFX9-NEXT: v_or3_b32 v1, v1, v14, v8 2244; GFX9-NEXT: v_lshlrev_b32_sdwa v6, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2245; GFX9-NEXT: v_and_or_b32 v3, v3, v4, v5 2246; GFX9-NEXT: v_lshlrev_b32_e32 v4, 24, v10 2247; GFX9-NEXT: v_or3_b32 v2, v2, v16, v9 2248; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 2249; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s4, 2 2250; GFX9-NEXT: v_or3_b32 v3, v3, v6, v4 2251; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2252; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s4, 3 2253; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2254; GFX9-NEXT: s_lshl_b32 s0, s2, 3 2255; 
GFX9-NEXT: v_lshrrev_b32_e32 v0, s0, v0 2256; GFX9-NEXT: v_readfirstlane_b32 s0, v0 2257; GFX9-NEXT: ; return to shader part epilog 2258; 2259; GFX8-LABEL: extractelement_vgpr_v16i8_sgpr_idx: 2260; GFX8: ; %bb.0: 2261; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2262; GFX8-NEXT: v_mov_b32_e32 v4, 8 2263; GFX8-NEXT: v_mov_b32_e32 v5, 16 2264; GFX8-NEXT: v_mov_b32_e32 v6, 8 2265; GFX8-NEXT: v_mov_b32_e32 v7, 16 2266; GFX8-NEXT: s_lshr_b32 s0, s2, 2 2267; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 2268; GFX8-NEXT: s_and_b32 s1, s2, 3 2269; GFX8-NEXT: s_waitcnt vmcnt(0) 2270; GFX8-NEXT: v_lshlrev_b32_sdwa v12, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2271; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2272; GFX8-NEXT: v_lshrrev_b32_e32 v8, 24, v0 2273; GFX8-NEXT: v_lshrrev_b32_e32 v9, 24, v1 2274; GFX8-NEXT: v_lshlrev_b32_sdwa v13, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2275; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2276; GFX8-NEXT: v_lshlrev_b32_sdwa v14, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2277; GFX8-NEXT: v_or_b32_sdwa v0, v0, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2278; GFX8-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2279; GFX8-NEXT: v_lshrrev_b32_e32 v10, 24, v2 2280; GFX8-NEXT: v_lshlrev_b32_sdwa v15, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2281; GFX8-NEXT: v_lshlrev_b32_sdwa v6, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2282; GFX8-NEXT: v_lshlrev_b32_e32 v8, 24, v8 2283; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v9 2284; GFX8-NEXT: v_or_b32_sdwa v2, v2, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2285; GFX8-NEXT: v_or_b32_e32 v0, v0, v13 2286; GFX8-NEXT: 
v_or_b32_e32 v1, v1, v5 2287; GFX8-NEXT: v_lshrrev_b32_e32 v11, 24, v3 2288; GFX8-NEXT: v_lshlrev_b32_sdwa v7, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2289; GFX8-NEXT: v_lshlrev_b32_e32 v9, 24, v10 2290; GFX8-NEXT: v_or_b32_sdwa v3, v3, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2291; GFX8-NEXT: v_or_b32_e32 v2, v2, v15 2292; GFX8-NEXT: v_or_b32_e32 v0, v0, v8 2293; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 2294; GFX8-NEXT: v_lshlrev_b32_e32 v6, 24, v11 2295; GFX8-NEXT: v_or_b32_e32 v3, v3, v7 2296; GFX8-NEXT: v_or_b32_e32 v2, v2, v9 2297; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 2298; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 2 2299; GFX8-NEXT: v_or_b32_e32 v3, v3, v6 2300; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2301; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 3 2302; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2303; GFX8-NEXT: s_lshl_b32 s0, s1, 3 2304; GFX8-NEXT: v_lshrrev_b32_e32 v0, s0, v0 2305; GFX8-NEXT: v_readfirstlane_b32 s0, v0 2306; GFX8-NEXT: ; return to shader part epilog 2307; 2308; GFX7-LABEL: extractelement_vgpr_v16i8_sgpr_idx: 2309; GFX7: ; %bb.0: 2310; GFX7-NEXT: s_mov_b32 s6, 0 2311; GFX7-NEXT: s_mov_b32 s7, 0xf000 2312; GFX7-NEXT: s_mov_b64 s[4:5], 0 2313; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 2314; GFX7-NEXT: s_movk_i32 s0, 0xff 2315; GFX7-NEXT: v_mov_b32_e32 v4, 0xff 2316; GFX7-NEXT: s_lshr_b32 s1, s2, 2 2317; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s1, 1 2318; GFX7-NEXT: s_and_b32 s2, s2, 3 2319; GFX7-NEXT: s_waitcnt vmcnt(0) 2320; GFX7-NEXT: v_bfe_u32 v10, v0, 8, 8 2321; GFX7-NEXT: v_bfe_u32 v12, v1, 8, 8 2322; GFX7-NEXT: v_lshrrev_b32_e32 v5, 24, v0 2323; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v1 2324; GFX7-NEXT: v_and_b32_e32 v9, s0, v0 2325; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 2326; GFX7-NEXT: v_and_b32_e32 v11, s0, v1 2327; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 2328; GFX7-NEXT: v_bfe_u32 v14, v2, 8, 8 2329; GFX7-NEXT: v_lshlrev_b32_e32 v10, 8, v10 2330; GFX7-NEXT: 
v_lshlrev_b32_e32 v12, 8, v12 2331; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v2 2332; GFX7-NEXT: v_and_b32_e32 v13, v2, v4 2333; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 8 2334; GFX7-NEXT: v_bfe_u32 v15, v3, 8, 8 2335; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 2336; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 2337; GFX7-NEXT: v_lshlrev_b32_e32 v14, 8, v14 2338; GFX7-NEXT: v_or_b32_e32 v9, v9, v10 2339; GFX7-NEXT: v_or_b32_e32 v10, v11, v12 2340; GFX7-NEXT: v_lshrrev_b32_e32 v8, 24, v3 2341; GFX7-NEXT: v_and_b32_e32 v4, v3, v4 2342; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 2343; GFX7-NEXT: v_lshlrev_b32_e32 v5, 24, v5 2344; GFX7-NEXT: v_lshlrev_b32_e32 v6, 24, v6 2345; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 2346; GFX7-NEXT: v_lshlrev_b32_e32 v15, 8, v15 2347; GFX7-NEXT: v_or_b32_e32 v11, v13, v14 2348; GFX7-NEXT: v_or_b32_e32 v0, v9, v0 2349; GFX7-NEXT: v_or_b32_e32 v1, v10, v1 2350; GFX7-NEXT: v_lshlrev_b32_e32 v7, 24, v7 2351; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 2352; GFX7-NEXT: v_or_b32_e32 v4, v4, v15 2353; GFX7-NEXT: v_or_b32_e32 v2, v11, v2 2354; GFX7-NEXT: v_or_b32_e32 v0, v0, v5 2355; GFX7-NEXT: v_or_b32_e32 v1, v1, v6 2356; GFX7-NEXT: v_lshlrev_b32_e32 v8, 24, v8 2357; GFX7-NEXT: v_or_b32_e32 v3, v4, v3 2358; GFX7-NEXT: v_or_b32_e32 v2, v2, v7 2359; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 2360; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s1, 2 2361; GFX7-NEXT: v_or_b32_e32 v3, v3, v8 2362; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2363; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s1, 3 2364; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2365; GFX7-NEXT: s_lshl_b32 s0, s2, 3 2366; GFX7-NEXT: v_lshrrev_b32_e32 v0, s0, v0 2367; GFX7-NEXT: v_readfirstlane_b32 s0, v0 2368; GFX7-NEXT: ; return to shader part epilog 2369; 2370; GFX10-LABEL: extractelement_vgpr_v16i8_sgpr_idx: 2371; GFX10: ; %bb.0: 2372; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 2373; GFX10-NEXT: s_mov_b32 s0, 8 2374; GFX10-NEXT: v_mov_b32_e32 v5, 8 2375; GFX10-NEXT: s_mov_b32 s1, 16 2376; GFX10-NEXT: s_movk_i32 s3, 0xff 
2377; GFX10-NEXT: v_mov_b32_e32 v4, 0xff 2378; GFX10-NEXT: v_mov_b32_e32 v6, 16 2379; GFX10-NEXT: s_waitcnt vmcnt(0) 2380; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v0 2381; GFX10-NEXT: v_lshrrev_b32_e32 v8, 24, v1 2382; GFX10-NEXT: v_lshlrev_b32_sdwa v10, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2383; GFX10-NEXT: v_lshlrev_b32_sdwa v12, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2384; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v2 2385; GFX10-NEXT: v_lshlrev_b32_sdwa v11, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2386; GFX10-NEXT: v_lshlrev_b32_sdwa v13, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2387; GFX10-NEXT: v_lshlrev_b32_sdwa v14, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2388; GFX10-NEXT: v_and_or_b32 v0, v0, s3, v10 2389; GFX10-NEXT: v_lshlrev_b32_e32 v7, 24, v7 2390; GFX10-NEXT: v_and_or_b32 v1, v1, s3, v12 2391; GFX10-NEXT: v_lshlrev_b32_e32 v8, 24, v8 2392; GFX10-NEXT: s_lshr_b32 s0, s2, 2 2393; GFX10-NEXT: v_lshlrev_b32_sdwa v15, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2394; GFX10-NEXT: v_and_or_b32 v2, v2, v4, v14 2395; GFX10-NEXT: v_lshlrev_b32_e32 v9, 24, v9 2396; GFX10-NEXT: v_lshlrev_b32_sdwa v5, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2397; GFX10-NEXT: v_lshrrev_b32_e32 v10, 24, v3 2398; GFX10-NEXT: v_or3_b32 v0, v0, v11, v7 2399; GFX10-NEXT: v_or3_b32 v1, v1, v13, v8 2400; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 1 2401; GFX10-NEXT: v_or3_b32 v2, v2, v15, v9 2402; GFX10-NEXT: v_and_or_b32 v4, v3, v4, v5 2403; GFX10-NEXT: v_lshlrev_b32_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2404; GFX10-NEXT: v_lshlrev_b32_e32 v5, 24, v10 2405; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 2406; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 2 2407; GFX10-NEXT: v_or3_b32 v1, v4, v3, v5 2408; GFX10-NEXT: 
v_cndmask_b32_e32 v0, v0, v2, vcc_lo 2409; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 3 2410; GFX10-NEXT: s_and_b32 s0, s2, 3 2411; GFX10-NEXT: s_lshl_b32 s0, s0, 3 2412; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 2413; GFX10-NEXT: v_lshrrev_b32_e32 v0, s0, v0 2414; GFX10-NEXT: v_readfirstlane_b32 s0, v0 2415; GFX10-NEXT: ; return to shader part epilog 2416 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 2417 %element = extractelement <16 x i8> %vector, i32 %idx 2418 ret i8 %element 2419} 2420; Dynamic index held in a VGPR (v2): loads <16 x i8> from an addrspace(1) pointer and returns element %idx. The expected code repacks each loaded dword, picks dword idx>>2 with v_cmp_eq/v_cndmask and shifts it right by (idx&3)*8. CHECK lines are autogenerated - regenerate with utils/update_llc_test_checks.py instead of editing by hand. 2421define i8 @extractelement_vgpr_v16i8_vgpr_idx(<16 x i8> addrspace(1)* %ptr, i32 %idx) { 2422; GFX9-LABEL: extractelement_vgpr_v16i8_vgpr_idx: 2423; GFX9: ; %bb.0: 2424; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2425; GFX9-NEXT: global_load_dwordx4 v[3:6], v[0:1], off 2426; GFX9-NEXT: s_mov_b32 s4, 8 2427; GFX9-NEXT: s_mov_b32 s5, 16 2428; GFX9-NEXT: s_movk_i32 s6, 0xff 2429; GFX9-NEXT: v_mov_b32_e32 v1, 8 2430; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 2431; GFX9-NEXT: v_mov_b32_e32 v7, 16 2432; GFX9-NEXT: v_lshrrev_b32_e32 v8, 2, v2 2433; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 2434; GFX9-NEXT: v_and_b32_e32 v2, 3, v2 2435; GFX9-NEXT: s_waitcnt vmcnt(0) 2436; GFX9-NEXT: v_lshrrev_b32_e32 v9, 24, v3 2437; GFX9-NEXT: v_lshrrev_b32_e32 v10, 24, v4 2438; GFX9-NEXT: v_lshlrev_b32_sdwa v13, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2439; GFX9-NEXT: v_lshlrev_b32_sdwa v15, s4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2440; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v5 2441; GFX9-NEXT: v_lshrrev_b32_e32 v12, 24, v6 2442; GFX9-NEXT: v_lshlrev_b32_sdwa v14, s5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2443; GFX9-NEXT: v_lshlrev_b32_sdwa v16, s5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2444; GFX9-NEXT: v_lshlrev_b32_sdwa v17, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2445; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v6 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2446; GFX9-NEXT: v_and_or_b32 v3, v3, s6, v13 2447; GFX9-NEXT: v_lshlrev_b32_e32 v9, 24, v9 2448; GFX9-NEXT: v_and_or_b32 v4, v4, s6, v15 2449; GFX9-NEXT: v_lshlrev_b32_e32 v10, 24, v10 2450; GFX9-NEXT: v_lshlrev_b32_sdwa v18, v7, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2451; GFX9-NEXT: v_lshlrev_b32_sdwa v7, v7, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2452; GFX9-NEXT: v_and_or_b32 v5, v5, v0, v17 2453; GFX9-NEXT: v_lshlrev_b32_e32 v11, 24, v11 2454; GFX9-NEXT: v_and_or_b32 v0, v6, v0, v1 2455; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v12 2456; GFX9-NEXT: v_or3_b32 v3, v3, v14, v9 2457; GFX9-NEXT: v_or3_b32 v4, v4, v16, v10 2458; GFX9-NEXT: v_or3_b32 v5, v5, v18, v11 2459; GFX9-NEXT: v_or3_b32 v0, v0, v7, v1 2460; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc 2461; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8 2462; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 2463; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8 2464; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 2465; GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v2 2466; GFX9-NEXT: v_lshrrev_b32_e32 v0, v1, v0 2467; GFX9-NEXT: s_setpc_b64 s[30:31] 2468; 2469; GFX8-LABEL: extractelement_vgpr_v16i8_vgpr_idx: 2470; GFX8: ; %bb.0: 2471; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2472; GFX8-NEXT: flat_load_dwordx4 v[3:6], v[0:1] 2473; GFX8-NEXT: v_mov_b32_e32 v0, 8 2474; GFX8-NEXT: v_mov_b32_e32 v1, 16 2475; GFX8-NEXT: v_mov_b32_e32 v7, 8 2476; GFX8-NEXT: v_mov_b32_e32 v8, 16 2477; GFX8-NEXT: v_lshrrev_b32_e32 v9, 2, v2 2478; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v9 2479; GFX8-NEXT: v_and_b32_e32 v2, 3, v2 2480; GFX8-NEXT: s_waitcnt vmcnt(0) 2481; GFX8-NEXT: v_lshlrev_b32_sdwa v14, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2482; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2483; GFX8-NEXT: v_lshrrev_b32_e32 v10, 24, v3 2484; GFX8-NEXT: 
v_lshrrev_b32_e32 v11, 24, v4 2485; GFX8-NEXT: v_lshlrev_b32_sdwa v15, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2486; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2487; GFX8-NEXT: v_lshlrev_b32_sdwa v16, v7, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2488; GFX8-NEXT: v_or_b32_sdwa v3, v3, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2489; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2490; GFX8-NEXT: v_lshrrev_b32_e32 v12, 24, v5 2491; GFX8-NEXT: v_lshlrev_b32_sdwa v17, v8, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2492; GFX8-NEXT: v_lshlrev_b32_sdwa v7, v7, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2493; GFX8-NEXT: v_lshlrev_b32_e32 v10, 24, v10 2494; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v11 2495; GFX8-NEXT: v_or_b32_sdwa v5, v5, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2496; GFX8-NEXT: v_or_b32_e32 v3, v3, v15 2497; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2498; GFX8-NEXT: v_lshrrev_b32_e32 v13, 24, v6 2499; GFX8-NEXT: v_lshlrev_b32_sdwa v8, v8, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2500; GFX8-NEXT: v_lshlrev_b32_e32 v11, 24, v12 2501; GFX8-NEXT: v_or_b32_sdwa v6, v6, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2502; GFX8-NEXT: v_or_b32_e32 v1, v5, v17 2503; GFX8-NEXT: v_or_b32_e32 v3, v3, v10 2504; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 2505; GFX8-NEXT: v_lshlrev_b32_e32 v7, 24, v13 2506; GFX8-NEXT: v_or_b32_e32 v5, v6, v8 2507; GFX8-NEXT: v_or_b32_e32 v1, v1, v11 2508; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc 2509; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 2, v9 2510; GFX8-NEXT: v_or_b32_e32 v4, v5, v7 2511; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 2512; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 3, v9 2513; GFX8-NEXT: v_cndmask_b32_e32 
v0, v0, v4, vcc 2514; GFX8-NEXT: v_lshlrev_b32_e32 v1, 3, v2 2515; GFX8-NEXT: v_lshrrev_b32_e32 v0, v1, v0 2516; GFX8-NEXT: s_setpc_b64 s[30:31] 2517; 2518; GFX7-LABEL: extractelement_vgpr_v16i8_vgpr_idx: 2519; GFX7: ; %bb.0: 2520; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2521; GFX7-NEXT: s_mov_b32 s6, 0 2522; GFX7-NEXT: s_mov_b32 s7, 0xf000 2523; GFX7-NEXT: s_mov_b64 s[4:5], 0 2524; GFX7-NEXT: buffer_load_dwordx4 v[3:6], v[0:1], s[4:7], 0 addr64 2525; GFX7-NEXT: s_movk_i32 s4, 0xff 2526; GFX7-NEXT: v_mov_b32_e32 v0, 0xff 2527; GFX7-NEXT: v_lshrrev_b32_e32 v17, 2, v2 2528; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v17 2529; GFX7-NEXT: v_and_b32_e32 v2, 3, v2 2530; GFX7-NEXT: s_waitcnt vmcnt(0) 2531; GFX7-NEXT: v_bfe_u32 v11, v3, 8, 8 2532; GFX7-NEXT: v_bfe_u32 v13, v4, 8, 8 2533; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v3 2534; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v4 2535; GFX7-NEXT: v_and_b32_e32 v10, s4, v3 2536; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 2537; GFX7-NEXT: v_and_b32_e32 v12, s4, v4 2538; GFX7-NEXT: v_bfe_u32 v4, v4, 16, 8 2539; GFX7-NEXT: v_bfe_u32 v15, v5, 8, 8 2540; GFX7-NEXT: v_lshlrev_b32_e32 v11, 8, v11 2541; GFX7-NEXT: v_lshlrev_b32_e32 v13, 8, v13 2542; GFX7-NEXT: v_lshrrev_b32_e32 v8, 24, v5 2543; GFX7-NEXT: v_and_b32_e32 v14, v5, v0 2544; GFX7-NEXT: v_bfe_u32 v5, v5, 16, 8 2545; GFX7-NEXT: v_bfe_u32 v16, v6, 8, 8 2546; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 2547; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 2548; GFX7-NEXT: v_lshlrev_b32_e32 v15, 8, v15 2549; GFX7-NEXT: v_or_b32_e32 v10, v10, v11 2550; GFX7-NEXT: v_or_b32_e32 v11, v12, v13 2551; GFX7-NEXT: v_lshrrev_b32_e32 v9, 24, v6 2552; GFX7-NEXT: v_and_b32_e32 v0, v6, v0 2553; GFX7-NEXT: v_bfe_u32 v6, v6, 16, 8 2554; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 2555; GFX7-NEXT: v_lshlrev_b32_e32 v7, 24, v7 2556; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5 2557; GFX7-NEXT: v_lshlrev_b32_e32 v16, 8, v16 2558; GFX7-NEXT: v_or_b32_e32 v12, v14, v15 2559; GFX7-NEXT: v_or_b32_e32 v3, v10, v3 2560; GFX7-NEXT: 
v_or_b32_e32 v4, v11, v4 2561; GFX7-NEXT: v_lshlrev_b32_e32 v8, 24, v8 2562; GFX7-NEXT: v_lshlrev_b32_e32 v6, 16, v6 2563; GFX7-NEXT: v_or_b32_e32 v0, v0, v16 2564; GFX7-NEXT: v_or_b32_e32 v5, v12, v5 2565; GFX7-NEXT: v_or_b32_e32 v1, v3, v1 2566; GFX7-NEXT: v_or_b32_e32 v3, v4, v7 2567; GFX7-NEXT: v_lshlrev_b32_e32 v9, 24, v9 2568; GFX7-NEXT: v_or_b32_e32 v0, v0, v6 2569; GFX7-NEXT: v_or_b32_e32 v4, v5, v8 2570; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 2571; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 2, v17 2572; GFX7-NEXT: v_or_b32_e32 v0, v0, v9 2573; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 2574; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 3, v17 2575; GFX7-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 2576; GFX7-NEXT: v_lshlrev_b32_e32 v1, 3, v2 2577; GFX7-NEXT: v_lshrrev_b32_e32 v0, v1, v0 2578; GFX7-NEXT: s_setpc_b64 s[30:31] 2579; 2580; GFX10-LABEL: extractelement_vgpr_v16i8_vgpr_idx: 2581; GFX10: ; %bb.0: 2582; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2583; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2584; GFX10-NEXT: global_load_dwordx4 v[3:6], v[0:1], off 2585; GFX10-NEXT: s_mov_b32 s4, 8 2586; GFX10-NEXT: v_mov_b32_e32 v1, 8 2587; GFX10-NEXT: s_mov_b32 s5, 16 2588; GFX10-NEXT: s_movk_i32 s6, 0xff 2589; GFX10-NEXT: v_mov_b32_e32 v0, 0xff 2590; GFX10-NEXT: v_mov_b32_e32 v7, 16 2591; GFX10-NEXT: v_lshrrev_b32_e32 v8, 2, v2 2592; GFX10-NEXT: v_and_b32_e32 v2, 3, v2 2593; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8 2594; GFX10-NEXT: s_waitcnt vmcnt(0) 2595; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v3 2596; GFX10-NEXT: v_lshrrev_b32_e32 v10, 24, v4 2597; GFX10-NEXT: v_lshlrev_b32_sdwa v13, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2598; GFX10-NEXT: v_lshlrev_b32_sdwa v15, s4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2599; GFX10-NEXT: v_lshrrev_b32_e32 v11, 24, v5 2600; GFX10-NEXT: v_lshlrev_b32_sdwa v14, s5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2601; GFX10-NEXT: v_lshlrev_b32_sdwa 
v16, s5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2602; GFX10-NEXT: v_lshlrev_b32_sdwa v17, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2603; GFX10-NEXT: v_and_or_b32 v3, v3, s6, v13 2604; GFX10-NEXT: v_lshlrev_b32_e32 v9, 24, v9 2605; GFX10-NEXT: v_and_or_b32 v4, v4, s6, v15 2606; GFX10-NEXT: v_lshlrev_b32_e32 v10, 24, v10 2607; GFX10-NEXT: v_lshrrev_b32_e32 v12, 24, v6 2608; GFX10-NEXT: v_lshlrev_b32_sdwa v18, v7, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2609; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2610; GFX10-NEXT: v_and_or_b32 v5, v5, v0, v17 2611; GFX10-NEXT: v_lshlrev_b32_e32 v11, 24, v11 2612; GFX10-NEXT: v_or3_b32 v3, v3, v14, v9 2613; GFX10-NEXT: v_or3_b32 v4, v4, v16, v10 2614; GFX10-NEXT: v_lshlrev_b32_sdwa v7, v7, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2615; GFX10-NEXT: v_and_or_b32 v0, v6, v0, v1 2616; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v12 2617; GFX10-NEXT: v_or3_b32 v5, v5, v18, v11 2618; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo 2619; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8 2620; GFX10-NEXT: v_or3_b32 v0, v0, v7, v1 2621; GFX10-NEXT: v_cndmask_b32_e32 v1, v3, v5, vcc_lo 2622; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8 2623; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo 2624; GFX10-NEXT: v_lshlrev_b32_e32 v1, 3, v2 2625; GFX10-NEXT: v_lshrrev_b32_e32 v0, v1, v0 2626; GFX10-NEXT: s_setpc_b64 s[30:31] 2627 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 2628 %element = extractelement <16 x i8> %vector, i32 %idx 2629 ret i8 %element 2630} 2631; amdgpu_ps variant: the <16 x i8> is loaded through an inreg addrspace(4) pointer with s_load_dwordx4 while the non-inreg i32 index is read from v0. The expected code repacks each dword with scalar ops, picks dword idx>>2 with v_cmp_eq/v_cndmask, shifts right by (idx&3)*8 and returns the result through v_readfirstlane_b32 into s0. CHECK lines are autogenerated - regenerate with utils/update_llc_test_checks.py instead of editing by hand. 2632define amdgpu_ps i8 @extractelement_sgpr_v16i8_vgpr_idx(<16 x i8> addrspace(4)* inreg %ptr, i32 %idx) { 2633; GCN-LABEL: extractelement_sgpr_v16i8_vgpr_idx: 2634; GCN: ; %bb.0: 2635; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 2636; GCN-NEXT: s_mov_b32 s10, 0x80008 2637; GCN-NEXT: s_movk_i32 s8, 0xff 
2638; GCN-NEXT: v_lshrrev_b32_e32 v1, 2, v0 2639; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 2640; GCN-NEXT: s_waitcnt lgkmcnt(0) 2641; GCN-NEXT: s_bfe_u32 s11, s0, s10 2642; GCN-NEXT: s_and_b32 s9, s0, s8 2643; GCN-NEXT: s_lshl_b32 s11, s11, 8 2644; GCN-NEXT: s_or_b32 s9, s9, s11 2645; GCN-NEXT: s_mov_b32 s11, 0x80010 2646; GCN-NEXT: s_lshr_b32 s4, s0, 24 2647; GCN-NEXT: s_bfe_u32 s0, s0, s11 2648; GCN-NEXT: s_lshl_b32 s0, s0, 16 2649; GCN-NEXT: s_or_b32 s0, s9, s0 2650; GCN-NEXT: s_lshl_b32 s4, s4, 24 2651; GCN-NEXT: s_bfe_u32 s9, s1, s10 2652; GCN-NEXT: s_lshr_b32 s5, s1, 24 2653; GCN-NEXT: s_or_b32 s0, s0, s4 2654; GCN-NEXT: s_and_b32 s4, s1, s8 2655; GCN-NEXT: s_lshl_b32 s9, s9, 8 2656; GCN-NEXT: s_bfe_u32 s1, s1, s11 2657; GCN-NEXT: s_or_b32 s4, s4, s9 2658; GCN-NEXT: s_lshl_b32 s1, s1, 16 2659; GCN-NEXT: s_or_b32 s1, s4, s1 2660; GCN-NEXT: s_lshl_b32 s4, s5, 24 2661; GCN-NEXT: s_bfe_u32 s5, s2, s10 2662; GCN-NEXT: s_lshr_b32 s6, s2, 24 2663; GCN-NEXT: s_or_b32 s1, s1, s4 2664; GCN-NEXT: s_and_b32 s4, s2, s8 2665; GCN-NEXT: s_lshl_b32 s5, s5, 8 2666; GCN-NEXT: s_bfe_u32 s2, s2, s11 2667; GCN-NEXT: s_or_b32 s4, s4, s5 2668; GCN-NEXT: s_lshl_b32 s2, s2, 16 2669; GCN-NEXT: s_or_b32 s2, s4, s2 2670; GCN-NEXT: s_lshl_b32 s4, s6, 24 2671; GCN-NEXT: s_bfe_u32 s5, s3, s10 2672; GCN-NEXT: s_lshr_b32 s7, s3, 24 2673; GCN-NEXT: s_or_b32 s2, s2, s4 2674; GCN-NEXT: s_and_b32 s4, s3, s8 2675; GCN-NEXT: s_lshl_b32 s5, s5, 8 2676; GCN-NEXT: s_bfe_u32 s3, s3, s11 2677; GCN-NEXT: s_or_b32 s4, s4, s5 2678; GCN-NEXT: s_lshl_b32 s3, s3, 16 2679; GCN-NEXT: s_or_b32 s3, s4, s3 2680; GCN-NEXT: s_lshl_b32 s4, s7, 24 2681; GCN-NEXT: v_mov_b32_e32 v2, s0 2682; GCN-NEXT: v_mov_b32_e32 v3, s1 2683; GCN-NEXT: s_or_b32 s3, s3, s4 2684; GCN-NEXT: v_mov_b32_e32 v4, s2 2685; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 2686; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1 2687; GCN-NEXT: v_mov_b32_e32 v5, s3 2688; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2689; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1 2690; 
GCN-NEXT: v_and_b32_e32 v0, 3, v0 2691; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v5, vcc 2692; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0 2693; GCN-NEXT: v_lshrrev_b32_e32 v0, v0, v1 2694; GCN-NEXT: v_readfirstlane_b32 s0, v0 2695; GCN-NEXT: ; return to shader part epilog 2696; 2697; GFX10-LABEL: extractelement_sgpr_v16i8_vgpr_idx: 2698; GFX10: ; %bb.0: 2699; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 2700; GFX10-NEXT: s_mov_b32 s5, 0x80008 2701; GFX10-NEXT: s_movk_i32 s4, 0xff 2702; GFX10-NEXT: s_mov_b32 s6, 0x80010 2703; GFX10-NEXT: v_lshrrev_b32_e32 v1, 2, v0 2704; GFX10-NEXT: v_and_b32_e32 v0, 3, v0 2705; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 2706; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0 2707; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2708; GFX10-NEXT: s_bfe_u32 s12, s0, s5 2709; GFX10-NEXT: s_bfe_u32 s14, s1, s5 2710; GFX10-NEXT: s_lshr_b32 s8, s1, 24 2711; GFX10-NEXT: s_and_b32 s11, s0, s4 2712; GFX10-NEXT: s_and_b32 s13, s1, s4 2713; GFX10-NEXT: s_bfe_u32 s1, s1, s6 2714; GFX10-NEXT: s_lshl_b32 s12, s12, 8 2715; GFX10-NEXT: s_lshl_b32 s14, s14, 8 2716; GFX10-NEXT: s_lshl_b32 s1, s1, 16 2717; GFX10-NEXT: s_or_b32 s11, s11, s12 2718; GFX10-NEXT: s_or_b32 s12, s13, s14 2719; GFX10-NEXT: s_lshl_b32 s8, s8, 24 2720; GFX10-NEXT: s_or_b32 s1, s12, s1 2721; GFX10-NEXT: s_lshr_b32 s7, s0, 24 2722; GFX10-NEXT: s_bfe_u32 s0, s0, s6 2723; GFX10-NEXT: s_or_b32 s1, s1, s8 2724; GFX10-NEXT: s_lshl_b32 s0, s0, 16 2725; GFX10-NEXT: s_bfe_u32 s16, s2, s5 2726; GFX10-NEXT: v_mov_b32_e32 v2, s1 2727; GFX10-NEXT: s_lshl_b32 s7, s7, 24 2728; GFX10-NEXT: s_or_b32 s0, s11, s0 2729; GFX10-NEXT: s_lshr_b32 s9, s2, 24 2730; GFX10-NEXT: s_and_b32 s15, s2, s4 2731; GFX10-NEXT: s_lshl_b32 s16, s16, 8 2732; GFX10-NEXT: s_bfe_u32 s2, s2, s6 2733; GFX10-NEXT: s_or_b32 s0, s0, s7 2734; GFX10-NEXT: s_or_b32 s7, s15, s16 2735; GFX10-NEXT: s_lshl_b32 s2, s2, 16 2736; GFX10-NEXT: s_bfe_u32 s5, s3, s5 2737; GFX10-NEXT: v_cndmask_b32_e32 v2, s0, v2, vcc_lo 2738; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1 
2739; GFX10-NEXT: s_or_b32 s2, s7, s2 2740; GFX10-NEXT: s_lshl_b32 s7, s9, 24 2741; GFX10-NEXT: s_and_b32 s4, s3, s4 2742; GFX10-NEXT: s_lshl_b32 s5, s5, 8 2743; GFX10-NEXT: s_bfe_u32 s1, s3, s6 2744; GFX10-NEXT: s_or_b32 s2, s2, s7 2745; GFX10-NEXT: s_lshr_b32 s10, s3, 24 2746; GFX10-NEXT: s_or_b32 s3, s4, s5 2747; GFX10-NEXT: s_lshl_b32 s1, s1, 16 2748; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s2, vcc_lo 2749; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1 2750; GFX10-NEXT: s_or_b32 s0, s3, s1 2751; GFX10-NEXT: s_lshl_b32 s1, s10, 24 2752; GFX10-NEXT: s_or_b32 s3, s0, s1 2753; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo 2754; GFX10-NEXT: v_lshrrev_b32_e32 v0, v0, v1 2755; GFX10-NEXT: v_readfirstlane_b32 s0, v0 2756; GFX10-NEXT: ; return to shader part epilog 2757 %vector = load <16 x i8>, <16 x i8> addrspace(4)* %ptr 2758 %element = extractelement <16 x i8> %vector, i32 %idx 2759 ret i8 %element 2760} 2761; Constant index 0: loads <16 x i8> from an addrspace(1) pointer and returns element 0. The expected code repacks only the first loaded dword (v0) and emits no trailing shift. CHECK lines are autogenerated - regenerate with utils/update_llc_test_checks.py instead of editing by hand. 2762define i8 @extractelement_vgpr_v16i8_idx0(<16 x i8> addrspace(1)* %ptr) { 2763; GFX9-LABEL: extractelement_vgpr_v16i8_idx0: 2764; GFX9: ; %bb.0: 2765; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2766; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 2767; GFX9-NEXT: s_waitcnt vmcnt(0) 2768; GFX9-NEXT: v_mov_b32_e32 v2, 8 2769; GFX9-NEXT: v_mov_b32_e32 v1, 0xff 2770; GFX9-NEXT: v_mov_b32_e32 v3, 16 2771; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 2772; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2773; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2774; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v2 2775; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 2776; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 2777; GFX9-NEXT: s_setpc_b64 s[30:31] 2778; 2779; GFX8-LABEL: extractelement_vgpr_v16i8_idx0: 2780; GFX8: ; %bb.0: 2781; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2782; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2783; GFX8-NEXT: s_waitcnt vmcnt(0) 
2784; GFX8-NEXT: v_mov_b32_e32 v1, 8 2785; GFX8-NEXT: v_mov_b32_e32 v2, 16 2786; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2787; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 2788; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2789; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2790; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 2791; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2792; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2793; GFX8-NEXT: s_setpc_b64 s[30:31] 2794; 2795; GFX7-LABEL: extractelement_vgpr_v16i8_idx0: 2796; GFX7: ; %bb.0: 2797; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2798; GFX7-NEXT: s_mov_b32 s6, 0 2799; GFX7-NEXT: s_mov_b32 s7, 0xf000 2800; GFX7-NEXT: s_mov_b64 s[4:5], 0 2801; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 2802; GFX7-NEXT: s_waitcnt vmcnt(0) 2803; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 2804; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 2805; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 2806; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 2807; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 2808; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 2809; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 2810; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 2811; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 2812; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2813; GFX7-NEXT: s_setpc_b64 s[30:31] 2814; 2815; GFX10-LABEL: extractelement_vgpr_v16i8_idx0: 2816; GFX10: ; %bb.0: 2817; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2818; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2819; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 2820; GFX10-NEXT: s_waitcnt vmcnt(0) 2821; GFX10-NEXT: v_mov_b32_e32 v1, 8 2822; GFX10-NEXT: v_mov_b32_e32 v2, 16 2823; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2824; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 2825; GFX10-NEXT: v_lshlrev_b32_sdwa v2, 
v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2826; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v1 2827; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2828; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 2829; GFX10-NEXT: s_setpc_b64 s[30:31] 2830 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 2831 %element = extractelement <16 x i8> %vector, i32 0 2832 ret i8 %element 2833} 2834; Constant index 1: loads <16 x i8> from an addrspace(1) pointer and returns element 1. The expected code repacks the first loaded dword (v0) and then shifts it right by 8. CHECK lines are autogenerated - regenerate with utils/update_llc_test_checks.py instead of editing by hand. 2835define i8 @extractelement_vgpr_v16i8_idx1(<16 x i8> addrspace(1)* %ptr) { 2836; GFX9-LABEL: extractelement_vgpr_v16i8_idx1: 2837; GFX9: ; %bb.0: 2838; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2839; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 2840; GFX9-NEXT: s_mov_b32 s4, 8 2841; GFX9-NEXT: s_waitcnt vmcnt(0) 2842; GFX9-NEXT: v_mov_b32_e32 v1, 0xff 2843; GFX9-NEXT: v_mov_b32_e32 v2, 16 2844; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0 2845; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2846; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2847; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v4 2848; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2849; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 2850; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0 2851; GFX9-NEXT: s_setpc_b64 s[30:31] 2852; 2853; GFX8-LABEL: extractelement_vgpr_v16i8_idx1: 2854; GFX8: ; %bb.0: 2855; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2856; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2857; GFX8-NEXT: s_waitcnt vmcnt(0) 2858; GFX8-NEXT: v_mov_b32_e32 v1, 8 2859; GFX8-NEXT: v_mov_b32_e32 v2, 16 2860; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2861; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 2862; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2863; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2864; GFX8-NEXT: 
v_or_b32_e32 v0, v0, v2 2865; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2866; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2867; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0 2868; GFX8-NEXT: s_setpc_b64 s[30:31] 2869; 2870; GFX7-LABEL: extractelement_vgpr_v16i8_idx1: 2871; GFX7: ; %bb.0: 2872; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2873; GFX7-NEXT: s_mov_b32 s6, 0 2874; GFX7-NEXT: s_mov_b32 s7, 0xf000 2875; GFX7-NEXT: s_mov_b64 s[4:5], 0 2876; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 2877; GFX7-NEXT: s_waitcnt vmcnt(0) 2878; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 2879; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 2880; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 2881; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 2882; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 2883; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 2884; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 2885; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 2886; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 2887; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2888; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 2889; GFX7-NEXT: s_setpc_b64 s[30:31] 2890; 2891; GFX10-LABEL: extractelement_vgpr_v16i8_idx1: 2892; GFX10: ; %bb.0: 2893; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2894; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2895; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 2896; GFX10-NEXT: s_mov_b32 s4, 8 2897; GFX10-NEXT: s_waitcnt vmcnt(0) 2898; GFX10-NEXT: v_mov_b32_e32 v1, 16 2899; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2900; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 2901; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2902; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v2 2903; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3 2904; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2 2905; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0 2906; GFX10-NEXT: s_setpc_b64 s[30:31] 2907 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 2908 %element = extractelement <16 
x i8> %vector, i32 1 2909 ret i8 %element 2910} 2911; Constant index 2: loads <16 x i8> from an addrspace(1) pointer and returns element 2. The expected code repacks the first loaded dword (v0) and then shifts it right by 16. CHECK lines are autogenerated - regenerate with utils/update_llc_test_checks.py instead of editing by hand. 2912define i8 @extractelement_vgpr_v16i8_idx2(<16 x i8> addrspace(1)* %ptr) { 2913; GFX9-LABEL: extractelement_vgpr_v16i8_idx2: 2914; GFX9: ; %bb.0: 2915; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2916; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 2917; GFX9-NEXT: s_waitcnt vmcnt(0) 2918; GFX9-NEXT: v_mov_b32_e32 v2, 8 2919; GFX9-NEXT: s_mov_b32 s4, 16 2920; GFX9-NEXT: v_mov_b32_e32 v1, 0xff 2921; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0 2922; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2923; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2924; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v2 2925; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2926; GFX9-NEXT: v_or3_b32 v0, v0, v4, v1 2927; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2928; GFX9-NEXT: s_setpc_b64 s[30:31] 2929; 2930; GFX8-LABEL: extractelement_vgpr_v16i8_idx2: 2931; GFX8: ; %bb.0: 2932; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2933; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2934; GFX8-NEXT: s_waitcnt vmcnt(0) 2935; GFX8-NEXT: v_mov_b32_e32 v1, 8 2936; GFX8-NEXT: v_mov_b32_e32 v2, 16 2937; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2938; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 2939; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2940; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2941; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 2942; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 2943; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2944; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2945; GFX8-NEXT: s_setpc_b64 s[30:31] 2946; 2947; GFX7-LABEL: extractelement_vgpr_v16i8_idx2: 2948; GFX7: ; %bb.0: 2949; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2950; GFX7-NEXT: 
s_mov_b32 s6, 0 2951; GFX7-NEXT: s_mov_b32 s7, 0xf000 2952; GFX7-NEXT: s_mov_b64 s[4:5], 0 2953; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 2954; GFX7-NEXT: s_waitcnt vmcnt(0) 2955; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 2956; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 2957; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 2958; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 2959; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 2960; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 2961; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 2962; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 2963; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 2964; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 2965; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2966; GFX7-NEXT: s_setpc_b64 s[30:31] 2967; 2968; GFX10-LABEL: extractelement_vgpr_v16i8_idx2: 2969; GFX10: ; %bb.0: 2970; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2971; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2972; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 2973; GFX10-NEXT: s_waitcnt vmcnt(0) 2974; GFX10-NEXT: v_mov_b32_e32 v1, 8 2975; GFX10-NEXT: s_mov_b32 s4, 16 2976; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2977; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 2978; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2979; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v1 2980; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 2981; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 2982; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2983; GFX10-NEXT: s_setpc_b64 s[30:31] 2984 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 2985 %element = extractelement <16 x i8> %vector, i32 2 2986 ret i8 %element 2987} 2988; Constant index 3: loads <16 x i8> from an addrspace(1) pointer and returns element 3. The expected code repacks the first loaded dword (v0) and then shifts it right by 24. CHECK lines are autogenerated - regenerate with utils/update_llc_test_checks.py instead of editing by hand. 2989define i8 @extractelement_vgpr_v16i8_idx3(<16 x i8> addrspace(1)* %ptr) { 2990; GFX9-LABEL: extractelement_vgpr_v16i8_idx3: 2991; GFX9: ; %bb.0: 2992; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2993; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 2994; GFX9-NEXT: s_waitcnt 
vmcnt(0) 2995; GFX9-NEXT: v_mov_b32_e32 v2, 8 2996; GFX9-NEXT: v_mov_b32_e32 v1, 0xff 2997; GFX9-NEXT: v_mov_b32_e32 v3, 16 2998; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 2999; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3000; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3001; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v2 3002; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 3003; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 3004; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0 3005; GFX9-NEXT: s_setpc_b64 s[30:31] 3006; 3007; GFX8-LABEL: extractelement_vgpr_v16i8_idx3: 3008; GFX8: ; %bb.0: 3009; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3010; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 3011; GFX8-NEXT: s_waitcnt vmcnt(0) 3012; GFX8-NEXT: v_mov_b32_e32 v1, 8 3013; GFX8-NEXT: v_mov_b32_e32 v2, 16 3014; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3015; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 3016; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3017; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 3018; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 3019; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 3020; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3021; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 3022; GFX8-NEXT: s_setpc_b64 s[30:31] 3023; 3024; GFX7-LABEL: extractelement_vgpr_v16i8_idx3: 3025; GFX7: ; %bb.0: 3026; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3027; GFX7-NEXT: s_mov_b32 s6, 0 3028; GFX7-NEXT: s_mov_b32 s7, 0xf000 3029; GFX7-NEXT: s_mov_b64 s[4:5], 0 3030; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 3031; GFX7-NEXT: s_waitcnt vmcnt(0) 3032; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 3033; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 3034; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 3035; GFX7-NEXT: 
v_bfe_u32 v0, v0, 16, 8 3036; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 3037; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 3038; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 3039; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 3040; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 3041; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 3042; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 3043; GFX7-NEXT: s_setpc_b64 s[30:31] 3044; 3045; GFX10-LABEL: extractelement_vgpr_v16i8_idx3: 3046; GFX10: ; %bb.0: 3047; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3048; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3049; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3050; GFX10-NEXT: s_waitcnt vmcnt(0) 3051; GFX10-NEXT: v_mov_b32_e32 v1, 8 3052; GFX10-NEXT: v_mov_b32_e32 v2, 16 3053; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3054; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 3055; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3056; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v1 3057; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3 3058; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 3059; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0 3060; GFX10-NEXT: s_setpc_b64 s[30:31] 3061 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 3062 %element = extractelement <16 x i8> %vector, i32 3 3063 ret i8 %element 3064} 3065; Extract constant element 4 of a <16 x i8> load from addrspace(1) - the expected code repacks the bytes of loaded dword v1 and applies no final shift. 3066define i8 @extractelement_vgpr_v16i8_idx4(<16 x i8> addrspace(1)* %ptr) { 3067; GFX9-LABEL: extractelement_vgpr_v16i8_idx4: 3068; GFX9: ; %bb.0: 3069; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3070; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3071; GFX9-NEXT: s_waitcnt vmcnt(0) 3072; GFX9-NEXT: v_mov_b32_e32 v2, 8 3073; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 3074; GFX9-NEXT: v_mov_b32_e32 v3, 16 3075; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v1 3076; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3077; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3078; GFX9-NEXT: v_and_or_b32 v0, v1, v0, v2 3079; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 3080; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 3081; GFX9-NEXT: s_setpc_b64 s[30:31] 3082; 3083; GFX8-LABEL: extractelement_vgpr_v16i8_idx4: 3084; GFX8: ; %bb.0: 3085; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3086; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 3087; GFX8-NEXT: s_waitcnt vmcnt(0) 3088; GFX8-NEXT: v_mov_b32_e32 v0, 8 3089; GFX8-NEXT: v_mov_b32_e32 v2, 16 3090; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3091; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1 3092; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3093; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 3094; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 3095; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 3096; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3097; GFX8-NEXT: s_setpc_b64 s[30:31] 3098; 3099; GFX7-LABEL: extractelement_vgpr_v16i8_idx4: 3100; GFX7: ; %bb.0: 3101; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3102; GFX7-NEXT: s_mov_b32 s6, 0 3103; GFX7-NEXT: s_mov_b32 s7, 0xf000 3104; GFX7-NEXT: s_mov_b64 s[4:5], 0 3105; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 3106; GFX7-NEXT: s_waitcnt vmcnt(0) 3107; GFX7-NEXT: v_bfe_u32 v3, v1, 8, 8 3108; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 3109; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v1 3110; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 3111; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 3112; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 3113; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 3114; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 3115; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 3116; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 3117; GFX7-NEXT: s_setpc_b64 s[30:31] 3118; 3119; GFX10-LABEL: extractelement_vgpr_v16i8_idx4: 3120; GFX10: ; %bb.0: 3121; GFX10-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3122; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3123; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3124; GFX10-NEXT: s_waitcnt vmcnt(0) 3125; GFX10-NEXT: v_mov_b32_e32 v0, 8 3126; GFX10-NEXT: v_mov_b32_e32 v2, 16 3127; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3128; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v1 3129; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3130; GFX10-NEXT: v_and_or_b32 v0, 0xff, v1, v0 3131; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3 3132; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 3133; GFX10-NEXT: s_setpc_b64 s[30:31] 3134 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 3135 %element = extractelement <16 x i8> %vector, i32 4 3136 ret i8 %element 3137} 3138; Extract constant element 5 of a <16 x i8> load from addrspace(1) - the expected code repacks the bytes of loaded dword v1 and then shifts the result right by 8. 3139define i8 @extractelement_vgpr_v16i8_idx5(<16 x i8> addrspace(1)* %ptr) { 3140; GFX9-LABEL: extractelement_vgpr_v16i8_idx5: 3141; GFX9: ; %bb.0: 3142; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3143; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3144; GFX9-NEXT: s_mov_b32 s4, 8 3145; GFX9-NEXT: s_waitcnt vmcnt(0) 3146; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 3147; GFX9-NEXT: v_mov_b32_e32 v2, 16 3148; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v1 3149; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3150; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3151; GFX9-NEXT: v_and_or_b32 v0, v1, v0, v4 3152; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3 3153; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 3154; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0 3155; GFX9-NEXT: s_setpc_b64 s[30:31] 3156; 3157; GFX8-LABEL: extractelement_vgpr_v16i8_idx5: 3158; GFX8: ; %bb.0: 3159; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3160; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 3161; GFX8-NEXT: s_waitcnt vmcnt(0) 3162; GFX8-NEXT: 
v_mov_b32_e32 v0, 8 3163; GFX8-NEXT: v_mov_b32_e32 v2, 16 3164; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3165; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1 3166; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3167; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 3168; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 3169; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 3170; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3171; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0 3172; GFX8-NEXT: s_setpc_b64 s[30:31] 3173; 3174; GFX7-LABEL: extractelement_vgpr_v16i8_idx5: 3175; GFX7: ; %bb.0: 3176; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3177; GFX7-NEXT: s_mov_b32 s6, 0 3178; GFX7-NEXT: s_mov_b32 s7, 0xf000 3179; GFX7-NEXT: s_mov_b64 s[4:5], 0 3180; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 3181; GFX7-NEXT: s_waitcnt vmcnt(0) 3182; GFX7-NEXT: v_bfe_u32 v3, v1, 8, 8 3183; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 3184; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v1 3185; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 3186; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 3187; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 3188; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 3189; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 3190; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 3191; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 3192; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 3193; GFX7-NEXT: s_setpc_b64 s[30:31] 3194; 3195; GFX10-LABEL: extractelement_vgpr_v16i8_idx5: 3196; GFX10: ; %bb.0: 3197; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3198; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3199; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3200; GFX10-NEXT: s_mov_b32 s4, 8 3201; GFX10-NEXT: s_waitcnt vmcnt(0) 3202; GFX10-NEXT: v_mov_b32_e32 v0, 16 3203; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3204; GFX10-NEXT: 
v_lshrrev_b32_e32 v3, 24, v1 3205; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3206; GFX10-NEXT: v_and_or_b32 v1, 0xff, v1, v2 3207; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3 3208; GFX10-NEXT: v_or3_b32 v0, v1, v0, v2 3209; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0 3210; GFX10-NEXT: s_setpc_b64 s[30:31] 3211 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 3212 %element = extractelement <16 x i8> %vector, i32 5 3213 ret i8 %element 3214} 3215; Extract constant element 6 of a <16 x i8> load from addrspace(1) - the expected code repacks the bytes of loaded dword v1 and then shifts the result right by 16. 3216define i8 @extractelement_vgpr_v16i8_idx6(<16 x i8> addrspace(1)* %ptr) { 3217; GFX9-LABEL: extractelement_vgpr_v16i8_idx6: 3218; GFX9: ; %bb.0: 3219; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3220; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3221; GFX9-NEXT: s_waitcnt vmcnt(0) 3222; GFX9-NEXT: v_mov_b32_e32 v2, 8 3223; GFX9-NEXT: s_mov_b32 s4, 16 3224; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 3225; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v1 3226; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3227; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3228; GFX9-NEXT: v_and_or_b32 v0, v1, v0, v2 3229; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3 3230; GFX9-NEXT: v_or3_b32 v0, v0, v4, v1 3231; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 3232; GFX9-NEXT: s_setpc_b64 s[30:31] 3233; 3234; GFX8-LABEL: extractelement_vgpr_v16i8_idx6: 3235; GFX8: ; %bb.0: 3236; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3237; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 3238; GFX8-NEXT: s_waitcnt vmcnt(0) 3239; GFX8-NEXT: v_mov_b32_e32 v0, 8 3240; GFX8-NEXT: v_mov_b32_e32 v2, 16 3241; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3242; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1 3243; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3244; 
GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 3245; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 3246; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 3247; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3248; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 3249; GFX8-NEXT: s_setpc_b64 s[30:31] 3250; 3251; GFX7-LABEL: extractelement_vgpr_v16i8_idx6: 3252; GFX7: ; %bb.0: 3253; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3254; GFX7-NEXT: s_mov_b32 s6, 0 3255; GFX7-NEXT: s_mov_b32 s7, 0xf000 3256; GFX7-NEXT: s_mov_b64 s[4:5], 0 3257; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 3258; GFX7-NEXT: s_waitcnt vmcnt(0) 3259; GFX7-NEXT: v_bfe_u32 v3, v1, 8, 8 3260; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 3261; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v1 3262; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 3263; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 3264; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 3265; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 3266; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 3267; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 3268; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 3269; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 3270; GFX7-NEXT: s_setpc_b64 s[30:31] 3271; 3272; GFX10-LABEL: extractelement_vgpr_v16i8_idx6: 3273; GFX10: ; %bb.0: 3274; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3275; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3276; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3277; GFX10-NEXT: s_waitcnt vmcnt(0) 3278; GFX10-NEXT: v_mov_b32_e32 v0, 8 3279; GFX10-NEXT: s_mov_b32 s4, 16 3280; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3281; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v1 3282; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3283; GFX10-NEXT: v_and_or_b32 v0, 0xff, v1, v0 3284; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 3285; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 3286; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 3287; 
GFX10-NEXT: s_setpc_b64 s[30:31] 3288 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 3289 %element = extractelement <16 x i8> %vector, i32 6 3290 ret i8 %element 3291} 3292; Extract constant element 7 of a <16 x i8> load from addrspace(1) - the expected code repacks the bytes of loaded dword v1 and then shifts the result right by 24. 3293define i8 @extractelement_vgpr_v16i8_idx7(<16 x i8> addrspace(1)* %ptr) { 3294; GFX9-LABEL: extractelement_vgpr_v16i8_idx7: 3295; GFX9: ; %bb.0: 3296; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3297; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3298; GFX9-NEXT: s_waitcnt vmcnt(0) 3299; GFX9-NEXT: v_mov_b32_e32 v2, 8 3300; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 3301; GFX9-NEXT: v_mov_b32_e32 v3, 16 3302; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v1 3303; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3304; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3305; GFX9-NEXT: v_and_or_b32 v0, v1, v0, v2 3306; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 3307; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 3308; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0 3309; GFX9-NEXT: s_setpc_b64 s[30:31] 3310; 3311; GFX8-LABEL: extractelement_vgpr_v16i8_idx7: 3312; GFX8: ; %bb.0: 3313; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3314; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 3315; GFX8-NEXT: s_waitcnt vmcnt(0) 3316; GFX8-NEXT: v_mov_b32_e32 v0, 8 3317; GFX8-NEXT: v_mov_b32_e32 v2, 16 3318; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3319; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1 3320; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3321; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 3322; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 3323; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 3324; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3325; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 3326; GFX8-NEXT: s_setpc_b64 s[30:31] 3327; 3328; GFX7-LABEL: 
extractelement_vgpr_v16i8_idx7: 3329; GFX7: ; %bb.0: 3330; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3331; GFX7-NEXT: s_mov_b32 s6, 0 3332; GFX7-NEXT: s_mov_b32 s7, 0xf000 3333; GFX7-NEXT: s_mov_b64 s[4:5], 0 3334; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 3335; GFX7-NEXT: s_waitcnt vmcnt(0) 3336; GFX7-NEXT: v_bfe_u32 v3, v1, 8, 8 3337; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 3338; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v1 3339; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 3340; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 3341; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 3342; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 3343; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 3344; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 3345; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 3346; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 3347; GFX7-NEXT: s_setpc_b64 s[30:31] 3348; 3349; GFX10-LABEL: extractelement_vgpr_v16i8_idx7: 3350; GFX10: ; %bb.0: 3351; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3352; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3353; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3354; GFX10-NEXT: s_waitcnt vmcnt(0) 3355; GFX10-NEXT: v_mov_b32_e32 v0, 8 3356; GFX10-NEXT: v_mov_b32_e32 v2, 16 3357; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3358; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v1 3359; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3360; GFX10-NEXT: v_and_or_b32 v0, 0xff, v1, v0 3361; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3 3362; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 3363; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0 3364; GFX10-NEXT: s_setpc_b64 s[30:31] 3365 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 3366 %element = extractelement <16 x i8> %vector, i32 7 3367 ret i8 %element 3368} 3369; Extract constant element 8 of a <16 x i8> load from addrspace(1) - the expected code repacks the bytes of loaded dword v2 and applies no final shift. 3370define i8 @extractelement_vgpr_v16i8_idx8(<16 x i8> addrspace(1)* %ptr) { 3371; GFX9-LABEL: extractelement_vgpr_v16i8_idx8: 3372; GFX9: ; %bb.0: 3373; 
GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3374; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3375; GFX9-NEXT: s_waitcnt vmcnt(0) 3376; GFX9-NEXT: v_mov_b32_e32 v1, 8 3377; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 3378; GFX9-NEXT: v_mov_b32_e32 v3, 16 3379; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v2 3380; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3381; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3382; GFX9-NEXT: v_and_or_b32 v0, v2, v0, v1 3383; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 3384; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 3385; GFX9-NEXT: s_setpc_b64 s[30:31] 3386; 3387; GFX8-LABEL: extractelement_vgpr_v16i8_idx8: 3388; GFX8: ; %bb.0: 3389; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3390; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 3391; GFX8-NEXT: s_waitcnt vmcnt(0) 3392; GFX8-NEXT: v_mov_b32_e32 v0, 8 3393; GFX8-NEXT: v_mov_b32_e32 v1, 16 3394; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3395; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v2 3396; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3397; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 3398; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3399; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 3400; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3401; GFX8-NEXT: s_setpc_b64 s[30:31] 3402; 3403; GFX7-LABEL: extractelement_vgpr_v16i8_idx8: 3404; GFX7: ; %bb.0: 3405; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3406; GFX7-NEXT: s_mov_b32 s6, 0 3407; GFX7-NEXT: s_mov_b32 s7, 0xf000 3408; GFX7-NEXT: s_mov_b64 s[4:5], 0 3409; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 3410; GFX7-NEXT: s_waitcnt vmcnt(0) 3411; GFX7-NEXT: v_bfe_u32 v3, v2, 8, 8 3412; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v2 3413; GFX7-NEXT: 
v_and_b32_e32 v1, 0xff, v2 3414; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 8 3415; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 3416; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 3417; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 3418; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 3419; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 3420; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 3421; GFX7-NEXT: s_setpc_b64 s[30:31] 3422; 3423; GFX10-LABEL: extractelement_vgpr_v16i8_idx8: 3424; GFX10: ; %bb.0: 3425; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3426; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3427; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3428; GFX10-NEXT: s_waitcnt vmcnt(0) 3429; GFX10-NEXT: v_mov_b32_e32 v0, 8 3430; GFX10-NEXT: v_mov_b32_e32 v1, 16 3431; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3432; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v2 3433; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3434; GFX10-NEXT: v_and_or_b32 v0, 0xff, v2, v0 3435; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3 3436; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2 3437; GFX10-NEXT: s_setpc_b64 s[30:31] 3438 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 3439 %element = extractelement <16 x i8> %vector, i32 8 3440 ret i8 %element 3441} 3442; Extract constant element 9 of a <16 x i8> load from addrspace(1) - the expected code repacks the bytes of loaded dword v2 and then shifts the result right by 8. 3443define i8 @extractelement_vgpr_v16i8_idx9(<16 x i8> addrspace(1)* %ptr) { 3444; GFX9-LABEL: extractelement_vgpr_v16i8_idx9: 3445; GFX9: ; %bb.0: 3446; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3447; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3448; GFX9-NEXT: s_mov_b32 s4, 8 3449; GFX9-NEXT: s_waitcnt vmcnt(0) 3450; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 3451; GFX9-NEXT: v_mov_b32_e32 v1, 16 3452; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v2 3453; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3454; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD 
src1_sel:BYTE_2 3455; GFX9-NEXT: v_and_or_b32 v0, v2, v0, v4 3456; GFX9-NEXT: v_lshlrev_b32_e32 v2, 24, v3 3457; GFX9-NEXT: v_or3_b32 v0, v0, v1, v2 3458; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0 3459; GFX9-NEXT: s_setpc_b64 s[30:31] 3460; 3461; GFX8-LABEL: extractelement_vgpr_v16i8_idx9: 3462; GFX8: ; %bb.0: 3463; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3464; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 3465; GFX8-NEXT: s_waitcnt vmcnt(0) 3466; GFX8-NEXT: v_mov_b32_e32 v0, 8 3467; GFX8-NEXT: v_mov_b32_e32 v1, 16 3468; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3469; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v2 3470; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3471; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 3472; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3473; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 3474; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3475; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0 3476; GFX8-NEXT: s_setpc_b64 s[30:31] 3477; 3478; GFX7-LABEL: extractelement_vgpr_v16i8_idx9: 3479; GFX7: ; %bb.0: 3480; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3481; GFX7-NEXT: s_mov_b32 s6, 0 3482; GFX7-NEXT: s_mov_b32 s7, 0xf000 3483; GFX7-NEXT: s_mov_b64 s[4:5], 0 3484; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 3485; GFX7-NEXT: s_waitcnt vmcnt(0) 3486; GFX7-NEXT: v_bfe_u32 v3, v2, 8, 8 3487; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v2 3488; GFX7-NEXT: v_and_b32_e32 v1, 0xff, v2 3489; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 8 3490; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 3491; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 3492; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 3493; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 3494; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 3495; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 3496; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 3497; GFX7-NEXT: s_setpc_b64 s[30:31] 3498; 3499; 
GFX10-LABEL: extractelement_vgpr_v16i8_idx9: 3500; GFX10: ; %bb.0: 3501; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3502; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3503; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3504; GFX10-NEXT: s_mov_b32 s4, 8 3505; GFX10-NEXT: s_waitcnt vmcnt(0) 3506; GFX10-NEXT: v_mov_b32_e32 v0, 16 3507; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3508; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v2 3509; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3510; GFX10-NEXT: v_and_or_b32 v1, 0xff, v2, v1 3511; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3 3512; GFX10-NEXT: v_or3_b32 v0, v1, v0, v2 3513; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0 3514; GFX10-NEXT: s_setpc_b64 s[30:31] 3515 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 3516 %element = extractelement <16 x i8> %vector, i32 9 3517 ret i8 %element 3518} 3519; Extract constant element 10 of a <16 x i8> load from addrspace(1) - the expected code repacks the bytes of loaded dword v2 and then shifts the result right by 16. 3520define i8 @extractelement_vgpr_v16i8_idx10(<16 x i8> addrspace(1)* %ptr) { 3521; GFX9-LABEL: extractelement_vgpr_v16i8_idx10: 3522; GFX9: ; %bb.0: 3523; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3524; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3525; GFX9-NEXT: s_waitcnt vmcnt(0) 3526; GFX9-NEXT: v_mov_b32_e32 v1, 8 3527; GFX9-NEXT: s_mov_b32 s4, 16 3528; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 3529; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v2 3530; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3531; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3532; GFX9-NEXT: v_and_or_b32 v0, v2, v0, v1 3533; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3 3534; GFX9-NEXT: v_or3_b32 v0, v0, v4, v1 3535; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 3536; GFX9-NEXT: s_setpc_b64 s[30:31] 3537; 3538; GFX8-LABEL: extractelement_vgpr_v16i8_idx10: 3539; GFX8: ; %bb.0: 3540; GFX8-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 3541; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 3542; GFX8-NEXT: s_waitcnt vmcnt(0) 3543; GFX8-NEXT: v_mov_b32_e32 v0, 8 3544; GFX8-NEXT: v_mov_b32_e32 v1, 16 3545; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3546; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v2 3547; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3548; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 3549; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3550; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 3551; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3552; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 3553; GFX8-NEXT: s_setpc_b64 s[30:31] 3554; 3555; GFX7-LABEL: extractelement_vgpr_v16i8_idx10: 3556; GFX7: ; %bb.0: 3557; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3558; GFX7-NEXT: s_mov_b32 s6, 0 3559; GFX7-NEXT: s_mov_b32 s7, 0xf000 3560; GFX7-NEXT: s_mov_b64 s[4:5], 0 3561; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 3562; GFX7-NEXT: s_waitcnt vmcnt(0) 3563; GFX7-NEXT: v_bfe_u32 v3, v2, 8, 8 3564; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v2 3565; GFX7-NEXT: v_and_b32_e32 v1, 0xff, v2 3566; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 8 3567; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 3568; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 3569; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 3570; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 3571; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 3572; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 3573; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 3574; GFX7-NEXT: s_setpc_b64 s[30:31] 3575; 3576; GFX10-LABEL: extractelement_vgpr_v16i8_idx10: 3577; GFX10: ; %bb.0: 3578; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3579; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3580; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3581; GFX10-NEXT: s_waitcnt vmcnt(0) 3582; GFX10-NEXT: v_mov_b32_e32 v0, 8 3583; GFX10-NEXT: s_mov_b32 s4, 16 
3584; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3585; GFX10-NEXT: v_lshrrev_b32_e32 v1, 24, v2 3586; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3587; GFX10-NEXT: v_and_or_b32 v0, 0xff, v2, v0 3588; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v1 3589; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 3590; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 3591; GFX10-NEXT: s_setpc_b64 s[30:31] 3592 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 3593 %element = extractelement <16 x i8> %vector, i32 10 3594 ret i8 %element 3595} 3596; Extract constant element 11 of a <16 x i8> load from addrspace(1) - the expected code repacks the bytes of loaded dword v2 and then shifts the result right by 24. 3597define i8 @extractelement_vgpr_v16i8_idx11(<16 x i8> addrspace(1)* %ptr) { 3598; GFX9-LABEL: extractelement_vgpr_v16i8_idx11: 3599; GFX9: ; %bb.0: 3600; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3601; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3602; GFX9-NEXT: s_waitcnt vmcnt(0) 3603; GFX9-NEXT: v_mov_b32_e32 v1, 8 3604; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 3605; GFX9-NEXT: v_mov_b32_e32 v3, 16 3606; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v2 3607; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3608; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3609; GFX9-NEXT: v_and_or_b32 v0, v2, v0, v1 3610; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 3611; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 3612; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0 3613; GFX9-NEXT: s_setpc_b64 s[30:31] 3614; 3615; GFX8-LABEL: extractelement_vgpr_v16i8_idx11: 3616; GFX8: ; %bb.0: 3617; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3618; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 3619; GFX8-NEXT: s_waitcnt vmcnt(0) 3620; GFX8-NEXT: v_mov_b32_e32 v0, 8 3621; GFX8-NEXT: v_mov_b32_e32 v1, 16 3622; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3623; GFX8-NEXT: 
v_lshrrev_b32_e32 v3, 24, v2 3624; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3625; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 3626; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3627; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 3628; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3629; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 3630; GFX8-NEXT: s_setpc_b64 s[30:31] 3631; 3632; GFX7-LABEL: extractelement_vgpr_v16i8_idx11: 3633; GFX7: ; %bb.0: 3634; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3635; GFX7-NEXT: s_mov_b32 s6, 0 3636; GFX7-NEXT: s_mov_b32 s7, 0xf000 3637; GFX7-NEXT: s_mov_b64 s[4:5], 0 3638; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 3639; GFX7-NEXT: s_waitcnt vmcnt(0) 3640; GFX7-NEXT: v_bfe_u32 v3, v2, 8, 8 3641; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v2 3642; GFX7-NEXT: v_and_b32_e32 v1, 0xff, v2 3643; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 8 3644; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 3645; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 3646; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 3647; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 3648; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 3649; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 3650; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 3651; GFX7-NEXT: s_setpc_b64 s[30:31] 3652; 3653; GFX10-LABEL: extractelement_vgpr_v16i8_idx11: 3654; GFX10: ; %bb.0: 3655; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3656; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3657; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3658; GFX10-NEXT: s_waitcnt vmcnt(0) 3659; GFX10-NEXT: v_mov_b32_e32 v0, 8 3660; GFX10-NEXT: v_mov_b32_e32 v1, 16 3661; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3662; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v2 3663; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3664; GFX10-NEXT: v_and_or_b32 v0, 0xff, 
v2, v0 3665; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3 3666; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2 3667; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0 3668; GFX10-NEXT: s_setpc_b64 s[30:31] 3669 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 3670 %element = extractelement <16 x i8> %vector, i32 11 3671 ret i8 %element 3672} 3673; Extract constant element 12 of a <16 x i8> load from addrspace(1) - the expected code repacks the bytes of loaded dword v3 and applies no final shift. 3674define i8 @extractelement_vgpr_v16i8_idx12(<16 x i8> addrspace(1)* %ptr) { 3675; GFX9-LABEL: extractelement_vgpr_v16i8_idx12: 3676; GFX9: ; %bb.0: 3677; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3678; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3679; GFX9-NEXT: s_waitcnt vmcnt(0) 3680; GFX9-NEXT: v_mov_b32_e32 v1, 8 3681; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 3682; GFX9-NEXT: v_mov_b32_e32 v2, 16 3683; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v3 3684; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3685; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3686; GFX9-NEXT: v_and_or_b32 v0, v3, v0, v1 3687; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 3688; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 3689; GFX9-NEXT: s_setpc_b64 s[30:31] 3690; 3691; GFX8-LABEL: extractelement_vgpr_v16i8_idx12: 3692; GFX8: ; %bb.0: 3693; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3694; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 3695; GFX8-NEXT: s_waitcnt vmcnt(0) 3696; GFX8-NEXT: v_mov_b32_e32 v0, 8 3697; GFX8-NEXT: v_mov_b32_e32 v1, 16 3698; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3699; GFX8-NEXT: v_lshrrev_b32_e32 v2, 24, v3 3700; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3701; GFX8-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 3702; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3703; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v2 3704; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 
3705; GFX8-NEXT: s_setpc_b64 s[30:31] 3706; 3707; GFX7-LABEL: extractelement_vgpr_v16i8_idx12: 3708; GFX7: ; %bb.0: 3709; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3710; GFX7-NEXT: s_mov_b32 s6, 0 3711; GFX7-NEXT: s_mov_b32 s7, 0xf000 3712; GFX7-NEXT: s_mov_b64 s[4:5], 0 3713; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 3714; GFX7-NEXT: s_waitcnt vmcnt(0) 3715; GFX7-NEXT: v_bfe_u32 v2, v3, 8, 8 3716; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v3 3717; GFX7-NEXT: v_and_b32_e32 v1, 0xff, v3 3718; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 3719; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 3720; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 3721; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 3722; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 3723; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 3724; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 3725; GFX7-NEXT: s_setpc_b64 s[30:31] 3726; 3727; GFX10-LABEL: extractelement_vgpr_v16i8_idx12: 3728; GFX10: ; %bb.0: 3729; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3730; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3731; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3732; GFX10-NEXT: s_waitcnt vmcnt(0) 3733; GFX10-NEXT: v_mov_b32_e32 v0, 8 3734; GFX10-NEXT: v_mov_b32_e32 v1, 16 3735; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3736; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v3 3737; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3738; GFX10-NEXT: v_and_or_b32 v0, 0xff, v3, v0 3739; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v2 3740; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2 3741; GFX10-NEXT: s_setpc_b64 s[30:31] 3742 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 3743 %element = extractelement <16 x i8> %vector, i32 12 3744 ret i8 %element 3745} 3746 3747define i8 @extractelement_vgpr_v16i8_idx13(<16 x i8> addrspace(1)* %ptr) { 3748; GFX9-LABEL: extractelement_vgpr_v16i8_idx13: 3749; GFX9: ; %bb.0: 3750; GFX9-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 3751; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3752; GFX9-NEXT: s_mov_b32 s4, 8 3753; GFX9-NEXT: s_waitcnt vmcnt(0) 3754; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 3755; GFX9-NEXT: v_mov_b32_e32 v1, 16 3756; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v3 3757; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3758; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3759; GFX9-NEXT: v_and_or_b32 v0, v3, v0, v4 3760; GFX9-NEXT: v_lshlrev_b32_e32 v2, 24, v2 3761; GFX9-NEXT: v_or3_b32 v0, v0, v1, v2 3762; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0 3763; GFX9-NEXT: s_setpc_b64 s[30:31] 3764; 3765; GFX8-LABEL: extractelement_vgpr_v16i8_idx13: 3766; GFX8: ; %bb.0: 3767; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3768; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 3769; GFX8-NEXT: s_waitcnt vmcnt(0) 3770; GFX8-NEXT: v_mov_b32_e32 v0, 8 3771; GFX8-NEXT: v_mov_b32_e32 v1, 16 3772; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3773; GFX8-NEXT: v_lshrrev_b32_e32 v2, 24, v3 3774; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3775; GFX8-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 3776; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3777; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v2 3778; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3779; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0 3780; GFX8-NEXT: s_setpc_b64 s[30:31] 3781; 3782; GFX7-LABEL: extractelement_vgpr_v16i8_idx13: 3783; GFX7: ; %bb.0: 3784; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3785; GFX7-NEXT: s_mov_b32 s6, 0 3786; GFX7-NEXT: s_mov_b32 s7, 0xf000 3787; GFX7-NEXT: s_mov_b64 s[4:5], 0 3788; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 3789; GFX7-NEXT: s_waitcnt vmcnt(0) 3790; GFX7-NEXT: v_bfe_u32 v2, v3, 8, 8 3791; 
GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v3 3792; GFX7-NEXT: v_and_b32_e32 v1, 0xff, v3 3793; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 3794; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 3795; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 3796; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 3797; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 3798; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 3799; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 3800; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 3801; GFX7-NEXT: s_setpc_b64 s[30:31] 3802; 3803; GFX10-LABEL: extractelement_vgpr_v16i8_idx13: 3804; GFX10: ; %bb.0: 3805; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3806; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3807; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3808; GFX10-NEXT: s_mov_b32 s4, 8 3809; GFX10-NEXT: s_waitcnt vmcnt(0) 3810; GFX10-NEXT: v_mov_b32_e32 v0, 16 3811; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3812; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v3 3813; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3814; GFX10-NEXT: v_and_or_b32 v1, 0xff, v3, v1 3815; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v2 3816; GFX10-NEXT: v_or3_b32 v0, v1, v0, v2 3817; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0 3818; GFX10-NEXT: s_setpc_b64 s[30:31] 3819 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 3820 %element = extractelement <16 x i8> %vector, i32 13 3821 ret i8 %element 3822} 3823 3824define i8 @extractelement_vgpr_v16i8_idx14(<16 x i8> addrspace(1)* %ptr) { 3825; GFX9-LABEL: extractelement_vgpr_v16i8_idx14: 3826; GFX9: ; %bb.0: 3827; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3828; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3829; GFX9-NEXT: s_waitcnt vmcnt(0) 3830; GFX9-NEXT: v_mov_b32_e32 v1, 8 3831; GFX9-NEXT: s_mov_b32 s4, 16 3832; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 3833; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v3 3834; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3835; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3836; GFX9-NEXT: v_and_or_b32 v0, v3, v0, v1 3837; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 3838; GFX9-NEXT: v_or3_b32 v0, v0, v4, v1 3839; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 3840; GFX9-NEXT: s_setpc_b64 s[30:31] 3841; 3842; GFX8-LABEL: extractelement_vgpr_v16i8_idx14: 3843; GFX8: ; %bb.0: 3844; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3845; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 3846; GFX8-NEXT: s_waitcnt vmcnt(0) 3847; GFX8-NEXT: v_mov_b32_e32 v0, 8 3848; GFX8-NEXT: v_mov_b32_e32 v1, 16 3849; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3850; GFX8-NEXT: v_lshrrev_b32_e32 v2, 24, v3 3851; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3852; GFX8-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 3853; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3854; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v2 3855; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3856; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 3857; GFX8-NEXT: s_setpc_b64 s[30:31] 3858; 3859; GFX7-LABEL: extractelement_vgpr_v16i8_idx14: 3860; GFX7: ; %bb.0: 3861; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3862; GFX7-NEXT: s_mov_b32 s6, 0 3863; GFX7-NEXT: s_mov_b32 s7, 0xf000 3864; GFX7-NEXT: s_mov_b64 s[4:5], 0 3865; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 3866; GFX7-NEXT: s_waitcnt vmcnt(0) 3867; GFX7-NEXT: v_bfe_u32 v2, v3, 8, 8 3868; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v3 3869; GFX7-NEXT: v_and_b32_e32 v1, 0xff, v3 3870; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 3871; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 3872; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 3873; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 3874; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 3875; GFX7-NEXT: 
v_lshlrev_b32_e32 v0, 24, v0 3876; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 3877; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 3878; GFX7-NEXT: s_setpc_b64 s[30:31] 3879; 3880; GFX10-LABEL: extractelement_vgpr_v16i8_idx14: 3881; GFX10: ; %bb.0: 3882; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3883; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3884; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3885; GFX10-NEXT: s_waitcnt vmcnt(0) 3886; GFX10-NEXT: v_mov_b32_e32 v0, 8 3887; GFX10-NEXT: s_mov_b32 s4, 16 3888; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3889; GFX10-NEXT: v_lshrrev_b32_e32 v1, 24, v3 3890; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3891; GFX10-NEXT: v_and_or_b32 v0, 0xff, v3, v0 3892; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v1 3893; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 3894; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 3895; GFX10-NEXT: s_setpc_b64 s[30:31] 3896 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 3897 %element = extractelement <16 x i8> %vector, i32 14 3898 ret i8 %element 3899} 3900 3901define i8 @extractelement_vgpr_v16i8_idx15(<16 x i8> addrspace(1)* %ptr) { 3902; GFX9-LABEL: extractelement_vgpr_v16i8_idx15: 3903; GFX9: ; %bb.0: 3904; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3905; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3906; GFX9-NEXT: s_waitcnt vmcnt(0) 3907; GFX9-NEXT: v_mov_b32_e32 v1, 8 3908; GFX9-NEXT: v_mov_b32_e32 v0, 0xff 3909; GFX9-NEXT: v_mov_b32_e32 v2, 16 3910; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v3 3911; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3912; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3913; GFX9-NEXT: v_and_or_b32 v0, v3, v0, v1 3914; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 3915; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 3916; GFX9-NEXT: 
v_lshrrev_b32_e32 v0, 24, v0 3917; GFX9-NEXT: s_setpc_b64 s[30:31] 3918; 3919; GFX8-LABEL: extractelement_vgpr_v16i8_idx15: 3920; GFX8: ; %bb.0: 3921; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3922; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 3923; GFX8-NEXT: s_waitcnt vmcnt(0) 3924; GFX8-NEXT: v_mov_b32_e32 v0, 8 3925; GFX8-NEXT: v_mov_b32_e32 v1, 16 3926; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3927; GFX8-NEXT: v_lshrrev_b32_e32 v2, 24, v3 3928; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3929; GFX8-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 3930; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3931; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v2 3932; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3933; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 3934; GFX8-NEXT: s_setpc_b64 s[30:31] 3935; 3936; GFX7-LABEL: extractelement_vgpr_v16i8_idx15: 3937; GFX7: ; %bb.0: 3938; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3939; GFX7-NEXT: s_mov_b32 s6, 0 3940; GFX7-NEXT: s_mov_b32 s7, 0xf000 3941; GFX7-NEXT: s_mov_b64 s[4:5], 0 3942; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 3943; GFX7-NEXT: s_waitcnt vmcnt(0) 3944; GFX7-NEXT: v_bfe_u32 v2, v3, 8, 8 3945; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v3 3946; GFX7-NEXT: v_and_b32_e32 v1, 0xff, v3 3947; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 3948; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 3949; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 3950; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 3951; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 3952; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 3953; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 3954; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 3955; GFX7-NEXT: s_setpc_b64 s[30:31] 3956; 3957; GFX10-LABEL: extractelement_vgpr_v16i8_idx15: 3958; GFX10: ; %bb.0: 3959; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3960; GFX10-NEXT: s_waitcnt_vscnt null, 
0x0 3961; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 3962; GFX10-NEXT: s_waitcnt vmcnt(0) 3963; GFX10-NEXT: v_mov_b32_e32 v0, 8 3964; GFX10-NEXT: v_mov_b32_e32 v1, 16 3965; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 3966; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v3 3967; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 3968; GFX10-NEXT: v_and_or_b32 v0, 0xff, v3, v0 3969; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v2 3970; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2 3971; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0 3972; GFX10-NEXT: s_setpc_b64 s[30:31] 3973 %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 3974 %element = extractelement <16 x i8> %vector, i32 15 3975 ret i8 %element 3976} 3977