; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GPRIDX %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MOVREL %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s

; These functions exercise extractelement with a run-time (non-constant) index
; through GlobalISel (every RUN line passes -global-isel) on gfx900, fiji and
; gfx1010.
;
; Function naming - after the element type, the two letters give where the
; vector and then the index are passed. An 's' operand is an inreg argument
; (it shows up in SGPRs in the checks), a 'v' operand is a plain argument (it
; shows up in VGPRs in the checks), and 'const' marks an IR constant vector.
;
; Do not hand-edit the check lines below. Regenerate them with
; utils/update_llc_test_checks.py after a codegen change.

; Constant <8 x float> vector, index in a VGPR (v0). The expected code is a
; v_cmp_eq_u32 / v_cndmask_b32 chain over indices 1-7 starting from element 0.
; gfx900 and fiji materialize the non-inline literals with v_mov_b32, while
; gfx1010 uses them directly as v_cndmask operands.
define float @dyn_extract_v8f32_const_s_v(i32 %sel) {
; GCN-LABEL: dyn_extract_v8f32_const_s_v:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0x40400000
; GCN-NEXT: v_cndmask_b32_e64 v6, 1.0, 2.0, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
; GCN-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0x40a00000
; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
; GCN-NEXT: v_mov_b32_e32 v3, 0x40c00000
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
; GCN-NEXT: v_mov_b32_e32 v4, 0x40e00000
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
; GCN-NEXT: v_mov_b32_e32 v5, 0x41000000
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: dyn_extract_v8f32_const_s_v:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, 0x41000000, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
  ret float %ext
}

; Constant <8 x float> vector, index in an SGPR (s2). The expected code is a
; scalar s_cmp_eq_u32 / s_cselect_b32 chain, with the result copied to v0 for
; the shader return.
define amdgpu_ps float @dyn_extract_v8f32_const_s_s(i32 inreg %sel) {
; GCN-LABEL: dyn_extract_v8f32_const_s_s:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_cmp_eq_u32 s2, 1
; GCN-NEXT: s_cselect_b32 s0, 2.0, 1.0
; GCN-NEXT: s_cmp_eq_u32 s2, 2
; GCN-NEXT: s_cselect_b32 s0, 0x40400000, s0
; GCN-NEXT: s_cmp_eq_u32 s2, 3
; GCN-NEXT: s_cselect_b32 s0, 4.0, s0
; GCN-NEXT: s_cmp_eq_u32 s2, 4
; GCN-NEXT: s_cselect_b32 s0, 0x40a00000, s0
; GCN-NEXT: s_cmp_eq_u32 s2, 5
; GCN-NEXT: s_cselect_b32 s0, 0x40c00000, s0
; GCN-NEXT: s_cmp_eq_u32 s2, 6
; GCN-NEXT: s_cselect_b32 s0, 0x40e00000, s0
; GCN-NEXT: s_cmp_eq_u32 s2, 7
; GCN-NEXT: s_cselect_b32 s0, 0x41000000, s0
; GCN-NEXT: v_mov_b32_e32 v0, s0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: dyn_extract_v8f32_const_s_s:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_cmp_eq_u32 s2, 1
; GFX10-NEXT: s_cselect_b32 s0, 2.0, 1.0
; GFX10-NEXT: s_cmp_eq_u32 s2, 2
; GFX10-NEXT: s_cselect_b32 s0, 0x40400000, s0
; GFX10-NEXT: s_cmp_eq_u32 s2, 3
; GFX10-NEXT: s_cselect_b32 s0, 4.0, s0
; GFX10-NEXT: s_cmp_eq_u32 s2, 4
; GFX10-NEXT: s_cselect_b32 s0, 0x40a00000, s0
; GFX10-NEXT: s_cmp_eq_u32 s2, 5
; GFX10-NEXT: s_cselect_b32 s0, 0x40c00000, s0
; GFX10-NEXT: s_cmp_eq_u32 s2, 6
; GFX10-NEXT: s_cselect_b32 s0, 0x40e00000, s0
; GFX10-NEXT: s_cmp_eq_u32 s2, 7
; GFX10-NEXT: s_cselect_b32 s0, 0x41000000, s0
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: ; return to shader part epilog
entry:
  %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
  ret float %ext
}

; Vector passed inreg (SGPRs), index in a VGPR (v0). The expected code is a
; v_cmp_eq_u32 / v_cndmask_b32 chain. gfx900 and fiji copy each SGPR element
; into a VGPR first, while gfx1010 mostly feeds the SGPRs straight into
; v_cndmask.
define amdgpu_ps float @dyn_extract_v8f32_s_v(<8 x float> inreg %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v8f32_s_v:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_mov_b32 s0, s2
; GCN-NEXT: s_mov_b32 s1, s3
; GCN-NEXT: s_mov_b32 s2, s4
; GCN-NEXT: v_mov_b32_e32 v1, s0
; GCN-NEXT: v_mov_b32_e32 v2, s1
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT: s_mov_b32 s3, s5
; GCN-NEXT: v_mov_b32_e32 v3, s2
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
; GCN-NEXT: v_mov_b32_e32 v4, s3
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
; GCN-NEXT: v_mov_b32_e32 v5, s6
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
; GCN-NEXT: v_mov_b32_e32 v6, s7
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
; GCN-NEXT: v_mov_b32_e32 v7, s8
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
; GCN-NEXT: v_mov_b32_e32 v8, s9
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v8, vcc
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: dyn_extract_v8f32_s_v:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s7, vcc_lo
; GFX10-NEXT: ; return to shader part epilog
entry:
  %ext = extractelement <8 x float> %vec, i32 %sel
  ret float %ext
}

; Vector in v0-v7 and index in v8. The expected code is a pure VALU
; v_cmp_eq_u32 / v_cndmask_b32 chain that accumulates into v0.
define float @dyn_extract_v8f32_v_v(<8 x float> %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v8f32_v_v:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: dyn_extract_v8f32_v_v:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v8
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v8
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v8
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v8
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <8 x float> %vec, i32 %sel
  ret float %ext
}

; Vector in v0-v7, index in an SGPR (s2). The compares take the SGPR index
; directly (v_cmp_eq_u32_e64 vcc, s2, N) and each is followed by a v_cndmask_b32.
define amdgpu_ps float @dyn_extract_v8f32_v_s(<8 x float> %vec, i32 inreg %sel) {
; GCN-LABEL: dyn_extract_v8f32_v_s:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 6
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 7
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: dyn_extract_v8f32_v_s:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 4
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 5
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 6
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 7
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo
; GFX10-NEXT: ; return to shader part epilog
entry:
  %ext = extractelement <8 x float> %vec, i32 %sel
  ret float %ext
}

; Vector in s2-s9 and index in s10. The expected code is an all-scalar
; s_cmp_eq_u32 / s_cselect_b32 chain, with the result copied to v0 for the
; shader return.
define amdgpu_ps float @dyn_extract_v8f32_s_s(<8 x float> inreg %vec, i32 inreg %sel) {
; GCN-LABEL: dyn_extract_v8f32_s_s:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_cmp_eq_u32 s10, 1
; GCN-NEXT: s_cselect_b32 s0, s3, s2
; GCN-NEXT: s_cmp_eq_u32 s10, 2
; GCN-NEXT: s_cselect_b32 s0, s4, s0
; GCN-NEXT: s_cmp_eq_u32 s10, 3
; GCN-NEXT: s_cselect_b32 s0, s5, s0
; GCN-NEXT: s_cmp_eq_u32 s10, 4
; GCN-NEXT: s_cselect_b32 s0, s6, s0
; GCN-NEXT: s_cmp_eq_u32 s10, 5
; GCN-NEXT: s_cselect_b32 s0, s7, s0
; GCN-NEXT: s_cmp_eq_u32 s10, 6
; GCN-NEXT: s_cselect_b32 s0, s8, s0
; GCN-NEXT: s_cmp_eq_u32 s10, 7
; GCN-NEXT: s_cselect_b32 s0, s9, s0
; GCN-NEXT: v_mov_b32_e32 v0, s0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: dyn_extract_v8f32_s_s:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_cmp_eq_u32 s10, 1
; GFX10-NEXT: s_cselect_b32 s0, s3, s2
; GFX10-NEXT: s_cmp_eq_u32 s10, 2
; GFX10-NEXT: s_cselect_b32 s0, s4, s0
; GFX10-NEXT: s_cmp_eq_u32 s10, 3
; GFX10-NEXT: s_cselect_b32 s0, s5, s0
; GFX10-NEXT: s_cmp_eq_u32 s10, 4
; GFX10-NEXT: s_cselect_b32 s0, s6, s0
; GFX10-NEXT: s_cmp_eq_u32 s10, 5
; GFX10-NEXT: s_cselect_b32 s0, s7, s0
; GFX10-NEXT: s_cmp_eq_u32 s10, 6
; GFX10-NEXT: s_cselect_b32 s0, s8, s0
; GFX10-NEXT: s_cmp_eq_u32 s10, 7
; GFX10-NEXT: s_cselect_b32 s0, s9, s0
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: ; return to shader part epilog
entry:
  %ext = extractelement <8 x float> %vec, i32 %sel
  ret float %ext
}

; Constant <8 x i64> vector, index in a VGPR (v0). Same compare/select chain
; as the f32 variant, but every compare drives two v_cndmask_b32 (one per
; 32-bit half of the element).
define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) {
; GCN-LABEL: dyn_extract_v8i64_const_s_v:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], 1
; GCN-NEXT: s_mov_b64 s[6:7], 2
; GCN-NEXT: v_mov_b32_e32 v1, s4
; GCN-NEXT: v_mov_b32_e32 v2, s5
; GCN-NEXT: v_mov_b32_e32 v3, s6
; GCN-NEXT: v_mov_b32_e32 v4, s7
; GCN-NEXT: s_mov_b64 s[8:9], 3
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT: v_mov_b32_e32 v5, s8
; GCN-NEXT: v_mov_b32_e32 v6, s9
; GCN-NEXT: s_mov_b64 s[10:11], 4
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
; GCN-NEXT: v_mov_b32_e32 v7, s10
; GCN-NEXT: v_mov_b32_e32 v8, s11
; GCN-NEXT: s_mov_b64 s[12:13], 5
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
; GCN-NEXT: s_mov_b64 s[14:15], 6
; GCN-NEXT: v_mov_b32_e32 v9, s12
; GCN-NEXT: v_mov_b32_e32 v10, s13
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
; GCN-NEXT: s_mov_b64 s[16:17], 7
; GCN-NEXT: v_mov_b32_e32 v11, s14
; GCN-NEXT: v_mov_b32_e32 v12, s15
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
; GCN-NEXT: s_mov_b64 s[18:19], 8
; GCN-NEXT: v_mov_b32_e32 v13, s16
; GCN-NEXT: v_mov_b32_e32 v14, s17
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
; GCN-NEXT: v_mov_b32_e32 v15, s18
; GCN-NEXT: v_mov_b32_e32 v16, s19
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: dyn_extract_v8i64_const_s_v:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b64 s[6:7], 2
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10-NEXT: v_mov_b32_e32 v1, s6
; GFX10-NEXT: v_mov_b32_e32 v2, s7
; GFX10-NEXT: s_mov_b64 s[4:5], 1
; GFX10-NEXT: s_mov_b64 s[8:9], 3
; GFX10-NEXT: s_mov_b64 s[10:11], 4
; GFX10-NEXT: v_cndmask_b32_e32 v1, s4, v1, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v2, s5, v2, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10-NEXT: s_mov_b64 s[12:13], 5
; GFX10-NEXT: s_mov_b64 s[14:15], 6
; GFX10-NEXT: s_mov_b64 s[16:17], 7
; GFX10-NEXT: s_mov_b64 s[18:19], 8
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s14, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s16, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s17, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s18, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s19, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
  ret i64 %ext
}

; Constant <8 x i64> vector, index in an SGPR (s2). All eight constants are
; materialized into s[4:19], the index is moved to m0, and a single
; s_movrels_b64 picks the element. The result is stored through an undef
; global pointer. fiji emits flat_store_dwordx2 where gfx900 and gfx1010 emit
; global_store_dwordx2, hence the per-target prefixes.
define amdgpu_ps void @dyn_extract_v8i64_const_s_s(i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_const_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b64 s[4:5], 1
; GPRIDX-NEXT: s_mov_b32 m0, s2
; GPRIDX-NEXT: s_mov_b64 s[18:19], 8
; GPRIDX-NEXT: s_mov_b64 s[16:17], 7
; GPRIDX-NEXT: s_mov_b64 s[14:15], 6
; GPRIDX-NEXT: s_mov_b64 s[12:13], 5
; GPRIDX-NEXT: s_mov_b64 s[10:11], 4
; GPRIDX-NEXT: s_mov_b64 s[8:9], 3
; GPRIDX-NEXT: s_mov_b64 s[6:7], 2
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[4:5]
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_const_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b64 s[4:5], 1
; MOVREL-NEXT: s_mov_b32 m0, s2
; MOVREL-NEXT: s_mov_b64 s[18:19], 8
; MOVREL-NEXT: s_mov_b64 s[16:17], 7
; MOVREL-NEXT: s_mov_b64 s[14:15], 6
; MOVREL-NEXT: s_mov_b64 s[12:13], 5
; MOVREL-NEXT: s_mov_b64 s[10:11], 4
; MOVREL-NEXT: s_mov_b64 s[8:9], 3
; MOVREL-NEXT: s_mov_b64 s[6:7], 2
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[4:5]
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: v_mov_b32_e32 v1, s1
; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; MOVREL-NEXT: s_endpgm
;
; GFX10-LABEL: dyn_extract_v8i64_const_s_s:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_mov_b64 s[4:5], 1
; GFX10-NEXT: s_mov_b32 m0, s2
; GFX10-NEXT: s_mov_b64 s[18:19], 8
; GFX10-NEXT: s_mov_b64 s[16:17], 7
; GFX10-NEXT: s_mov_b64 s[14:15], 6
; GFX10-NEXT: s_mov_b64 s[12:13], 5
; GFX10-NEXT: s_mov_b64 s[10:11], 4
; GFX10-NEXT: s_mov_b64 s[8:9], 3
; GFX10-NEXT: s_mov_b64 s[6:7], 2
; GFX10-NEXT: s_movrels_b64 s[0:1], s[4:5]
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_endpgm
entry:
  %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
  store i64 %ext, i64 addrspace(1)* undef
  ret void
}

; <8 x i64> vector passed inreg (SGPRs), index in a VGPR (v0). The expected
; code is a compare/select chain with two v_cndmask_b32 per compare, and the
; selected value is stored through an undef global pointer.
define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_s_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: v_mov_b32_e32 v1, s0
; GPRIDX-NEXT: v_mov_b32_e32 v2, s1
; GPRIDX-NEXT: v_mov_b32_e32 v3, s2
; GPRIDX-NEXT: v_mov_b32_e32 v4, s3
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: v_mov_b32_e32 v5, s4
; GPRIDX-NEXT: v_mov_b32_e32 v6, s5
; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: v_mov_b32_e32 v7, s6
; GPRIDX-NEXT: v_mov_b32_e32 v8, s7
; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: v_mov_b32_e32 v9, s8
; GPRIDX-NEXT: v_mov_b32_e32 v10, s9
; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
; GPRIDX-NEXT: v_mov_b32_e32 v11, s10
; GPRIDX-NEXT: v_mov_b32_e32 v12, s11
; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
; GPRIDX-NEXT: v_mov_b32_e32 v13, s14
; GPRIDX-NEXT: v_mov_b32_e32 v14, s15
; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
; GPRIDX-NEXT: v_mov_b32_e32 v15, s16
; GPRIDX-NEXT: v_mov_b32_e32 v16, s17
; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc
; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc
; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_s_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: v_mov_b32_e32 v1, s0
; MOVREL-NEXT: v_mov_b32_e32 v2, s1
; MOVREL-NEXT: v_mov_b32_e32 v3, s2
; MOVREL-NEXT: v_mov_b32_e32 v4, s3
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: v_mov_b32_e32 v5, s4
; MOVREL-NEXT: v_mov_b32_e32 v6, s5
; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: v_mov_b32_e32 v7, s6
; MOVREL-NEXT: v_mov_b32_e32 v8, s7
; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: v_mov_b32_e32 v9, s8
; MOVREL-NEXT: v_mov_b32_e32 v10, s9
; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
; MOVREL-NEXT: v_mov_b32_e32 v11, s10
; MOVREL-NEXT: v_mov_b32_e32 v12, s11
; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
; MOVREL-NEXT: v_mov_b32_e32 v13, s14
; MOVREL-NEXT: v_mov_b32_e32 v14, s15
; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
; MOVREL-NEXT: v_mov_b32_e32 v15, s16
; MOVREL-NEXT: v_mov_b32_e32 v16, s17
; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
; MOVREL-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc
; MOVREL-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc
; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; MOVREL-NEXT: s_endpgm
;
; GFX10-LABEL: dyn_extract_v8i64_s_v:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s19, s5
; GFX10-NEXT: v_mov_b32_e32 v1, s2
; GFX10-NEXT: v_mov_b32_e32 v2, s19
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v2, s1, v2, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: s_mov_b32 s8, s10
; GFX10-NEXT: s_mov_b32 s9, s11
; GFX10-NEXT: s_mov_b32 s10, s12
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s5, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX10-NEXT: s_mov_b32 s11, s13
; GFX10-NEXT: s_mov_b32 s12, s14
; GFX10-NEXT: s_mov_b32 s13, s15
; GFX10-NEXT: s_mov_b32 s14, s16
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX10-NEXT: s_mov_b32 s15, s17
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s14, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s15, vcc_lo
; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_endpgm
entry:
  %ext = extractelement <8 x i64> %vec, i32 %sel
  store i64 %ext, i64 addrspace(1)* undef
  ret void
}

; <8 x i64> vector in v0-v15 and index in v16. The expected code is a VALU
; compare/select chain with two v_cndmask_b32 per compare, and the result is
; returned in v0/v1.
define i64 @dyn_extract_v8i64_v_v(<8 x i64> %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v8i64_v_v:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: dyn_extract_v8i64_v_v:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <8 x i64> %vec, i32 %sel
  ret i64 %ext
}

; <8 x i64> vector in VGPRs, index in an SGPR (s2). This is the case where the
; three targets differ in mechanism. The index is shifted left by one in all
; three (presumably because each i64 spans two 32-bit VGPRs - confirm). gfx900
; then brackets two v_mov_b32 with s_set_gpr_idx_on/off, while fiji and gfx1010
; write the shifted index to m0 and use v_movrels_b32.
define amdgpu_ps void @dyn_extract_v8i64_v_s(<8 x i64> %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_v_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v16, v0
; GPRIDX-NEXT: v_mov_b32_e32 v17, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[16:17], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_v_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_lshl_b32 m0, s2, 1
; MOVREL-NEXT: v_movrels_b32_e32 v16, v0
; MOVREL-NEXT: v_movrels_b32_e32 v17, v1
; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[16:17]
; MOVREL-NEXT: s_endpgm
;
; GFX10-LABEL: dyn_extract_v8i64_v_s:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_lshl_b32 m0, s2, 1
; GFX10-NEXT: v_movrels_b32_e32 v16, v0
; GFX10-NEXT: v_movrels_b32_e32 v17, v1
; GFX10-NEXT: global_store_dwordx2 v[0:1], v[16:17], off
; GFX10-NEXT: s_endpgm
entry:
  %ext = extractelement <8 x i64> %vec, i32 %sel
  store i64 %ext, i64 addrspace(1)* undef
  ret void
}

; <8 x i64> vector in s2-s17 and index in s18. The vector is copied down to
; s[0:15], the index goes to m0, and one s_movrels_b64 reads the selected pair.
; The three targets differ only in the final store instruction.
define amdgpu_ps void @dyn_extract_v8i64_s_s(<8 x i64> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: v_mov_b32_e32 v1, s1
; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; MOVREL-NEXT: s_endpgm
;
; GFX10-LABEL: dyn_extract_v8i64_s_s:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 m0, s18
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: s_mov_b32 s8, s10
; GFX10-NEXT: s_mov_b32 s9, s11
; GFX10-NEXT: s_mov_b32 s10, s12
; GFX10-NEXT: s_mov_b32 s11, s13
; GFX10-NEXT: s_mov_b32 s12, s14
; GFX10-NEXT: s_mov_b32 s13, s15
; GFX10-NEXT: s_mov_b32 s14, s16
; GFX10-NEXT: s_mov_b32 s15, s17
; GFX10-NEXT: s_movrels_b64 s[0:1], s[0:1]
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_endpgm
entry:
  %ext = extractelement <8 x i64> %vec, i32 %sel
  store i64 %ext, i64 addrspace(1)* undef
  ret void
}

; Same as dyn_extract_v8f32_s_s but the index is %sel + 3. The checks expect
; the add to be emitted as s_add_i32 ahead of the usual s_cmp_eq_u32 /
; s_cselect_b32 chain.
define amdgpu_ps float @dyn_extract_v8f32_s_s_offset3(<8 x float> inreg %vec, i32 inreg %sel) {
; GCN-LABEL: dyn_extract_v8f32_s_s_offset3:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_add_i32 s10, s10, 3
; GCN-NEXT: s_cmp_eq_u32 s10, 1
; GCN-NEXT: s_cselect_b32 s0, s3, s2
; GCN-NEXT: s_cmp_eq_u32 s10, 2
; GCN-NEXT: s_cselect_b32 s0, s4, s0
; GCN-NEXT: s_cmp_eq_u32 s10, 3
; GCN-NEXT: s_cselect_b32 s0, s5, s0
; GCN-NEXT: s_cmp_eq_u32 s10, 4
; GCN-NEXT: s_cselect_b32 s0, s6, s0
; GCN-NEXT: s_cmp_eq_u32 s10, 5
; GCN-NEXT: s_cselect_b32 s0, s7, s0
; GCN-NEXT: s_cmp_eq_u32 s10, 6
; GCN-NEXT: s_cselect_b32 s0, s8, s0
; GCN-NEXT: s_cmp_eq_u32 s10, 7
; GCN-NEXT: s_cselect_b32 s0, s9, s0
; GCN-NEXT: v_mov_b32_e32 v0, s0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: dyn_extract_v8f32_s_s_offset3:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_add_i32 s10, s10, 3
; GFX10-NEXT: s_cmp_eq_u32 s10, 1
; GFX10-NEXT: s_cselect_b32 s0, s3, s2
; GFX10-NEXT: s_cmp_eq_u32 s10, 2
; GFX10-NEXT: s_cselect_b32 s0, s4, s0
; GFX10-NEXT: s_cmp_eq_u32 s10, 3
; GFX10-NEXT: s_cselect_b32 s0, s5, s0
; GFX10-NEXT: s_cmp_eq_u32 s10, 4
; GFX10-NEXT: s_cselect_b32 s0, s6, s0
; GFX10-NEXT: s_cmp_eq_u32 s10, 5
; GFX10-NEXT: s_cselect_b32 s0, s7, s0
; GFX10-NEXT: s_cmp_eq_u32 s10, 6
; GFX10-NEXT: s_cselect_b32 s0, s8, s0
; GFX10-NEXT: s_cmp_eq_u32 s10, 7
; GFX10-NEXT: s_cselect_b32 s0, s9, s0
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: ; return to shader part epilog
entry:
  %add = add i32 %sel, 3
  %ext = extractelement <8 x float> %vec, i32 %add
  ret float %ext
}

define float @dyn_extract_v8f32_v_v_offset3(<8 x float> %vec, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_v_v_offset3:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: v_add_u32_e32 v8, 3, v8
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8
; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8
; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8
; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8
; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8
; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8
; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_extract_v8f32_v_v_offset3:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: v_add_u32_e32 v8, vcc, 3, v8
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8
; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8
; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8
; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8
; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8
; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8
; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
; MOVREL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: dyn_extract_v8f32_v_v_offset3:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v8, 3, v8
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v8
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v8
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v8
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6,
vcc_lo 870; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v8 871; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo 872; GFX10-NEXT: s_setpc_b64 s[30:31] 873entry: 874 %add = add i32 %sel, 3 875 %ext = extractelement <8 x float> %vec, i32 %add 876 ret float %ext 877} 878 879define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec, i32 inreg %sel) { 880; GCN-LABEL: dyn_extract_v8f64_s_s_offset1: 881; GCN: ; %bb.0: ; %entry 882; GCN-NEXT: s_mov_b32 s0, s2 883; GCN-NEXT: s_mov_b32 s1, s3 884; GCN-NEXT: s_mov_b32 s2, s4 885; GCN-NEXT: s_mov_b32 s3, s5 886; GCN-NEXT: s_mov_b32 m0, s18 887; GCN-NEXT: s_mov_b32 s4, s6 888; GCN-NEXT: s_mov_b32 s5, s7 889; GCN-NEXT: s_mov_b32 s6, s8 890; GCN-NEXT: s_mov_b32 s7, s9 891; GCN-NEXT: s_mov_b32 s8, s10 892; GCN-NEXT: s_mov_b32 s9, s11 893; GCN-NEXT: s_mov_b32 s10, s12 894; GCN-NEXT: s_mov_b32 s11, s13 895; GCN-NEXT: s_mov_b32 s12, s14 896; GCN-NEXT: s_mov_b32 s13, s15 897; GCN-NEXT: s_mov_b32 s14, s16 898; GCN-NEXT: s_mov_b32 s15, s17 899; GCN-NEXT: s_movrels_b64 s[0:1], s[2:3] 900; GCN-NEXT: ; return to shader part epilog 901; 902; GFX10-LABEL: dyn_extract_v8f64_s_s_offset1: 903; GFX10: ; %bb.0: ; %entry 904; GFX10-NEXT: s_mov_b32 s0, s2 905; GFX10-NEXT: s_mov_b32 s1, s3 906; GFX10-NEXT: s_mov_b32 s2, s4 907; GFX10-NEXT: s_mov_b32 s3, s5 908; GFX10-NEXT: s_mov_b32 m0, s18 909; GFX10-NEXT: s_mov_b32 s4, s6 910; GFX10-NEXT: s_mov_b32 s5, s7 911; GFX10-NEXT: s_mov_b32 s6, s8 912; GFX10-NEXT: s_mov_b32 s7, s9 913; GFX10-NEXT: s_mov_b32 s8, s10 914; GFX10-NEXT: s_mov_b32 s9, s11 915; GFX10-NEXT: s_mov_b32 s10, s12 916; GFX10-NEXT: s_mov_b32 s11, s13 917; GFX10-NEXT: s_mov_b32 s12, s14 918; GFX10-NEXT: s_mov_b32 s13, s15 919; GFX10-NEXT: s_mov_b32 s14, s16 920; GFX10-NEXT: s_mov_b32 s15, s17 921; GFX10-NEXT: s_movrels_b64 s[0:1], s[2:3] 922; GFX10-NEXT: ; return to shader part epilog 923entry: 924 %add = add i32 %sel, 1 925 %ext = extractelement <8 x double> %vec, i32 %add 926 ret double %ext 927} 928 929define 
amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec, i32 inreg %sel) { 930; GCN-LABEL: dyn_extract_v8f64_s_s_offset2: 931; GCN: ; %bb.0: ; %entry 932; GCN-NEXT: s_mov_b32 s0, s2 933; GCN-NEXT: s_mov_b32 s1, s3 934; GCN-NEXT: s_mov_b32 s2, s4 935; GCN-NEXT: s_mov_b32 s3, s5 936; GCN-NEXT: s_mov_b32 s4, s6 937; GCN-NEXT: s_mov_b32 s5, s7 938; GCN-NEXT: s_mov_b32 m0, s18 939; GCN-NEXT: s_mov_b32 s6, s8 940; GCN-NEXT: s_mov_b32 s7, s9 941; GCN-NEXT: s_mov_b32 s8, s10 942; GCN-NEXT: s_mov_b32 s9, s11 943; GCN-NEXT: s_mov_b32 s10, s12 944; GCN-NEXT: s_mov_b32 s11, s13 945; GCN-NEXT: s_mov_b32 s12, s14 946; GCN-NEXT: s_mov_b32 s13, s15 947; GCN-NEXT: s_mov_b32 s14, s16 948; GCN-NEXT: s_mov_b32 s15, s17 949; GCN-NEXT: s_movrels_b64 s[0:1], s[4:5] 950; GCN-NEXT: ; return to shader part epilog 951; 952; GFX10-LABEL: dyn_extract_v8f64_s_s_offset2: 953; GFX10: ; %bb.0: ; %entry 954; GFX10-NEXT: s_mov_b32 s0, s2 955; GFX10-NEXT: s_mov_b32 s1, s3 956; GFX10-NEXT: s_mov_b32 s2, s4 957; GFX10-NEXT: s_mov_b32 s3, s5 958; GFX10-NEXT: s_mov_b32 s4, s6 959; GFX10-NEXT: s_mov_b32 s5, s7 960; GFX10-NEXT: s_mov_b32 m0, s18 961; GFX10-NEXT: s_mov_b32 s6, s8 962; GFX10-NEXT: s_mov_b32 s7, s9 963; GFX10-NEXT: s_mov_b32 s8, s10 964; GFX10-NEXT: s_mov_b32 s9, s11 965; GFX10-NEXT: s_mov_b32 s10, s12 966; GFX10-NEXT: s_mov_b32 s11, s13 967; GFX10-NEXT: s_mov_b32 s12, s14 968; GFX10-NEXT: s_mov_b32 s13, s15 969; GFX10-NEXT: s_mov_b32 s14, s16 970; GFX10-NEXT: s_mov_b32 s15, s17 971; GFX10-NEXT: s_movrels_b64 s[0:1], s[4:5] 972; GFX10-NEXT: ; return to shader part epilog 973entry: 974 %add = add i32 %sel, 2 975 %ext = extractelement <8 x double> %vec, i32 %add 976 ret double %ext 977} 978 979define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec, i32 inreg %sel) { 980; GCN-LABEL: dyn_extract_v8f64_s_s_offset3: 981; GCN: ; %bb.0: ; %entry 982; GCN-NEXT: s_mov_b32 s0, s2 983; GCN-NEXT: s_mov_b32 s1, s3 984; GCN-NEXT: s_mov_b32 s2, s4 985; GCN-NEXT: 
s_mov_b32 s3, s5 986; GCN-NEXT: s_mov_b32 s4, s6 987; GCN-NEXT: s_mov_b32 s5, s7 988; GCN-NEXT: s_mov_b32 s6, s8 989; GCN-NEXT: s_mov_b32 s7, s9 990; GCN-NEXT: s_mov_b32 m0, s18 991; GCN-NEXT: s_mov_b32 s8, s10 992; GCN-NEXT: s_mov_b32 s9, s11 993; GCN-NEXT: s_mov_b32 s10, s12 994; GCN-NEXT: s_mov_b32 s11, s13 995; GCN-NEXT: s_mov_b32 s12, s14 996; GCN-NEXT: s_mov_b32 s13, s15 997; GCN-NEXT: s_mov_b32 s14, s16 998; GCN-NEXT: s_mov_b32 s15, s17 999; GCN-NEXT: s_movrels_b64 s[0:1], s[6:7] 1000; GCN-NEXT: ; return to shader part epilog 1001; 1002; GFX10-LABEL: dyn_extract_v8f64_s_s_offset3: 1003; GFX10: ; %bb.0: ; %entry 1004; GFX10-NEXT: s_mov_b32 s0, s2 1005; GFX10-NEXT: s_mov_b32 s1, s3 1006; GFX10-NEXT: s_mov_b32 s2, s4 1007; GFX10-NEXT: s_mov_b32 s3, s5 1008; GFX10-NEXT: s_mov_b32 s4, s6 1009; GFX10-NEXT: s_mov_b32 s5, s7 1010; GFX10-NEXT: s_mov_b32 s6, s8 1011; GFX10-NEXT: s_mov_b32 s7, s9 1012; GFX10-NEXT: s_mov_b32 m0, s18 1013; GFX10-NEXT: s_mov_b32 s8, s10 1014; GFX10-NEXT: s_mov_b32 s9, s11 1015; GFX10-NEXT: s_mov_b32 s10, s12 1016; GFX10-NEXT: s_mov_b32 s11, s13 1017; GFX10-NEXT: s_mov_b32 s12, s14 1018; GFX10-NEXT: s_mov_b32 s13, s15 1019; GFX10-NEXT: s_mov_b32 s14, s16 1020; GFX10-NEXT: s_mov_b32 s15, s17 1021; GFX10-NEXT: s_movrels_b64 s[0:1], s[6:7] 1022; GFX10-NEXT: ; return to shader part epilog 1023entry: 1024 %add = add i32 %sel, 3 1025 %ext = extractelement <8 x double> %vec, i32 %add 1026 ret double %ext 1027} 1028 1029define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec, i32 inreg %sel) { 1030; GCN-LABEL: dyn_extract_v8f64_s_s_offset4: 1031; GCN: ; %bb.0: ; %entry 1032; GCN-NEXT: s_mov_b32 s0, s2 1033; GCN-NEXT: s_mov_b32 s1, s3 1034; GCN-NEXT: s_mov_b32 s2, s4 1035; GCN-NEXT: s_mov_b32 s3, s5 1036; GCN-NEXT: s_mov_b32 s4, s6 1037; GCN-NEXT: s_mov_b32 s5, s7 1038; GCN-NEXT: s_mov_b32 s6, s8 1039; GCN-NEXT: s_mov_b32 s7, s9 1040; GCN-NEXT: s_mov_b32 s8, s10 1041; GCN-NEXT: s_mov_b32 s9, s11 1042; GCN-NEXT: s_mov_b32 m0, 
s18 1043; GCN-NEXT: s_mov_b32 s10, s12 1044; GCN-NEXT: s_mov_b32 s11, s13 1045; GCN-NEXT: s_mov_b32 s12, s14 1046; GCN-NEXT: s_mov_b32 s13, s15 1047; GCN-NEXT: s_mov_b32 s14, s16 1048; GCN-NEXT: s_mov_b32 s15, s17 1049; GCN-NEXT: s_movrels_b64 s[0:1], s[8:9] 1050; GCN-NEXT: ; return to shader part epilog 1051; 1052; GFX10-LABEL: dyn_extract_v8f64_s_s_offset4: 1053; GFX10: ; %bb.0: ; %entry 1054; GFX10-NEXT: s_mov_b32 s0, s2 1055; GFX10-NEXT: s_mov_b32 s1, s3 1056; GFX10-NEXT: s_mov_b32 s2, s4 1057; GFX10-NEXT: s_mov_b32 s3, s5 1058; GFX10-NEXT: s_mov_b32 s4, s6 1059; GFX10-NEXT: s_mov_b32 s5, s7 1060; GFX10-NEXT: s_mov_b32 s6, s8 1061; GFX10-NEXT: s_mov_b32 s7, s9 1062; GFX10-NEXT: s_mov_b32 s8, s10 1063; GFX10-NEXT: s_mov_b32 s9, s11 1064; GFX10-NEXT: s_mov_b32 m0, s18 1065; GFX10-NEXT: s_mov_b32 s10, s12 1066; GFX10-NEXT: s_mov_b32 s11, s13 1067; GFX10-NEXT: s_mov_b32 s12, s14 1068; GFX10-NEXT: s_mov_b32 s13, s15 1069; GFX10-NEXT: s_mov_b32 s14, s16 1070; GFX10-NEXT: s_mov_b32 s15, s17 1071; GFX10-NEXT: s_movrels_b64 s[0:1], s[8:9] 1072; GFX10-NEXT: ; return to shader part epilog 1073entry: 1074 %add = add i32 %sel, 4 1075 %ext = extractelement <8 x double> %vec, i32 %add 1076 ret double %ext 1077} 1078 1079define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec, i32 inreg %sel) { 1080; GCN-LABEL: dyn_extract_v8f64_s_s_offset5: 1081; GCN: ; %bb.0: ; %entry 1082; GCN-NEXT: s_mov_b32 s0, s2 1083; GCN-NEXT: s_mov_b32 s1, s3 1084; GCN-NEXT: s_mov_b32 s2, s4 1085; GCN-NEXT: s_mov_b32 s3, s5 1086; GCN-NEXT: s_mov_b32 s4, s6 1087; GCN-NEXT: s_mov_b32 s5, s7 1088; GCN-NEXT: s_mov_b32 s6, s8 1089; GCN-NEXT: s_mov_b32 s7, s9 1090; GCN-NEXT: s_mov_b32 s8, s10 1091; GCN-NEXT: s_mov_b32 s9, s11 1092; GCN-NEXT: s_mov_b32 s10, s12 1093; GCN-NEXT: s_mov_b32 s11, s13 1094; GCN-NEXT: s_mov_b32 m0, s18 1095; GCN-NEXT: s_mov_b32 s12, s14 1096; GCN-NEXT: s_mov_b32 s13, s15 1097; GCN-NEXT: s_mov_b32 s14, s16 1098; GCN-NEXT: s_mov_b32 s15, s17 1099; GCN-NEXT: 
s_movrels_b64 s[0:1], s[10:11] 1100; GCN-NEXT: ; return to shader part epilog 1101; 1102; GFX10-LABEL: dyn_extract_v8f64_s_s_offset5: 1103; GFX10: ; %bb.0: ; %entry 1104; GFX10-NEXT: s_mov_b32 s0, s2 1105; GFX10-NEXT: s_mov_b32 s1, s3 1106; GFX10-NEXT: s_mov_b32 s2, s4 1107; GFX10-NEXT: s_mov_b32 s3, s5 1108; GFX10-NEXT: s_mov_b32 s4, s6 1109; GFX10-NEXT: s_mov_b32 s5, s7 1110; GFX10-NEXT: s_mov_b32 s6, s8 1111; GFX10-NEXT: s_mov_b32 s7, s9 1112; GFX10-NEXT: s_mov_b32 s8, s10 1113; GFX10-NEXT: s_mov_b32 s9, s11 1114; GFX10-NEXT: s_mov_b32 s10, s12 1115; GFX10-NEXT: s_mov_b32 s11, s13 1116; GFX10-NEXT: s_mov_b32 m0, s18 1117; GFX10-NEXT: s_mov_b32 s12, s14 1118; GFX10-NEXT: s_mov_b32 s13, s15 1119; GFX10-NEXT: s_mov_b32 s14, s16 1120; GFX10-NEXT: s_mov_b32 s15, s17 1121; GFX10-NEXT: s_movrels_b64 s[0:1], s[10:11] 1122; GFX10-NEXT: ; return to shader part epilog 1123entry: 1124 %add = add i32 %sel, 5 1125 %ext = extractelement <8 x double> %vec, i32 %add 1126 ret double %ext 1127} 1128 1129define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec, i32 inreg %sel) { 1130; GCN-LABEL: dyn_extract_v8f64_s_s_offset6: 1131; GCN: ; %bb.0: ; %entry 1132; GCN-NEXT: s_mov_b32 s0, s2 1133; GCN-NEXT: s_mov_b32 s1, s3 1134; GCN-NEXT: s_mov_b32 s2, s4 1135; GCN-NEXT: s_mov_b32 s3, s5 1136; GCN-NEXT: s_mov_b32 s4, s6 1137; GCN-NEXT: s_mov_b32 s5, s7 1138; GCN-NEXT: s_mov_b32 s6, s8 1139; GCN-NEXT: s_mov_b32 s7, s9 1140; GCN-NEXT: s_mov_b32 s8, s10 1141; GCN-NEXT: s_mov_b32 s9, s11 1142; GCN-NEXT: s_mov_b32 s10, s12 1143; GCN-NEXT: s_mov_b32 s11, s13 1144; GCN-NEXT: s_mov_b32 s12, s14 1145; GCN-NEXT: s_mov_b32 s13, s15 1146; GCN-NEXT: s_mov_b32 m0, s18 1147; GCN-NEXT: s_mov_b32 s14, s16 1148; GCN-NEXT: s_mov_b32 s15, s17 1149; GCN-NEXT: s_movrels_b64 s[0:1], s[12:13] 1150; GCN-NEXT: ; return to shader part epilog 1151; 1152; GFX10-LABEL: dyn_extract_v8f64_s_s_offset6: 1153; GFX10: ; %bb.0: ; %entry 1154; GFX10-NEXT: s_mov_b32 s0, s2 1155; GFX10-NEXT: s_mov_b32 
s1, s3 1156; GFX10-NEXT: s_mov_b32 s2, s4 1157; GFX10-NEXT: s_mov_b32 s3, s5 1158; GFX10-NEXT: s_mov_b32 s4, s6 1159; GFX10-NEXT: s_mov_b32 s5, s7 1160; GFX10-NEXT: s_mov_b32 s6, s8 1161; GFX10-NEXT: s_mov_b32 s7, s9 1162; GFX10-NEXT: s_mov_b32 s8, s10 1163; GFX10-NEXT: s_mov_b32 s9, s11 1164; GFX10-NEXT: s_mov_b32 s10, s12 1165; GFX10-NEXT: s_mov_b32 s11, s13 1166; GFX10-NEXT: s_mov_b32 s12, s14 1167; GFX10-NEXT: s_mov_b32 s13, s15 1168; GFX10-NEXT: s_mov_b32 m0, s18 1169; GFX10-NEXT: s_mov_b32 s14, s16 1170; GFX10-NEXT: s_mov_b32 s15, s17 1171; GFX10-NEXT: s_movrels_b64 s[0:1], s[12:13] 1172; GFX10-NEXT: ; return to shader part epilog 1173entry: 1174 %add = add i32 %sel, 6 1175 %ext = extractelement <8 x double> %vec, i32 %add 1176 ret double %ext 1177} 1178 1179define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec, i32 inreg %sel) { 1180; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset7: 1181; GPRIDX: ; %bb.0: ; %entry 1182; GPRIDX-NEXT: s_mov_b32 s0, s2 1183; GPRIDX-NEXT: s_mov_b32 s1, s3 1184; GPRIDX-NEXT: s_mov_b32 s2, s4 1185; GPRIDX-NEXT: s_mov_b32 s3, s5 1186; GPRIDX-NEXT: s_mov_b32 s4, s6 1187; GPRIDX-NEXT: s_mov_b32 s5, s7 1188; GPRIDX-NEXT: s_mov_b32 s6, s8 1189; GPRIDX-NEXT: s_mov_b32 s7, s9 1190; GPRIDX-NEXT: s_mov_b32 s8, s10 1191; GPRIDX-NEXT: s_mov_b32 s9, s11 1192; GPRIDX-NEXT: s_mov_b32 s10, s12 1193; GPRIDX-NEXT: s_mov_b32 s11, s13 1194; GPRIDX-NEXT: s_mov_b32 s12, s14 1195; GPRIDX-NEXT: s_mov_b32 s13, s15 1196; GPRIDX-NEXT: s_mov_b32 s14, s16 1197; GPRIDX-NEXT: s_mov_b32 s15, s17 1198; GPRIDX-NEXT: s_mov_b32 m0, s18 1199; GPRIDX-NEXT: s_nop 0 1200; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[14:15] 1201; GPRIDX-NEXT: ; return to shader part epilog 1202; 1203; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset7: 1204; MOVREL: ; %bb.0: ; %entry 1205; MOVREL-NEXT: s_mov_b32 s0, s2 1206; MOVREL-NEXT: s_mov_b32 s1, s3 1207; MOVREL-NEXT: s_mov_b32 s2, s4 1208; MOVREL-NEXT: s_mov_b32 s3, s5 1209; MOVREL-NEXT: s_mov_b32 s4, s6 1210; MOVREL-NEXT: 
s_mov_b32 s5, s7 1211; MOVREL-NEXT: s_mov_b32 s6, s8 1212; MOVREL-NEXT: s_mov_b32 s7, s9 1213; MOVREL-NEXT: s_mov_b32 s8, s10 1214; MOVREL-NEXT: s_mov_b32 s9, s11 1215; MOVREL-NEXT: s_mov_b32 s10, s12 1216; MOVREL-NEXT: s_mov_b32 s11, s13 1217; MOVREL-NEXT: s_mov_b32 s12, s14 1218; MOVREL-NEXT: s_mov_b32 s13, s15 1219; MOVREL-NEXT: s_mov_b32 s14, s16 1220; MOVREL-NEXT: s_mov_b32 s15, s17 1221; MOVREL-NEXT: s_mov_b32 m0, s18 1222; MOVREL-NEXT: s_movrels_b64 s[0:1], s[14:15] 1223; MOVREL-NEXT: ; return to shader part epilog 1224; 1225; GFX10-LABEL: dyn_extract_v8f64_s_s_offset7: 1226; GFX10: ; %bb.0: ; %entry 1227; GFX10-NEXT: s_mov_b32 s0, s2 1228; GFX10-NEXT: s_mov_b32 s1, s3 1229; GFX10-NEXT: s_mov_b32 s2, s4 1230; GFX10-NEXT: s_mov_b32 s3, s5 1231; GFX10-NEXT: s_mov_b32 s4, s6 1232; GFX10-NEXT: s_mov_b32 s5, s7 1233; GFX10-NEXT: s_mov_b32 s6, s8 1234; GFX10-NEXT: s_mov_b32 s7, s9 1235; GFX10-NEXT: s_mov_b32 s8, s10 1236; GFX10-NEXT: s_mov_b32 s9, s11 1237; GFX10-NEXT: s_mov_b32 s10, s12 1238; GFX10-NEXT: s_mov_b32 s11, s13 1239; GFX10-NEXT: s_mov_b32 s12, s14 1240; GFX10-NEXT: s_mov_b32 s13, s15 1241; GFX10-NEXT: s_mov_b32 s14, s16 1242; GFX10-NEXT: s_mov_b32 s15, s17 1243; GFX10-NEXT: s_mov_b32 m0, s18 1244; GFX10-NEXT: s_movrels_b64 s[0:1], s[14:15] 1245; GFX10-NEXT: ; return to shader part epilog 1246entry: 1247 %add = add i32 %sel, 7 1248 %ext = extractelement <8 x double> %vec, i32 %add 1249 ret double %ext 1250} 1251 1252define amdgpu_ps double @dyn_extract_v8f64_s_s_offsetm1(<8 x double> inreg %vec, i32 inreg %sel) { 1253; GCN-LABEL: dyn_extract_v8f64_s_s_offsetm1: 1254; GCN: ; %bb.0: ; %entry 1255; GCN-NEXT: s_mov_b32 s0, s2 1256; GCN-NEXT: s_mov_b32 s1, s3 1257; GCN-NEXT: s_add_i32 m0, s18, -1 1258; GCN-NEXT: s_mov_b32 s2, s4 1259; GCN-NEXT: s_mov_b32 s3, s5 1260; GCN-NEXT: s_mov_b32 s4, s6 1261; GCN-NEXT: s_mov_b32 s5, s7 1262; GCN-NEXT: s_mov_b32 s6, s8 1263; GCN-NEXT: s_mov_b32 s7, s9 1264; GCN-NEXT: s_mov_b32 s8, s10 1265; GCN-NEXT: s_mov_b32 s9, s11 
1266; GCN-NEXT: s_mov_b32 s10, s12 1267; GCN-NEXT: s_mov_b32 s11, s13 1268; GCN-NEXT: s_mov_b32 s12, s14 1269; GCN-NEXT: s_mov_b32 s13, s15 1270; GCN-NEXT: s_mov_b32 s14, s16 1271; GCN-NEXT: s_mov_b32 s15, s17 1272; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1] 1273; GCN-NEXT: ; return to shader part epilog 1274; 1275; GFX10-LABEL: dyn_extract_v8f64_s_s_offsetm1: 1276; GFX10: ; %bb.0: ; %entry 1277; GFX10-NEXT: s_mov_b32 s0, s2 1278; GFX10-NEXT: s_mov_b32 s1, s3 1279; GFX10-NEXT: s_add_i32 m0, s18, -1 1280; GFX10-NEXT: s_mov_b32 s2, s4 1281; GFX10-NEXT: s_mov_b32 s3, s5 1282; GFX10-NEXT: s_mov_b32 s4, s6 1283; GFX10-NEXT: s_mov_b32 s5, s7 1284; GFX10-NEXT: s_mov_b32 s6, s8 1285; GFX10-NEXT: s_mov_b32 s7, s9 1286; GFX10-NEXT: s_mov_b32 s8, s10 1287; GFX10-NEXT: s_mov_b32 s9, s11 1288; GFX10-NEXT: s_mov_b32 s10, s12 1289; GFX10-NEXT: s_mov_b32 s11, s13 1290; GFX10-NEXT: s_mov_b32 s12, s14 1291; GFX10-NEXT: s_mov_b32 s13, s15 1292; GFX10-NEXT: s_mov_b32 s14, s16 1293; GFX10-NEXT: s_mov_b32 s15, s17 1294; GFX10-NEXT: s_movrels_b64 s[0:1], s[0:1] 1295; GFX10-NEXT: ; return to shader part epilog 1296entry: 1297 %add = add i32 %sel, -1 1298 %ext = extractelement <8 x double> %vec, i32 %add 1299 ret double %ext 1300} 1301 1302define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) { 1303; GPRIDX-LABEL: dyn_extract_v8f64_v_v_offset3: 1304; GPRIDX: ; %bb.0: ; %entry 1305; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1306; GPRIDX-NEXT: v_add_u32_e32 v16, 3, v16 1307; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16 1308; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1309; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1310; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16 1311; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1312; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 1313; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16 1314; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 1315; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 1316; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, 
v16 1317; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 1318; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 1319; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16 1320; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 1321; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 1322; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16 1323; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 1324; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc 1325; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16 1326; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc 1327; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc 1328; GPRIDX-NEXT: s_setpc_b64 s[30:31] 1329; 1330; MOVREL-LABEL: dyn_extract_v8f64_v_v_offset3: 1331; MOVREL: ; %bb.0: ; %entry 1332; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1333; MOVREL-NEXT: v_add_u32_e32 v16, vcc, 3, v16 1334; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16 1335; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1336; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1337; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16 1338; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1339; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 1340; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16 1341; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 1342; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 1343; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16 1344; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 1345; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 1346; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16 1347; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 1348; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 1349; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16 1350; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 1351; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc 1352; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16 1353; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc 1354; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc 1355; MOVREL-NEXT: s_setpc_b64 s[30:31] 1356; 1357; GFX10-LABEL: dyn_extract_v8f64_v_v_offset3: 
1358; GFX10: ; %bb.0: ; %entry 1359; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1360; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1361; GFX10-NEXT: v_add_nc_u32_e32 v16, 3, v16 1362; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16 1363; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 1364; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 1365; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16 1366; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 1367; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 1368; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16 1369; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 1370; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo 1371; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16 1372; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 1373; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo 1374; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16 1375; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 1376; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo 1377; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16 1378; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo 1379; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo 1380; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16 1381; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo 1382; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo 1383; GFX10-NEXT: s_setpc_b64 s[30:31] 1384entry: 1385 %add = add i32 %sel, 3 1386 %ext = extractelement <8 x double> %vec, i32 %add 1387 ret double %ext 1388} 1389 1390define i8 addrspace(3)* @dyn_extract_v8p3_v_v(<8 x i8 addrspace(3)*> %vec, i32 %idx) { 1391; GCN-LABEL: dyn_extract_v8p3_v_v: 1392; GCN: ; %bb.0: ; %entry 1393; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1394; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 1395; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1396; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8 1397; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1398; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8 1399; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1400; GCN-NEXT: 
v_cmp_eq_u32_e32 vcc, 4, v8 1401; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1402; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8 1403; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 1404; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8 1405; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 1406; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8 1407; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 1408; GCN-NEXT: s_setpc_b64 s[30:31] 1409; 1410; GFX10-LABEL: dyn_extract_v8p3_v_v: 1411; GFX10: ; %bb.0: ; %entry 1412; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1413; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1414; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8 1415; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 1416; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8 1417; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 1418; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8 1419; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 1420; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v8 1421; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 1422; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v8 1423; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 1424; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v8 1425; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 1426; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v8 1427; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo 1428; GFX10-NEXT: s_setpc_b64 s[30:31] 1429entry: 1430 %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx 1431 ret i8 addrspace(3)* %ext 1432} 1433 1434define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x i8 addrspace(3)*> inreg %vec, i32 inreg %idx) { 1435; GPRIDX-LABEL: dyn_extract_v8p3_s_s: 1436; GPRIDX: ; %bb.0: ; %entry 1437; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1 1438; GPRIDX-NEXT: s_cselect_b32 s0, s3, s2 1439; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2 1440; GPRIDX-NEXT: s_cselect_b32 s0, s4, s0 1441; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3 1442; GPRIDX-NEXT: s_cselect_b32 s0, s5, s0 1443; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4 1444; GPRIDX-NEXT: s_cselect_b32 s0, s6, s0 1445; GPRIDX-NEXT: 
s_cmp_eq_u32 s10, 5 1446; GPRIDX-NEXT: s_cselect_b32 s0, s7, s0 1447; GPRIDX-NEXT: s_cmp_eq_u32 s10, 6 1448; GPRIDX-NEXT: s_cselect_b32 s0, s8, s0 1449; GPRIDX-NEXT: s_cmp_eq_u32 s10, 7 1450; GPRIDX-NEXT: s_cselect_b32 s0, s9, s0 1451; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 1452; GPRIDX-NEXT: ds_write_b32 v0, v0 1453; GPRIDX-NEXT: s_endpgm 1454; 1455; MOVREL-LABEL: dyn_extract_v8p3_s_s: 1456; MOVREL: ; %bb.0: ; %entry 1457; MOVREL-NEXT: s_cmp_eq_u32 s10, 1 1458; MOVREL-NEXT: s_cselect_b32 s0, s3, s2 1459; MOVREL-NEXT: s_cmp_eq_u32 s10, 2 1460; MOVREL-NEXT: s_cselect_b32 s0, s4, s0 1461; MOVREL-NEXT: s_cmp_eq_u32 s10, 3 1462; MOVREL-NEXT: s_cselect_b32 s0, s5, s0 1463; MOVREL-NEXT: s_cmp_eq_u32 s10, 4 1464; MOVREL-NEXT: s_cselect_b32 s0, s6, s0 1465; MOVREL-NEXT: s_cmp_eq_u32 s10, 5 1466; MOVREL-NEXT: s_cselect_b32 s0, s7, s0 1467; MOVREL-NEXT: s_cmp_eq_u32 s10, 6 1468; MOVREL-NEXT: s_cselect_b32 s0, s8, s0 1469; MOVREL-NEXT: s_cmp_eq_u32 s10, 7 1470; MOVREL-NEXT: s_cselect_b32 s0, s9, s0 1471; MOVREL-NEXT: v_mov_b32_e32 v0, s0 1472; MOVREL-NEXT: s_mov_b32 m0, -1 1473; MOVREL-NEXT: ds_write_b32 v0, v0 1474; MOVREL-NEXT: s_endpgm 1475; 1476; GFX10-LABEL: dyn_extract_v8p3_s_s: 1477; GFX10: ; %bb.0: ; %entry 1478; GFX10-NEXT: s_cmp_eq_u32 s10, 1 1479; GFX10-NEXT: s_cselect_b32 s0, s3, s2 1480; GFX10-NEXT: s_cmp_eq_u32 s10, 2 1481; GFX10-NEXT: s_cselect_b32 s0, s4, s0 1482; GFX10-NEXT: s_cmp_eq_u32 s10, 3 1483; GFX10-NEXT: s_cselect_b32 s0, s5, s0 1484; GFX10-NEXT: s_cmp_eq_u32 s10, 4 1485; GFX10-NEXT: s_cselect_b32 s0, s6, s0 1486; GFX10-NEXT: s_cmp_eq_u32 s10, 5 1487; GFX10-NEXT: s_cselect_b32 s0, s7, s0 1488; GFX10-NEXT: s_cmp_eq_u32 s10, 6 1489; GFX10-NEXT: s_cselect_b32 s0, s8, s0 1490; GFX10-NEXT: s_cmp_eq_u32 s10, 7 1491; GFX10-NEXT: s_cselect_b32 s0, s9, s0 1492; GFX10-NEXT: v_mov_b32_e32 v0, s0 1493; GFX10-NEXT: ds_write_b32 v0, v0 1494; GFX10-NEXT: s_endpgm 1495entry: 1496 %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx 1497 store i8 addrspace(3)* %ext, 
i8 addrspace(3)* addrspace(3)* undef 1498 ret void 1499} 1500 1501define i8 addrspace(1)* @dyn_extract_v8p1_v_v(<8 x i8 addrspace(1)*> %vec, i32 %idx) { 1502; GCN-LABEL: dyn_extract_v8p1_v_v: 1503; GCN: ; %bb.0: ; %entry 1504; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1505; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16 1506; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1507; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1508; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16 1509; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1510; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 1511; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16 1512; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 1513; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 1514; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16 1515; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 1516; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 1517; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16 1518; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 1519; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 1520; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16 1521; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 1522; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc 1523; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16 1524; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc 1525; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc 1526; GCN-NEXT: s_setpc_b64 s[30:31] 1527; 1528; GFX10-LABEL: dyn_extract_v8p1_v_v: 1529; GFX10: ; %bb.0: ; %entry 1530; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1531; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1532; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16 1533; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 1534; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 1535; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16 1536; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 1537; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 1538; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16 1539; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 1540; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo 
1541; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16 1542; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 1543; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo 1544; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16 1545; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 1546; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo 1547; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16 1548; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo 1549; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo 1550; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16 1551; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo 1552; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo 1553; GFX10-NEXT: s_setpc_b64 s[30:31] 1554entry: 1555 %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx 1556 ret i8 addrspace(1)* %ext 1557} 1558 1559define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x i8 addrspace(1)*> inreg %vec, i32 inreg %idx) { 1560; GPRIDX-LABEL: dyn_extract_v8p1_s_s: 1561; GPRIDX: ; %bb.0: ; %entry 1562; GPRIDX-NEXT: s_mov_b32 s0, s2 1563; GPRIDX-NEXT: s_mov_b32 s1, s3 1564; GPRIDX-NEXT: s_mov_b32 m0, s18 1565; GPRIDX-NEXT: s_mov_b32 s2, s4 1566; GPRIDX-NEXT: s_mov_b32 s3, s5 1567; GPRIDX-NEXT: s_mov_b32 s4, s6 1568; GPRIDX-NEXT: s_mov_b32 s5, s7 1569; GPRIDX-NEXT: s_mov_b32 s6, s8 1570; GPRIDX-NEXT: s_mov_b32 s7, s9 1571; GPRIDX-NEXT: s_mov_b32 s8, s10 1572; GPRIDX-NEXT: s_mov_b32 s9, s11 1573; GPRIDX-NEXT: s_mov_b32 s10, s12 1574; GPRIDX-NEXT: s_mov_b32 s11, s13 1575; GPRIDX-NEXT: s_mov_b32 s12, s14 1576; GPRIDX-NEXT: s_mov_b32 s13, s15 1577; GPRIDX-NEXT: s_mov_b32 s14, s16 1578; GPRIDX-NEXT: s_mov_b32 s15, s17 1579; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] 1580; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 1581; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 1582; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 1583; GPRIDX-NEXT: s_endpgm 1584; 1585; MOVREL-LABEL: dyn_extract_v8p1_s_s: 1586; MOVREL: ; %bb.0: ; %entry 1587; MOVREL-NEXT: s_mov_b32 s0, s2 1588; MOVREL-NEXT: s_mov_b32 s1, s3 1589; MOVREL-NEXT: 
s_mov_b32 m0, s18 1590; MOVREL-NEXT: s_mov_b32 s2, s4 1591; MOVREL-NEXT: s_mov_b32 s3, s5 1592; MOVREL-NEXT: s_mov_b32 s4, s6 1593; MOVREL-NEXT: s_mov_b32 s5, s7 1594; MOVREL-NEXT: s_mov_b32 s6, s8 1595; MOVREL-NEXT: s_mov_b32 s7, s9 1596; MOVREL-NEXT: s_mov_b32 s8, s10 1597; MOVREL-NEXT: s_mov_b32 s9, s11 1598; MOVREL-NEXT: s_mov_b32 s10, s12 1599; MOVREL-NEXT: s_mov_b32 s11, s13 1600; MOVREL-NEXT: s_mov_b32 s12, s14 1601; MOVREL-NEXT: s_mov_b32 s13, s15 1602; MOVREL-NEXT: s_mov_b32 s14, s16 1603; MOVREL-NEXT: s_mov_b32 s15, s17 1604; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] 1605; MOVREL-NEXT: v_mov_b32_e32 v0, s0 1606; MOVREL-NEXT: v_mov_b32_e32 v1, s1 1607; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 1608; MOVREL-NEXT: s_endpgm 1609; 1610; GFX10-LABEL: dyn_extract_v8p1_s_s: 1611; GFX10: ; %bb.0: ; %entry 1612; GFX10-NEXT: s_mov_b32 s0, s2 1613; GFX10-NEXT: s_mov_b32 s1, s3 1614; GFX10-NEXT: s_mov_b32 m0, s18 1615; GFX10-NEXT: s_mov_b32 s2, s4 1616; GFX10-NEXT: s_mov_b32 s3, s5 1617; GFX10-NEXT: s_mov_b32 s4, s6 1618; GFX10-NEXT: s_mov_b32 s5, s7 1619; GFX10-NEXT: s_mov_b32 s6, s8 1620; GFX10-NEXT: s_mov_b32 s7, s9 1621; GFX10-NEXT: s_mov_b32 s8, s10 1622; GFX10-NEXT: s_mov_b32 s9, s11 1623; GFX10-NEXT: s_mov_b32 s10, s12 1624; GFX10-NEXT: s_mov_b32 s11, s13 1625; GFX10-NEXT: s_mov_b32 s12, s14 1626; GFX10-NEXT: s_mov_b32 s13, s15 1627; GFX10-NEXT: s_mov_b32 s14, s16 1628; GFX10-NEXT: s_mov_b32 s15, s17 1629; GFX10-NEXT: s_movrels_b64 s[0:1], s[0:1] 1630; GFX10-NEXT: v_mov_b32_e32 v0, s0 1631; GFX10-NEXT: v_mov_b32_e32 v1, s1 1632; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 1633; GFX10-NEXT: s_endpgm 1634entry: 1635 %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx 1636 store i8 addrspace(1)* %ext, i8 addrspace(1)* addrspace(1)* undef 1637 ret void 1638} 1639 

; Dynamic extractelement from a <16 x float> argument using an `inreg` i32
; index. The expected code is one indexed register read: s_set_gpr_idx_on/off
; around v_mov_b32 under the GPRIDX prefix, m0 plus v_movrels_b32 otherwise.
define amdgpu_ps float @dyn_extract_v16f32_v_s(<16 x float> %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v16f32_v_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v0, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v16f32_v_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 m0, s2
; MOVREL-NEXT: v_movrels_b32_e32 v0, v0
; MOVREL-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: dyn_extract_v16f32_v_s:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_mov_b32 m0, s2
; GFX10-NEXT: v_movrels_b32_e32 v0, v0
; GFX10-NEXT: ; return to shader part epilog
entry:
  %elt = extractelement <16 x float> %vec, i32 %sel
  ret float %elt
}

; Same as the <16 x float> case above but with a <32 x float> argument; the
; expected instruction sequences are identical.
define amdgpu_ps float @dyn_extract_v32f32_v_s(<32 x float> %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v32f32_v_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v0, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v32f32_v_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 m0, s2
; MOVREL-NEXT: v_movrels_b32_e32 v0, v0
; MOVREL-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: dyn_extract_v32f32_v_s:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_mov_b32 m0, s2
; GFX10-NEXT: v_movrels_b32_e32 v0, v0
; GFX10-NEXT: ; return to shader part epilog
entry:
  %elt = extractelement <32 x float> %vec, i32 %sel
  ret float %elt
}

; Dynamic extractelement from a <16 x double> argument using an `inreg` i32
; index. The expected code shifts the index left by one, performs two 32-bit
; indexed reads (v0 and v1 as bases), and returns the halves in s0/s1 via
; v_readfirstlane_b32.
define amdgpu_ps double @dyn_extract_v16f64_v_s(<16 x double> %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v16f64_v_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v32, v0
; GPRIDX-NEXT: v_mov_b32_e32 v0, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: v_readfirstlane_b32 s0, v32
; GPRIDX-NEXT: v_readfirstlane_b32 s1, v0
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v16f64_v_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_lshl_b32 m0, s2, 1
; MOVREL-NEXT: v_movrels_b32_e32 v32, v0
; MOVREL-NEXT: v_movrels_b32_e32 v0, v1
; MOVREL-NEXT: v_readfirstlane_b32 s0, v32
; MOVREL-NEXT: v_readfirstlane_b32 s1, v0
; MOVREL-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: dyn_extract_v16f64_v_s:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_lshl_b32 m0, s2, 1
; GFX10-NEXT: v_movrels_b32_e32 v32, v0
; GFX10-NEXT: v_movrels_b32_e32 v0, v1
; GFX10-NEXT: v_readfirstlane_b32 s0, v32
; GFX10-NEXT: v_readfirstlane_b32 s1, v0
; GFX10-NEXT: ; return to shader part epilog
entry:
  %elt = extractelement <16 x double> %vec, i32 %sel
  ret double %elt
}

; Dynamic extractelement from the constant vector <1.0, 2.0, ..., 16.0> using
; an `inreg` i32 index. The expected code materializes the sixteen constants
; in s4..s19, copies the index into m0, and reads with s_movrels_b32.
define amdgpu_ps float @dyn_extract_v16f32_s_s(i32 inreg %sel) {
; GCN-LABEL: dyn_extract_v16f32_s_s:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_mov_b32 s4, 1.0
; GCN-NEXT: s_mov_b32 m0, s2
; GCN-NEXT: s_mov_b32 s19, 0x41800000
; GCN-NEXT: s_mov_b32 s18, 0x41700000
; GCN-NEXT: s_mov_b32 s17, 0x41600000
; GCN-NEXT: s_mov_b32 s16, 0x41500000
; GCN-NEXT: s_mov_b32 s15, 0x41400000
; GCN-NEXT: s_mov_b32 s14, 0x41300000
; GCN-NEXT: s_mov_b32 s13, 0x41200000
; GCN-NEXT: s_mov_b32 s12, 0x41100000
; GCN-NEXT: s_mov_b32 s11, 0x41000000
; GCN-NEXT: s_mov_b32 s10, 0x40e00000
; GCN-NEXT: s_mov_b32 s9, 0x40c00000
; GCN-NEXT: s_mov_b32 s8, 0x40a00000
; GCN-NEXT: s_mov_b32 s7, 4.0
; GCN-NEXT: s_mov_b32 s6, 0x40400000
; GCN-NEXT: s_mov_b32 s5, 2.0
; GCN-NEXT: s_movrels_b32 s0, s4
; GCN-NEXT: v_mov_b32_e32 v0, s0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: dyn_extract_v16f32_s_s:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_mov_b32 s4, 1.0
; GFX10-NEXT: s_mov_b32 m0, s2
; GFX10-NEXT: s_mov_b32 s19, 0x41800000
; GFX10-NEXT: s_mov_b32 s18, 0x41700000
; GFX10-NEXT: s_mov_b32 s17, 0x41600000
; GFX10-NEXT: s_mov_b32 s16, 0x41500000
; GFX10-NEXT: s_mov_b32 s15, 0x41400000
; GFX10-NEXT: s_mov_b32 s14, 0x41300000
; GFX10-NEXT: s_mov_b32 s13, 0x41200000
; GFX10-NEXT: s_mov_b32 s12, 0x41100000
; GFX10-NEXT: s_mov_b32 s11, 0x41000000
; GFX10-NEXT: s_mov_b32 s10, 0x40e00000
; GFX10-NEXT: s_mov_b32 s9, 0x40c00000
; GFX10-NEXT: s_mov_b32 s8, 0x40a00000
; GFX10-NEXT: s_mov_b32 s7, 4.0
; GFX10-NEXT: s_mov_b32 s6, 0x40400000
; GFX10-NEXT: s_mov_b32 s5, 2.0
; GFX10-NEXT: s_movrels_b32 s0, s4
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: ; return to shader part epilog
entry:
  %elt = extractelement <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, i32 %sel
  ret float %elt
}

; Dynamic extractelement from the constant vector <1.0, 2.0, ..., 32.0> using
; an `inreg` i32 index. The expected code materializes the thirty-two
; constants in s36..s67, copies the index into m0, and reads with
; s_movrels_b32.
define amdgpu_ps float @dyn_extract_v32f32_s_s(i32 inreg %sel) {
; GCN-LABEL: dyn_extract_v32f32_s_s:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_mov_b32 s36, 1.0
; GCN-NEXT: s_mov_b32 m0, s2
; GCN-NEXT: s_mov_b32 s67, 0x42000000
; GCN-NEXT: s_mov_b32 s66, 0x41f80000
; GCN-NEXT: s_mov_b32 s65, 0x41f00000
; GCN-NEXT: s_mov_b32 s64, 0x41e80000
; GCN-NEXT: s_mov_b32 s63, 0x41e00000
; GCN-NEXT: s_mov_b32 s62, 0x41d80000
; GCN-NEXT: s_mov_b32 s61, 0x41d00000
; GCN-NEXT: s_mov_b32 s60, 0x41c80000
; GCN-NEXT: s_mov_b32 s59, 0x41c00000
; GCN-NEXT: s_mov_b32 s58, 0x41b80000
; GCN-NEXT: s_mov_b32 s57, 0x41b00000
; GCN-NEXT: s_mov_b32 s56, 0x41a80000
; GCN-NEXT: s_mov_b32 s55, 0x41a00000
; GCN-NEXT: s_mov_b32 s54, 0x41980000
; GCN-NEXT: s_mov_b32 s53, 0x41900000
; GCN-NEXT: s_mov_b32 s52, 0x41880000
; GCN-NEXT: s_mov_b32 s51, 0x41800000
; GCN-NEXT: s_mov_b32 s50, 0x41700000
; GCN-NEXT: s_mov_b32 s49, 0x41600000
; GCN-NEXT: s_mov_b32 s48, 0x41500000
; GCN-NEXT: s_mov_b32 s47, 0x41400000
; GCN-NEXT: s_mov_b32 s46, 0x41300000
; GCN-NEXT: s_mov_b32 s45, 0x41200000
; GCN-NEXT: s_mov_b32 s44, 0x41100000
; GCN-NEXT: s_mov_b32 s43, 0x41000000
; GCN-NEXT: s_mov_b32 s42, 0x40e00000
; GCN-NEXT: s_mov_b32 s41, 0x40c00000
; GCN-NEXT: s_mov_b32 s40, 0x40a00000
; GCN-NEXT: s_mov_b32 s39, 4.0
; GCN-NEXT: s_mov_b32 s38, 0x40400000
; GCN-NEXT: s_mov_b32 s37, 2.0
; GCN-NEXT: s_movrels_b32 s0, s36
; GCN-NEXT: v_mov_b32_e32 v0, s0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: dyn_extract_v32f32_s_s:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_mov_b32 s36, 1.0
; GFX10-NEXT: s_mov_b32 m0, s2
; GFX10-NEXT: s_mov_b32 s67, 0x42000000
; GFX10-NEXT: s_mov_b32 s66, 0x41f80000
; GFX10-NEXT: s_mov_b32 s65, 0x41f00000
; GFX10-NEXT: s_mov_b32 s64, 0x41e80000
; GFX10-NEXT: s_mov_b32 s63, 0x41e00000
; GFX10-NEXT: s_mov_b32 s62, 0x41d80000
; GFX10-NEXT: s_mov_b32 s61, 0x41d00000
; GFX10-NEXT: s_mov_b32 s60, 0x41c80000
; GFX10-NEXT: s_mov_b32 s59, 0x41c00000
; GFX10-NEXT: s_mov_b32 s58, 0x41b80000
; GFX10-NEXT: s_mov_b32 s57, 0x41b00000
; GFX10-NEXT: s_mov_b32 s56, 0x41a80000
; GFX10-NEXT: s_mov_b32 s55, 0x41a00000
; GFX10-NEXT: s_mov_b32 s54, 0x41980000
; GFX10-NEXT: s_mov_b32 s53, 0x41900000
; GFX10-NEXT: s_mov_b32 s52, 0x41880000
; GFX10-NEXT: s_mov_b32 s51, 0x41800000
; GFX10-NEXT: s_mov_b32 s50, 0x41700000
; GFX10-NEXT: s_mov_b32 s49, 0x41600000
; GFX10-NEXT: s_mov_b32 s48, 0x41500000
; GFX10-NEXT: s_mov_b32 s47, 0x41400000
; GFX10-NEXT: s_mov_b32 s46, 0x41300000
; GFX10-NEXT: s_mov_b32 s45, 0x41200000
; GFX10-NEXT: s_mov_b32 s44, 0x41100000
; GFX10-NEXT: s_mov_b32 s43, 0x41000000
; GFX10-NEXT: s_mov_b32 s42, 0x40e00000
; GFX10-NEXT: s_mov_b32 s41, 0x40c00000
; GFX10-NEXT: s_mov_b32 s40, 0x40a00000
; GFX10-NEXT: s_mov_b32 s39, 4.0
; GFX10-NEXT: s_mov_b32 s38, 0x40400000
; GFX10-NEXT: s_mov_b32 s37, 2.0
; GFX10-NEXT: s_movrels_b32 s0, s36
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: ; return to shader part epilog
entry:
  %elt = extractelement <32 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0, float 17.0, float 18.0, float 19.0, float 20.0, float 21.0, float 22.0, float 23.0, float 24.0, float 25.0, float 26.0, float 27.0, float 28.0, float 29.0, float 30.0, float 31.0, float 32.0>, i32 %sel
  ret float %elt
}

; Dynamic extractelement from the constant vector <1.0, 2.0, ..., 16.0> of
; doubles using an `inreg` i32 index. The expected code builds the constants
; in s[36:67] (s66 holds the shared zero low half), copies the index into m0,
; and reads a 64-bit pair with s_movrels_b64.
define amdgpu_ps double @dyn_extract_v16f64_s_s(i32 inreg %sel) {
; GCN-LABEL: dyn_extract_v16f64_s_s:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_mov_b32 s66, 0
; GCN-NEXT: s_mov_b64 s[36:37], 1.0
; GCN-NEXT: s_mov_b32 m0, s2
; GCN-NEXT: s_mov_b32 s67, 0x40300000
; GCN-NEXT: s_mov_b32 s65, 0x402e0000
; GCN-NEXT: s_mov_b32 s64, s66
; GCN-NEXT: s_mov_b32 s63, 0x402c0000
; GCN-NEXT: s_mov_b32 s62, s66
; GCN-NEXT: s_mov_b32 s61, 0x402a0000
; GCN-NEXT: s_mov_b32 s60, s66
; GCN-NEXT: s_mov_b32 s59, 0x40280000
; GCN-NEXT: s_mov_b32 s58, s66
; GCN-NEXT: s_mov_b32 s57, 0x40260000
; GCN-NEXT: s_mov_b32 s56, s66
; GCN-NEXT: s_mov_b32 s55, 0x40240000
; GCN-NEXT: s_mov_b32 s54, s66
; GCN-NEXT: s_mov_b32 s53, 0x40220000
; GCN-NEXT: s_mov_b32 s52, s66
; GCN-NEXT: s_mov_b32 s51, 0x40200000
; GCN-NEXT: s_mov_b32 s50, s66
; GCN-NEXT: s_mov_b32 s49, 0x401c0000
; GCN-NEXT: s_mov_b32 s48, s66
; GCN-NEXT: s_mov_b32 s47, 0x40180000
; GCN-NEXT: s_mov_b32 s46, s66
; GCN-NEXT: s_mov_b32 s45, 0x40140000
; GCN-NEXT: s_mov_b32 s44, s66
; GCN-NEXT: s_mov_b64 s[42:43], 4.0
; GCN-NEXT: s_mov_b32 s41, 0x40080000
; GCN-NEXT: s_mov_b32 s40, s66
; GCN-NEXT: s_mov_b64 s[38:39], 2.0
; GCN-NEXT: s_movrels_b64 s[0:1], s[36:37]
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: dyn_extract_v16f64_s_s:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_mov_b32 s66, 0
; GFX10-NEXT: s_mov_b64 s[36:37], 1.0
; GFX10-NEXT: s_mov_b32 m0, s2
; GFX10-NEXT: s_mov_b32 s67, 0x40300000
; GFX10-NEXT: s_mov_b32 s65, 0x402e0000
; GFX10-NEXT: s_mov_b32 s64, s66
; GFX10-NEXT: s_mov_b32 s63, 0x402c0000
; GFX10-NEXT: s_mov_b32 s62, s66
; GFX10-NEXT: s_mov_b32 s61, 0x402a0000
; GFX10-NEXT: s_mov_b32 s60, s66
; GFX10-NEXT: s_mov_b32 s59, 0x40280000
; GFX10-NEXT: s_mov_b32 s58, s66
; GFX10-NEXT: s_mov_b32 s57, 0x40260000
; GFX10-NEXT: s_mov_b32 s56, s66
; GFX10-NEXT: s_mov_b32 s55, 0x40240000
; GFX10-NEXT: s_mov_b32 s54, s66
; GFX10-NEXT: s_mov_b32 s53, 0x40220000
; GFX10-NEXT: s_mov_b32 s52, s66
; GFX10-NEXT: s_mov_b32 s51, 0x40200000
; GFX10-NEXT: s_mov_b32 s50, s66
; GFX10-NEXT: s_mov_b32 s49, 0x401c0000
; GFX10-NEXT: s_mov_b32 s48, s66
; GFX10-NEXT: s_mov_b32 s47, 0x40180000
; GFX10-NEXT: s_mov_b32 s46, s66
; GFX10-NEXT: s_mov_b32 s45, 0x40140000
; GFX10-NEXT: s_mov_b32 s44, s66
; GFX10-NEXT: s_mov_b64 s[42:43], 4.0
; GFX10-NEXT: s_mov_b32 s41, 0x40080000
; GFX10-NEXT: s_mov_b32 s40, s66
; GFX10-NEXT: s_mov_b64 s[38:39], 2.0
; GFX10-NEXT: s_movrels_b64 s[0:1], s[36:37]
; GFX10-NEXT: ; return to shader part epilog
entry:
  %elt = extractelement <16 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0, double 16.0>, i32 %sel
  ret double %elt
}

; Dynamic extractelement from an `inreg` <6 x float> with a non-inreg i32
; index. The expected code is a compare-and-select chain: v_cmp_eq_u32 of the
; index (v0) against 1..5, each followed by v_cndmask_b32.
define amdgpu_ps float @dyn_extract_v6f32_s_v(<6 x float> inreg %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v6f32_s_v:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_mov_b32 s0, s2
; GCN-NEXT: s_mov_b32 s1, s3
; GCN-NEXT: v_mov_b32_e32 v1, s0
; GCN-NEXT: v_mov_b32_e32 v2, s1
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT: v_mov_b32_e32 v3, s4
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
; GCN-NEXT: v_mov_b32_e32 v4, s5
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
; GCN-NEXT: v_mov_b32_e32 v5, s6
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
; GCN-NEXT: v_mov_b32_e32 v6, s7
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v6, vcc
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: dyn_extract_v6f32_s_v:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s5, vcc_lo
; GFX10-NEXT: ; return to shader part epilog
entry:
  %elt = extractelement <6 x float> %vec, i32 %sel
  ret float %elt
}

; Dynamic extractelement from a <6 x float> argument with an i32 index, in a
; function without the amdgpu_ps calling convention. The expected code compares
; the index (v6) against 1..5 and selects with v_cndmask_b32, then returns with
; s_setpc_b64.
define float @dyn_extract_v6f32_v_v(<6 x float> %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v6f32_v_v:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v6
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v6
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v6
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v6
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: dyn_extract_v6f32_v_v:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v6
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v6
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v6
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v6
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
  %elt = extractelement <6 x float> %vec, i32 %sel
  ret float %elt
}

; Dynamic extractelement from a <6 x float> argument with an `inreg` i32
; index. The expected code compares the index (s2) against 1..5 with
; v_cmp_eq_u32_e64 and selects with v_cndmask_b32.
define amdgpu_ps float @dyn_extract_v6f32_v_s(<6 x float> %vec, i32 inreg %sel) {
; GCN-LABEL: dyn_extract_v6f32_v_s:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: dyn_extract_v6f32_v_s:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 4
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 5
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
; GFX10-NEXT: ; return to shader part epilog
entry:
  %elt = extractelement <6 x float> %vec, i32 %sel
  ret float %elt
}

; Dynamic extractelement from an `inreg` <6 x float> with an `inreg` i32
; index. The expected code is a scalar chain of s_cmp_eq_u32 (index in s8,
; values 1..5) and s_cselect_b32, with the result copied to v0.
define amdgpu_ps float @dyn_extract_v6f32_s_s(<6 x float> inreg %vec, i32 inreg %sel) {
; GCN-LABEL: dyn_extract_v6f32_s_s:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_cmp_eq_u32 s8, 1
; GCN-NEXT: s_cselect_b32 s0, s3, s2
; GCN-NEXT: s_cmp_eq_u32 s8, 2
; GCN-NEXT: s_cselect_b32 s0, s4, s0
; GCN-NEXT: s_cmp_eq_u32 s8, 3
; GCN-NEXT: s_cselect_b32 s0, s5, s0
; GCN-NEXT: s_cmp_eq_u32 s8, 4
; GCN-NEXT: s_cselect_b32 s0, s6, s0
; GCN-NEXT: s_cmp_eq_u32 s8, 5
; GCN-NEXT: s_cselect_b32 s0, s7, s0
; GCN-NEXT: v_mov_b32_e32 v0, s0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: dyn_extract_v6f32_s_s:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_cmp_eq_u32 s8, 1
; GFX10-NEXT: s_cselect_b32 s0, s3, s2
; GFX10-NEXT: s_cmp_eq_u32 s8, 2
; GFX10-NEXT: s_cselect_b32 s0, s4, s0
; GFX10-NEXT: s_cmp_eq_u32 s8, 3
; GFX10-NEXT: s_cselect_b32 s0, s5, s0
; GFX10-NEXT: s_cmp_eq_u32 s8, 4
; GFX10-NEXT: s_cselect_b32 s0, s6, s0
; GFX10-NEXT: s_cmp_eq_u32 s8, 5
; GFX10-NEXT: s_cselect_b32 s0, s7, s0
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: ; return to shader part epilog
entry:
  %elt = extractelement <6 x float> %vec, i32 %sel
  ret float %elt
}

; Dynamic extractelement from an `inreg` <7 x float> with a non-inreg i32
; index. The expected code is a compare-and-select chain: v_cmp_eq_u32 of the
; index (v0) against 1..6, each followed by v_cndmask_b32.
define amdgpu_ps float @dyn_extract_v7f32_s_v(<7 x float> inreg %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v7f32_s_v:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_mov_b32 s0, s2
; GCN-NEXT: s_mov_b32 s1, s3
; GCN-NEXT: s_mov_b32 s2, s4
; GCN-NEXT: v_mov_b32_e32 v1, s0
; GCN-NEXT: v_mov_b32_e32 v2, s1
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT: v_mov_b32_e32 v3, s2
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
; GCN-NEXT: v_mov_b32_e32 v4, s5
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
; GCN-NEXT: v_mov_b32_e32 v5, s6
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
; GCN-NEXT: v_mov_b32_e32 v6, s7
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
; GCN-NEXT: v_mov_b32_e32 v7, s8
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v7, vcc
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: dyn_extract_v7f32_s_v:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s6, vcc_lo
; GFX10-NEXT: ; return to shader part epilog
entry:
  %elt = extractelement <7 x float> %vec, i32 %sel
  ret float %elt
}

; Dynamic extractelement from a <7 x float> argument with an i32 index, in a
; function without the amdgpu_ps calling convention. The expected code compares
; the index (v7) against 1..6 and selects with v_cndmask_b32, then returns with
; s_setpc_b64.
define float @dyn_extract_v7f32_v_v(<7 x float> %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v7f32_v_v:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v7
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v7
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v7
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v7
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v7
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: dyn_extract_v7f32_v_v:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v7
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v7
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v7
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v7
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v7
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
  %elt = extractelement <7 x float> %vec, i32 %sel
  ret float %elt
}
; Dynamic extractelement from a <7 x float> argument with an `inreg` i32
; index. The expected code compares the index (s2) against 1..6 with
; v_cmp_eq_u32_e64 and selects with v_cndmask_b32.
define amdgpu_ps float @dyn_extract_v7f32_v_s(<7 x float> %vec, i32 inreg %sel) {
; GCN-LABEL: dyn_extract_v7f32_v_s:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 6
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: dyn_extract_v7f32_v_s:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 4
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 5
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 6
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
; GFX10-NEXT: ; return to shader part epilog
entry:
  %elt = extractelement <7 x float> %vec, i32 %sel
  ret float %elt
}

; Dynamic extractelement from an `inreg` <7 x float> with an `inreg` i32
; index. The expected code is a scalar chain of s_cmp_eq_u32 (index in s9,
; values 1..6) and s_cselect_b32, with the result copied to v0.
define amdgpu_ps float @dyn_extract_v7f32_s_s(<7 x float> inreg %vec, i32 inreg %sel) {
; GCN-LABEL: dyn_extract_v7f32_s_s:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_cmp_eq_u32 s9, 1
; GCN-NEXT: s_cselect_b32 s0, s3, s2
; GCN-NEXT: s_cmp_eq_u32 s9, 2
; GCN-NEXT: s_cselect_b32 s0, s4, s0
; GCN-NEXT: s_cmp_eq_u32 s9, 3
; GCN-NEXT: s_cselect_b32 s0, s5, s0
; GCN-NEXT: s_cmp_eq_u32 s9, 4
; GCN-NEXT: s_cselect_b32 s0, s6, s0
; GCN-NEXT: s_cmp_eq_u32 s9, 5
; GCN-NEXT: s_cselect_b32 s0, s7, s0
; GCN-NEXT: s_cmp_eq_u32 s9, 6
; GCN-NEXT: s_cselect_b32 s0, s8, s0
; GCN-NEXT: v_mov_b32_e32 v0, s0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: dyn_extract_v7f32_s_s:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_cmp_eq_u32 s9, 1
; GFX10-NEXT: s_cselect_b32 s0, s3, s2
; GFX10-NEXT: s_cmp_eq_u32 s9, 2
; GFX10-NEXT: s_cselect_b32 s0, s4, s0
; GFX10-NEXT: s_cmp_eq_u32 s9, 3
; GFX10-NEXT: s_cselect_b32 s0, s5, s0
; GFX10-NEXT: s_cmp_eq_u32 s9, 4
; GFX10-NEXT: s_cselect_b32 s0, s6, s0
; GFX10-NEXT: s_cmp_eq_u32 s9, 5
; GFX10-NEXT: s_cselect_b32 s0, s7, s0
; GFX10-NEXT: s_cmp_eq_u32 s9, 6
; GFX10-NEXT: s_cselect_b32 s0, s8, s0
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: ; return to shader part epilog
entry:
  %elt = extractelement <7 x float> %vec, i32 %sel
  ret float %elt
}

; Dynamic extractelement from an `inreg` <6 x double> with a non-inreg i32
; index. The expected code compares the index (v0) against 1..5 and selects
; both 32-bit halves with paired v_cndmask_b32, then returns the result in
; s0/s1 via v_readfirstlane_b32.
define amdgpu_ps double @dyn_extract_v6f64_s_v(<6 x double> inreg %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v6f64_s_v:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_mov_b32 s0, s2
; GCN-NEXT: s_mov_b32 s1, s3
; GCN-NEXT: s_mov_b32 s2, s4
; GCN-NEXT: s_mov_b32 s3, s5
; GCN-NEXT: s_mov_b32 s4, s6
; GCN-NEXT: s_mov_b32 s5, s7
; GCN-NEXT: v_mov_b32_e32 v1, s0
; GCN-NEXT: v_mov_b32_e32 v2, s1
; GCN-NEXT: v_mov_b32_e32 v3, s2
; GCN-NEXT: v_mov_b32_e32 v4, s3
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT: s_mov_b32 s6, s8
; GCN-NEXT: s_mov_b32 s7, s9
; GCN-NEXT: v_mov_b32_e32 v5, s4
; GCN-NEXT: v_mov_b32_e32 v6, s5
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
; GCN-NEXT: v_mov_b32_e32 v7, s6
; GCN-NEXT: v_mov_b32_e32 v8, s7
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
; GCN-NEXT: v_mov_b32_e32 v9, s10
; GCN-NEXT: v_mov_b32_e32 v10, s11
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
; GCN-NEXT: v_mov_b32_e32 v11, s12
; GCN-NEXT: v_mov_b32_e32 v12, s13
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v11, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v12, vcc
; GCN-NEXT: v_readfirstlane_b32 s0, v0
; GCN-NEXT: v_readfirstlane_b32 s1, v1
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: dyn_extract_v6f64_s_v:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s15, s5
; GFX10-NEXT: v_mov_b32_e32 v1, s2
; GFX10-NEXT: v_mov_b32_e32 v2, s15
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v2, s1, v2, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: s_mov_b32 s8, s10
; GFX10-NEXT: s_mov_b32 s9, s11
; GFX10-NEXT: s_mov_b32 s10, s12
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s5, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX10-NEXT: s_mov_b32 s11, s13
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s10, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s11, vcc_lo
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: v_readfirstlane_b32 s1, v1
; GFX10-NEXT: ; return to shader part epilog
entry:
  %elt = extractelement <6 x double> %vec, i32 %sel
  ret double %elt
}

; Dynamic extractelement from a <6 x double> argument with an i32 index, in a
; function without the amdgpu_ps calling convention. The expected code compares
; the index (v12) against 1..5 and selects both 32-bit halves with paired
; v_cndmask_b32, then returns with s_setpc_b64.
define double @dyn_extract_v6f64_v_v(<6 x double> %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v6f64_v_v:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v12
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v12
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v12
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v12
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v12
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: dyn_extract_v6f64_v_v:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v12
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v12
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v12
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v12
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
  %elt = extractelement <6 x double> %vec, i32 %sel
  ret double %elt
}

; Dynamic extractelement from a <6 x double> argument with an `inreg` i32
; index. The expected code shifts the index left by one, performs two 32-bit
; indexed reads (v0 and v1 as bases), and returns the halves in s0/s1 via
; v_readfirstlane_b32.
define amdgpu_ps double @dyn_extract_v6f64_v_s(<6 x double> %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v6f64_v_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v12, v0
; GPRIDX-NEXT: v_mov_b32_e32 v0, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: v_readfirstlane_b32 s0, v12
; GPRIDX-NEXT: v_readfirstlane_b32 s1, v0
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v6f64_v_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_lshl_b32 m0, s2, 1
; MOVREL-NEXT: v_movrels_b32_e32 v12, v0
; MOVREL-NEXT: v_movrels_b32_e32 v0, v1
; MOVREL-NEXT: v_readfirstlane_b32 s0, v12
; MOVREL-NEXT: v_readfirstlane_b32 s1, v0
; MOVREL-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: dyn_extract_v6f64_v_s:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_lshl_b32 m0, s2, 1
; GFX10-NEXT: v_movrels_b32_e32 v12, v0
; GFX10-NEXT: v_movrels_b32_e32 v0, v1
; GFX10-NEXT: v_readfirstlane_b32 s0, v12
; GFX10-NEXT: v_readfirstlane_b32 s1, v0
; GFX10-NEXT: ; return to shader part epilog
entry:
  %elt = extractelement <6 x double> %vec, i32 %sel
  ret double %elt
}

; Dynamic extractelement from an `inreg` <6 x double> with an `inreg` i32
; index. The expected code shuffles the vector down into s[0:11], copies the
; index (s14) into m0, and reads a 64-bit pair with s_movrels_b64.
define amdgpu_ps double @dyn_extract_v6f64_s_s(<6 x double> inreg %vec, i32 inreg %sel) {
; GCN-LABEL: dyn_extract_v6f64_s_s:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_mov_b32 s0, s2
; GCN-NEXT: s_mov_b32 s1, s3
; GCN-NEXT: s_mov_b32 m0, s14
; GCN-NEXT: s_mov_b32 s2, s4
; GCN-NEXT: s_mov_b32 s3, s5
; GCN-NEXT: s_mov_b32 s4, s6
; GCN-NEXT: s_mov_b32 s5, s7
; GCN-NEXT: s_mov_b32 s6, s8
; GCN-NEXT: s_mov_b32 s7, s9
; GCN-NEXT: s_mov_b32 s8, s10
; GCN-NEXT: s_mov_b32 s9, s11
; GCN-NEXT: s_mov_b32 s10, s12
; GCN-NEXT: s_mov_b32 s11, s13
; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1]
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: dyn_extract_v6f64_s_s:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 m0, s14
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: s_mov_b32 s8, s10
; GFX10-NEXT: s_mov_b32 s9, s11
; GFX10-NEXT: s_mov_b32 s10, s12
; GFX10-NEXT: s_mov_b32 s11, s13
; GFX10-NEXT: s_movrels_b64 s[0:1], s[0:1]
; GFX10-NEXT: ; return to shader part epilog
entry:
  %elt = extractelement <6 x double> %vec, i32 %sel
  ret double %elt
}

; Dynamic extractelement from an `inreg` <7 x double> with a non-inreg i32
; index. The expected code compares the index (v0) against 1..6 and selects
; both 32-bit halves with paired v_cndmask_b32, then returns the result in
; s0/s1 via v_readfirstlane_b32.
define amdgpu_ps double @dyn_extract_v7f64_s_v(<7 x double> inreg %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v7f64_s_v:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_mov_b32 s0, s2
; GCN-NEXT: s_mov_b32 s1, s3
; GCN-NEXT: s_mov_b32 s2, s4
; GCN-NEXT: s_mov_b32 s3, s5
; GCN-NEXT: s_mov_b32 s4, s6
; GCN-NEXT: s_mov_b32 s5, s7
; GCN-NEXT: v_mov_b32_e32 v1, s0
; GCN-NEXT: v_mov_b32_e32 v2, s1
; GCN-NEXT: v_mov_b32_e32 v3, s2
; GCN-NEXT: v_mov_b32_e32 v4, s3
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT: s_mov_b32 s6, s8
; GCN-NEXT: s_mov_b32 s7, s9
; GCN-NEXT: v_mov_b32_e32 v5, s4
; GCN-NEXT: v_mov_b32_e32 v6, s5
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
; GCN-NEXT: s_mov_b32 s8, s10
; GCN-NEXT: s_mov_b32 s9, s11
; GCN-NEXT: v_mov_b32_e32 v7, s6
; GCN-NEXT: v_mov_b32_e32 v8, s7
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
; GCN-NEXT: v_mov_b32_e32 v9, s8
; GCN-NEXT: v_mov_b32_e32 v10, s9
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
; GCN-NEXT: v_mov_b32_e32 v11, s12
; GCN-NEXT: v_mov_b32_e32 v12, s13
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
; GCN-NEXT: v_mov_b32_e32 v13, s14
; GCN-NEXT: v_mov_b32_e32 v14, s15
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v13, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v14, vcc
; GCN-NEXT: v_readfirstlane_b32 s0, v0
; GCN-NEXT: v_readfirstlane_b32 s1, v1
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: dyn_extract_v7f64_s_v:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s19, s5
; GFX10-NEXT: v_mov_b32_e32 v1, s2
; GFX10-NEXT: v_mov_b32_e32 v2, s19
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v2, s1, v2, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: s_mov_b32 s8, s10
; GFX10-NEXT: s_mov_b32 s9, s11
; GFX10-NEXT: s_mov_b32 s10, s12
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s5, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX10-NEXT: s_mov_b32 s11, s13
; GFX10-NEXT: s_mov_b32 s12, s14
; GFX10-NEXT: s_mov_b32 s13, s15
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s12, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s13, vcc_lo
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: v_readfirstlane_b32 s1, v1
; GFX10-NEXT: ; return to shader part epilog
entry:
  %elt = extractelement <7 x double> %vec, i32 %sel
  ret double %elt
}
2548 2549define double @dyn_extract_v7f64_v_v(<7 x double> %vec, i32 %sel) { 2550; GCN-LABEL: dyn_extract_v7f64_v_v: 2551; GCN: ; %bb.0: ; %entry 2552; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2553; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v14 2554; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2555; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 2556; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v14 2557; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 2558; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 2559; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v14 2560; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 2561; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 2562; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v14 2563; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 2564; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 2565; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v14 2566; GCN-NEXT: v_cndmask_b32_e32 v0, v0, 
v10, vcc 2567; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 2568; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v14 2569; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 2570; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc 2571; GCN-NEXT: s_setpc_b64 s[30:31] 2572; 2573; GFX10-LABEL: dyn_extract_v7f64_v_v: 2574; GFX10: ; %bb.0: ; %entry 2575; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2576; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2577; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14 2578; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 2579; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 2580; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v14 2581; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 2582; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 2583; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v14 2584; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 2585; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo 2586; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v14 2587; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 2588; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo 2589; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v14 2590; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 2591; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo 2592; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v14 2593; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo 2594; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo 2595; GFX10-NEXT: s_setpc_b64 s[30:31] 2596entry: 2597 %ext = extractelement <7 x double> %vec, i32 %sel 2598 ret double %ext 2599} 2600 2601define amdgpu_ps double @dyn_extract_v7f64_v_s(<7 x double> %vec, i32 inreg %sel) { 2602; GPRIDX-LABEL: dyn_extract_v7f64_v_s: 2603; GPRIDX: ; %bb.0: ; %entry 2604; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 2605; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) 2606; GPRIDX-NEXT: v_mov_b32_e32 v14, v0 2607; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 2608; GPRIDX-NEXT: s_set_gpr_idx_off 2609; GPRIDX-NEXT: v_readfirstlane_b32 s0, v14 2610; GPRIDX-NEXT: v_readfirstlane_b32 s1, v0 2611; 
GPRIDX-NEXT: ; return to shader part epilog 2612; 2613; MOVREL-LABEL: dyn_extract_v7f64_v_s: 2614; MOVREL: ; %bb.0: ; %entry 2615; MOVREL-NEXT: s_lshl_b32 m0, s2, 1 2616; MOVREL-NEXT: v_movrels_b32_e32 v14, v0 2617; MOVREL-NEXT: v_movrels_b32_e32 v0, v1 2618; MOVREL-NEXT: v_readfirstlane_b32 s0, v14 2619; MOVREL-NEXT: v_readfirstlane_b32 s1, v0 2620; MOVREL-NEXT: ; return to shader part epilog 2621; 2622; GFX10-LABEL: dyn_extract_v7f64_v_s: 2623; GFX10: ; %bb.0: ; %entry 2624; GFX10-NEXT: s_lshl_b32 m0, s2, 1 2625; GFX10-NEXT: v_movrels_b32_e32 v14, v0 2626; GFX10-NEXT: v_movrels_b32_e32 v0, v1 2627; GFX10-NEXT: v_readfirstlane_b32 s0, v14 2628; GFX10-NEXT: v_readfirstlane_b32 s1, v0 2629; GFX10-NEXT: ; return to shader part epilog 2630entry: 2631 %ext = extractelement <7 x double> %vec, i32 %sel 2632 ret double %ext 2633} 2634 2635define amdgpu_ps double @dyn_extract_v7f64_s_s(<7 x double> inreg %vec, i32 inreg %sel) { 2636; GCN-LABEL: dyn_extract_v7f64_s_s: 2637; GCN: ; %bb.0: ; %entry 2638; GCN-NEXT: s_mov_b32 s0, s2 2639; GCN-NEXT: s_mov_b32 s1, s3 2640; GCN-NEXT: s_mov_b32 m0, s16 2641; GCN-NEXT: s_mov_b32 s2, s4 2642; GCN-NEXT: s_mov_b32 s3, s5 2643; GCN-NEXT: s_mov_b32 s4, s6 2644; GCN-NEXT: s_mov_b32 s5, s7 2645; GCN-NEXT: s_mov_b32 s6, s8 2646; GCN-NEXT: s_mov_b32 s7, s9 2647; GCN-NEXT: s_mov_b32 s8, s10 2648; GCN-NEXT: s_mov_b32 s9, s11 2649; GCN-NEXT: s_mov_b32 s10, s12 2650; GCN-NEXT: s_mov_b32 s11, s13 2651; GCN-NEXT: s_mov_b32 s12, s14 2652; GCN-NEXT: s_mov_b32 s13, s15 2653; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1] 2654; GCN-NEXT: ; return to shader part epilog 2655; 2656; GFX10-LABEL: dyn_extract_v7f64_s_s: 2657; GFX10: ; %bb.0: ; %entry 2658; GFX10-NEXT: s_mov_b32 s0, s2 2659; GFX10-NEXT: s_mov_b32 s1, s3 2660; GFX10-NEXT: s_mov_b32 m0, s16 2661; GFX10-NEXT: s_mov_b32 s2, s4 2662; GFX10-NEXT: s_mov_b32 s3, s5 2663; GFX10-NEXT: s_mov_b32 s4, s6 2664; GFX10-NEXT: s_mov_b32 s5, s7 2665; GFX10-NEXT: s_mov_b32 s6, s8 2666; GFX10-NEXT: s_mov_b32 s7, s9 
2667; GFX10-NEXT: s_mov_b32 s8, s10 2668; GFX10-NEXT: s_mov_b32 s9, s11 2669; GFX10-NEXT: s_mov_b32 s10, s12 2670; GFX10-NEXT: s_mov_b32 s11, s13 2671; GFX10-NEXT: s_mov_b32 s12, s14 2672; GFX10-NEXT: s_mov_b32 s13, s15 2673; GFX10-NEXT: s_movrels_b64 s[0:1], s[0:1] 2674; GFX10-NEXT: ; return to shader part epilog 2675entry: 2676 %ext = extractelement <7 x double> %vec, i32 %sel 2677 ret double %ext 2678} 2679 2680define amdgpu_kernel void @dyn_extract_v5f64_s_s(double addrspace(1)* %out, i32 %sel) { 2681; GPRIDX-LABEL: dyn_extract_v5f64_s_s: 2682; GPRIDX: .amd_kernel_code_t 2683; GPRIDX-NEXT: amd_code_version_major = 1 2684; GPRIDX-NEXT: amd_code_version_minor = 2 2685; GPRIDX-NEXT: amd_machine_kind = 1 2686; GPRIDX-NEXT: amd_machine_version_major = 9 2687; GPRIDX-NEXT: amd_machine_version_minor = 0 2688; GPRIDX-NEXT: amd_machine_version_stepping = 0 2689; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256 2690; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0 2691; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0 2692; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 1 2693; GPRIDX-NEXT: priority = 0 2694; GPRIDX-NEXT: float_mode = 240 2695; GPRIDX-NEXT: priv = 0 2696; GPRIDX-NEXT: enable_dx10_clamp = 1 2697; GPRIDX-NEXT: debug_mode = 0 2698; GPRIDX-NEXT: enable_ieee_mode = 1 2699; GPRIDX-NEXT: enable_wgp_mode = 0 2700; GPRIDX-NEXT: enable_mem_ordered = 0 2701; GPRIDX-NEXT: enable_fwd_progress = 0 2702; GPRIDX-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 2703; GPRIDX-NEXT: user_sgpr_count = 6 2704; GPRIDX-NEXT: enable_trap_handler = 0 2705; GPRIDX-NEXT: enable_sgpr_workgroup_id_x = 1 2706; GPRIDX-NEXT: enable_sgpr_workgroup_id_y = 0 2707; GPRIDX-NEXT: enable_sgpr_workgroup_id_z = 0 2708; GPRIDX-NEXT: enable_sgpr_workgroup_info = 0 2709; GPRIDX-NEXT: enable_vgpr_workitem_id = 0 2710; GPRIDX-NEXT: enable_exception_msb = 0 2711; GPRIDX-NEXT: granulated_lds_size = 0 2712; GPRIDX-NEXT: enable_exception = 0 2713; GPRIDX-NEXT: enable_sgpr_private_segment_buffer = 1 
2714; GPRIDX-NEXT: enable_sgpr_dispatch_ptr = 0 2715; GPRIDX-NEXT: enable_sgpr_queue_ptr = 0 2716; GPRIDX-NEXT: enable_sgpr_kernarg_segment_ptr = 1 2717; GPRIDX-NEXT: enable_sgpr_dispatch_id = 0 2718; GPRIDX-NEXT: enable_sgpr_flat_scratch_init = 0 2719; GPRIDX-NEXT: enable_sgpr_private_segment_size = 0 2720; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_x = 0 2721; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_y = 0 2722; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_z = 0 2723; GPRIDX-NEXT: enable_wavefront_size32 = 0 2724; GPRIDX-NEXT: enable_ordered_append_gds = 0 2725; GPRIDX-NEXT: private_element_size = 1 2726; GPRIDX-NEXT: is_ptr64 = 1 2727; GPRIDX-NEXT: is_dynamic_callstack = 0 2728; GPRIDX-NEXT: is_debug_enabled = 0 2729; GPRIDX-NEXT: is_xnack_enabled = 1 2730; GPRIDX-NEXT: workitem_private_segment_byte_size = 0 2731; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0 2732; GPRIDX-NEXT: gds_segment_byte_size = 0 2733; GPRIDX-NEXT: kernarg_segment_byte_size = 28 2734; GPRIDX-NEXT: workgroup_fbarrier_count = 0 2735; GPRIDX-NEXT: wavefront_sgpr_count = 9 2736; GPRIDX-NEXT: workitem_vgpr_count = 3 2737; GPRIDX-NEXT: reserved_vgpr_first = 0 2738; GPRIDX-NEXT: reserved_vgpr_count = 0 2739; GPRIDX-NEXT: reserved_sgpr_first = 0 2740; GPRIDX-NEXT: reserved_sgpr_count = 0 2741; GPRIDX-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 2742; GPRIDX-NEXT: debug_private_segment_buffer_sgpr = 0 2743; GPRIDX-NEXT: kernarg_segment_alignment = 4 2744; GPRIDX-NEXT: group_segment_alignment = 4 2745; GPRIDX-NEXT: private_segment_alignment = 4 2746; GPRIDX-NEXT: wavefront_size = 6 2747; GPRIDX-NEXT: call_convention = -1 2748; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0 2749; GPRIDX-NEXT: .end_amd_kernel_code_t 2750; GPRIDX-NEXT: ; %bb.0: ; %entry 2751; GPRIDX-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0 2752; GPRIDX-NEXT: s_load_dword s8, s[4:5], 0x8 2753; GPRIDX-NEXT: s_mov_b32 s0, 0 2754; GPRIDX-NEXT: s_mov_b32 s1, 0x40140000 2755; GPRIDX-NEXT: s_mov_b32 s3, 0x40080000 2756; 
GPRIDX-NEXT: s_mov_b32 s2, s0 2757; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) 2758; GPRIDX-NEXT: s_cmp_eq_u32 s8, 1 2759; GPRIDX-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 2760; GPRIDX-NEXT: s_cmp_eq_u32 s8, 2 2761; GPRIDX-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] 2762; GPRIDX-NEXT: s_cmp_eq_u32 s8, 3 2763; GPRIDX-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] 2764; GPRIDX-NEXT: s_cmp_eq_u32 s8, 4 2765; GPRIDX-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] 2766; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 2767; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 2768; GPRIDX-NEXT: v_mov_b32_e32 v2, 0 2769; GPRIDX-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] 2770; GPRIDX-NEXT: s_endpgm 2771; 2772; MOVREL-LABEL: dyn_extract_v5f64_s_s: 2773; MOVREL: .amd_kernel_code_t 2774; MOVREL-NEXT: amd_code_version_major = 1 2775; MOVREL-NEXT: amd_code_version_minor = 2 2776; MOVREL-NEXT: amd_machine_kind = 1 2777; MOVREL-NEXT: amd_machine_version_major = 8 2778; MOVREL-NEXT: amd_machine_version_minor = 0 2779; MOVREL-NEXT: amd_machine_version_stepping = 3 2780; MOVREL-NEXT: kernel_code_entry_byte_offset = 256 2781; MOVREL-NEXT: kernel_code_prefetch_byte_size = 0 2782; MOVREL-NEXT: granulated_workitem_vgpr_count = 0 2783; MOVREL-NEXT: granulated_wavefront_sgpr_count = 1 2784; MOVREL-NEXT: priority = 0 2785; MOVREL-NEXT: float_mode = 240 2786; MOVREL-NEXT: priv = 0 2787; MOVREL-NEXT: enable_dx10_clamp = 1 2788; MOVREL-NEXT: debug_mode = 0 2789; MOVREL-NEXT: enable_ieee_mode = 1 2790; MOVREL-NEXT: enable_wgp_mode = 0 2791; MOVREL-NEXT: enable_mem_ordered = 0 2792; MOVREL-NEXT: enable_fwd_progress = 0 2793; MOVREL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 2794; MOVREL-NEXT: user_sgpr_count = 6 2795; MOVREL-NEXT: enable_trap_handler = 0 2796; MOVREL-NEXT: enable_sgpr_workgroup_id_x = 1 2797; MOVREL-NEXT: enable_sgpr_workgroup_id_y = 0 2798; MOVREL-NEXT: enable_sgpr_workgroup_id_z = 0 2799; MOVREL-NEXT: enable_sgpr_workgroup_info = 0 2800; MOVREL-NEXT: enable_vgpr_workitem_id = 0 2801; MOVREL-NEXT: enable_exception_msb = 
0 2802; MOVREL-NEXT: granulated_lds_size = 0 2803; MOVREL-NEXT: enable_exception = 0 2804; MOVREL-NEXT: enable_sgpr_private_segment_buffer = 1 2805; MOVREL-NEXT: enable_sgpr_dispatch_ptr = 0 2806; MOVREL-NEXT: enable_sgpr_queue_ptr = 0 2807; MOVREL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 2808; MOVREL-NEXT: enable_sgpr_dispatch_id = 0 2809; MOVREL-NEXT: enable_sgpr_flat_scratch_init = 0 2810; MOVREL-NEXT: enable_sgpr_private_segment_size = 0 2811; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 2812; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 2813; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 2814; MOVREL-NEXT: enable_wavefront_size32 = 0 2815; MOVREL-NEXT: enable_ordered_append_gds = 0 2816; MOVREL-NEXT: private_element_size = 1 2817; MOVREL-NEXT: is_ptr64 = 1 2818; MOVREL-NEXT: is_dynamic_callstack = 0 2819; MOVREL-NEXT: is_debug_enabled = 0 2820; MOVREL-NEXT: is_xnack_enabled = 0 2821; MOVREL-NEXT: workitem_private_segment_byte_size = 0 2822; MOVREL-NEXT: workgroup_group_segment_byte_size = 0 2823; MOVREL-NEXT: gds_segment_byte_size = 0 2824; MOVREL-NEXT: kernarg_segment_byte_size = 28 2825; MOVREL-NEXT: workgroup_fbarrier_count = 0 2826; MOVREL-NEXT: wavefront_sgpr_count = 9 2827; MOVREL-NEXT: workitem_vgpr_count = 4 2828; MOVREL-NEXT: reserved_vgpr_first = 0 2829; MOVREL-NEXT: reserved_vgpr_count = 0 2830; MOVREL-NEXT: reserved_sgpr_first = 0 2831; MOVREL-NEXT: reserved_sgpr_count = 0 2832; MOVREL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 2833; MOVREL-NEXT: debug_private_segment_buffer_sgpr = 0 2834; MOVREL-NEXT: kernarg_segment_alignment = 4 2835; MOVREL-NEXT: group_segment_alignment = 4 2836; MOVREL-NEXT: private_segment_alignment = 4 2837; MOVREL-NEXT: wavefront_size = 6 2838; MOVREL-NEXT: call_convention = -1 2839; MOVREL-NEXT: runtime_loader_kernel_symbol = 0 2840; MOVREL-NEXT: .end_amd_kernel_code_t 2841; MOVREL-NEXT: ; %bb.0: ; %entry 2842; MOVREL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0 2843; MOVREL-NEXT: s_load_dword 
s8, s[4:5], 0x8 2844; MOVREL-NEXT: s_mov_b32 s0, 0 2845; MOVREL-NEXT: s_mov_b32 s1, 0x40140000 2846; MOVREL-NEXT: s_mov_b32 s3, 0x40080000 2847; MOVREL-NEXT: s_mov_b32 s2, s0 2848; MOVREL-NEXT: s_waitcnt lgkmcnt(0) 2849; MOVREL-NEXT: s_cmp_eq_u32 s8, 1 2850; MOVREL-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 2851; MOVREL-NEXT: s_cmp_eq_u32 s8, 2 2852; MOVREL-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] 2853; MOVREL-NEXT: s_cmp_eq_u32 s8, 3 2854; MOVREL-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] 2855; MOVREL-NEXT: s_cmp_eq_u32 s8, 4 2856; MOVREL-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] 2857; MOVREL-NEXT: v_mov_b32_e32 v0, s0 2858; MOVREL-NEXT: v_mov_b32_e32 v2, s6 2859; MOVREL-NEXT: v_mov_b32_e32 v1, s1 2860; MOVREL-NEXT: v_mov_b32_e32 v3, s7 2861; MOVREL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 2862; MOVREL-NEXT: s_endpgm 2863; 2864; GFX10-LABEL: dyn_extract_v5f64_s_s: 2865; GFX10: .amd_kernel_code_t 2866; GFX10-NEXT: amd_code_version_major = 1 2867; GFX10-NEXT: amd_code_version_minor = 2 2868; GFX10-NEXT: amd_machine_kind = 1 2869; GFX10-NEXT: amd_machine_version_major = 10 2870; GFX10-NEXT: amd_machine_version_minor = 1 2871; GFX10-NEXT: amd_machine_version_stepping = 0 2872; GFX10-NEXT: kernel_code_entry_byte_offset = 256 2873; GFX10-NEXT: kernel_code_prefetch_byte_size = 0 2874; GFX10-NEXT: granulated_workitem_vgpr_count = 0 2875; GFX10-NEXT: granulated_wavefront_sgpr_count = 1 2876; GFX10-NEXT: priority = 0 2877; GFX10-NEXT: float_mode = 240 2878; GFX10-NEXT: priv = 0 2879; GFX10-NEXT: enable_dx10_clamp = 1 2880; GFX10-NEXT: debug_mode = 0 2881; GFX10-NEXT: enable_ieee_mode = 1 2882; GFX10-NEXT: enable_wgp_mode = 1 2883; GFX10-NEXT: enable_mem_ordered = 1 2884; GFX10-NEXT: enable_fwd_progress = 0 2885; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 2886; GFX10-NEXT: user_sgpr_count = 6 2887; GFX10-NEXT: enable_trap_handler = 0 2888; GFX10-NEXT: enable_sgpr_workgroup_id_x = 1 2889; GFX10-NEXT: enable_sgpr_workgroup_id_y = 0 2890; GFX10-NEXT: 
enable_sgpr_workgroup_id_z = 0 2891; GFX10-NEXT: enable_sgpr_workgroup_info = 0 2892; GFX10-NEXT: enable_vgpr_workitem_id = 0 2893; GFX10-NEXT: enable_exception_msb = 0 2894; GFX10-NEXT: granulated_lds_size = 0 2895; GFX10-NEXT: enable_exception = 0 2896; GFX10-NEXT: enable_sgpr_private_segment_buffer = 1 2897; GFX10-NEXT: enable_sgpr_dispatch_ptr = 0 2898; GFX10-NEXT: enable_sgpr_queue_ptr = 0 2899; GFX10-NEXT: enable_sgpr_kernarg_segment_ptr = 1 2900; GFX10-NEXT: enable_sgpr_dispatch_id = 0 2901; GFX10-NEXT: enable_sgpr_flat_scratch_init = 0 2902; GFX10-NEXT: enable_sgpr_private_segment_size = 0 2903; GFX10-NEXT: enable_sgpr_grid_workgroup_count_x = 0 2904; GFX10-NEXT: enable_sgpr_grid_workgroup_count_y = 0 2905; GFX10-NEXT: enable_sgpr_grid_workgroup_count_z = 0 2906; GFX10-NEXT: enable_wavefront_size32 = 1 2907; GFX10-NEXT: enable_ordered_append_gds = 0 2908; GFX10-NEXT: private_element_size = 1 2909; GFX10-NEXT: is_ptr64 = 1 2910; GFX10-NEXT: is_dynamic_callstack = 0 2911; GFX10-NEXT: is_debug_enabled = 0 2912; GFX10-NEXT: is_xnack_enabled = 1 2913; GFX10-NEXT: workitem_private_segment_byte_size = 0 2914; GFX10-NEXT: workgroup_group_segment_byte_size = 0 2915; GFX10-NEXT: gds_segment_byte_size = 0 2916; GFX10-NEXT: kernarg_segment_byte_size = 28 2917; GFX10-NEXT: workgroup_fbarrier_count = 0 2918; GFX10-NEXT: wavefront_sgpr_count = 9 2919; GFX10-NEXT: workitem_vgpr_count = 3 2920; GFX10-NEXT: reserved_vgpr_first = 0 2921; GFX10-NEXT: reserved_vgpr_count = 0 2922; GFX10-NEXT: reserved_sgpr_first = 0 2923; GFX10-NEXT: reserved_sgpr_count = 0 2924; GFX10-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 2925; GFX10-NEXT: debug_private_segment_buffer_sgpr = 0 2926; GFX10-NEXT: kernarg_segment_alignment = 4 2927; GFX10-NEXT: group_segment_alignment = 4 2928; GFX10-NEXT: private_segment_alignment = 4 2929; GFX10-NEXT: wavefront_size = 5 2930; GFX10-NEXT: call_convention = -1 2931; GFX10-NEXT: runtime_loader_kernel_symbol = 0 2932; GFX10-NEXT: 
.end_amd_kernel_code_t 2933; GFX10-NEXT: ; %bb.0: ; %entry 2934; GFX10-NEXT: s_clause 0x1 2935; GFX10-NEXT: s_load_dword s8, s[4:5], 0x8 2936; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2937; GFX10-NEXT: s_mov_b32 s2, 0 2938; GFX10-NEXT: s_mov_b32 s3, 0x40140000 2939; GFX10-NEXT: s_mov_b32 s5, 0x40080000 2940; GFX10-NEXT: s_mov_b32 s4, s2 2941; GFX10-NEXT: v_mov_b32_e32 v2, 0 2942; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2943; GFX10-NEXT: s_cmp_eq_u32 s8, 1 2944; GFX10-NEXT: s_cselect_b64 s[6:7], 2.0, 1.0 2945; GFX10-NEXT: s_cmp_eq_u32 s8, 2 2946; GFX10-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 2947; GFX10-NEXT: s_cmp_eq_u32 s8, 3 2948; GFX10-NEXT: s_cselect_b64 s[4:5], 4.0, s[4:5] 2949; GFX10-NEXT: s_cmp_eq_u32 s8, 4 2950; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] 2951; GFX10-NEXT: v_mov_b32_e32 v0, s2 2952; GFX10-NEXT: v_mov_b32_e32 v1, s3 2953; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 2954; GFX10-NEXT: s_endpgm 2955entry: 2956 %ext = extractelement <5 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0>, i32 %sel 2957 store double %ext, double addrspace(1)* %out 2958 ret void 2959} 2960 2961define float @dyn_extract_v15f32_const_s_v(i32 %sel) { 2962; GCN-LABEL: dyn_extract_v15f32_const_s_v: 2963; GCN: ; %bb.0: ; %entry 2964; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2965; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 2966; GCN-NEXT: v_mov_b32_e32 v1, 0x40400000 2967; GCN-NEXT: v_cndmask_b32_e64 v13, 1.0, 2.0, vcc 2968; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 2969; GCN-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc 2970; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 2971; GCN-NEXT: v_mov_b32_e32 v2, 0x40a00000 2972; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc 2973; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 2974; GCN-NEXT: v_mov_b32_e32 v3, 0x40c00000 2975; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 2976; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 2977; GCN-NEXT: v_mov_b32_e32 v4, 0x40e00000 2978; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 2979; 
GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 2980; GCN-NEXT: v_mov_b32_e32 v5, 0x41000000 2981; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 2982; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 2983; GCN-NEXT: v_mov_b32_e32 v6, 0x41100000 2984; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 2985; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v0 2986; GCN-NEXT: v_mov_b32_e32 v7, 0x41200000 2987; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc 2988; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v0 2989; GCN-NEXT: v_mov_b32_e32 v8, 0x41300000 2990; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 2991; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v0 2992; GCN-NEXT: v_mov_b32_e32 v9, 0x41400000 2993; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc 2994; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v0 2995; GCN-NEXT: v_mov_b32_e32 v10, 0x41500000 2996; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 2997; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 12, v0 2998; GCN-NEXT: v_mov_b32_e32 v11, 0x41600000 2999; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc 3000; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v0 3001; GCN-NEXT: v_mov_b32_e32 v12, 0x41700000 3002; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 3003; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v0 3004; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v12, vcc 3005; GCN-NEXT: s_setpc_b64 s[30:31] 3006; 3007; GFX10-LABEL: dyn_extract_v15f32_const_s_v: 3008; GFX10: ; %bb.0: ; %entry 3009; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3010; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3011; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 3012; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo 3013; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 3014; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo 3015; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 3016; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo 3017; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 3018; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo 3019; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 3020; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo 3021; 
GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 3022; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo 3023; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 3024; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo 3025; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0 3026; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo 3027; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0 3028; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo 3029; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0 3030; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo 3031; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0 3032; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo 3033; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0 3034; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo 3035; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0 3036; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo 3037; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0 3038; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, 0x41700000, vcc_lo 3039; GFX10-NEXT: s_setpc_b64 s[30:31] 3040entry: 3041 %ext = extractelement <15 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>, i32 %sel 3042 ret float %ext 3043} 3044 3045define amdgpu_ps float @dyn_extract_v15f32_const_s_s(i32 inreg %sel) { 3046; GCN-LABEL: dyn_extract_v15f32_const_s_s: 3047; GCN: ; %bb.0: ; %entry 3048; GCN-NEXT: s_mov_b32 s4, 1.0 3049; GCN-NEXT: s_mov_b32 m0, s2 3050; GCN-NEXT: s_mov_b32 s18, 0x41700000 3051; GCN-NEXT: s_mov_b32 s17, 0x41600000 3052; GCN-NEXT: s_mov_b32 s16, 0x41500000 3053; GCN-NEXT: s_mov_b32 s15, 0x41400000 3054; GCN-NEXT: s_mov_b32 s14, 0x41300000 3055; GCN-NEXT: s_mov_b32 s13, 0x41200000 3056; GCN-NEXT: s_mov_b32 s12, 0x41100000 3057; GCN-NEXT: s_mov_b32 s11, 0x41000000 3058; GCN-NEXT: s_mov_b32 s10, 0x40e00000 3059; GCN-NEXT: s_mov_b32 s9, 0x40c00000 3060; GCN-NEXT: s_mov_b32 s8, 0x40a00000 3061; 
GCN-NEXT: s_mov_b32 s7, 4.0 3062; GCN-NEXT: s_mov_b32 s6, 0x40400000 3063; GCN-NEXT: s_mov_b32 s5, 2.0 3064; GCN-NEXT: s_movrels_b32 s0, s4 3065; GCN-NEXT: v_mov_b32_e32 v0, s0 3066; GCN-NEXT: ; return to shader part epilog 3067; 3068; GFX10-LABEL: dyn_extract_v15f32_const_s_s: 3069; GFX10: ; %bb.0: ; %entry 3070; GFX10-NEXT: s_mov_b32 s4, 1.0 3071; GFX10-NEXT: s_mov_b32 m0, s2 3072; GFX10-NEXT: s_mov_b32 s18, 0x41700000 3073; GFX10-NEXT: s_mov_b32 s17, 0x41600000 3074; GFX10-NEXT: s_mov_b32 s16, 0x41500000 3075; GFX10-NEXT: s_mov_b32 s15, 0x41400000 3076; GFX10-NEXT: s_mov_b32 s14, 0x41300000 3077; GFX10-NEXT: s_mov_b32 s13, 0x41200000 3078; GFX10-NEXT: s_mov_b32 s12, 0x41100000 3079; GFX10-NEXT: s_mov_b32 s11, 0x41000000 3080; GFX10-NEXT: s_mov_b32 s10, 0x40e00000 3081; GFX10-NEXT: s_mov_b32 s9, 0x40c00000 3082; GFX10-NEXT: s_mov_b32 s8, 0x40a00000 3083; GFX10-NEXT: s_mov_b32 s7, 4.0 3084; GFX10-NEXT: s_mov_b32 s6, 0x40400000 3085; GFX10-NEXT: s_mov_b32 s5, 2.0 3086; GFX10-NEXT: s_movrels_b32 s0, s4 3087; GFX10-NEXT: v_mov_b32_e32 v0, s0 3088; GFX10-NEXT: ; return to shader part epilog 3089entry: 3090 %ext = extractelement <15 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>, i32 %sel 3091 ret float %ext 3092} 3093 3094define amdgpu_ps float @dyn_extract_v15f32_s_v(<15 x float> inreg %vec, i32 %sel) { 3095; GCN-LABEL: dyn_extract_v15f32_s_v: 3096; GCN: ; %bb.0: ; %entry 3097; GCN-NEXT: s_mov_b32 s0, s2 3098; GCN-NEXT: s_mov_b32 s1, s3 3099; GCN-NEXT: s_mov_b32 s2, s4 3100; GCN-NEXT: v_mov_b32_e32 v1, s0 3101; GCN-NEXT: v_mov_b32_e32 v2, s1 3102; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 3103; GCN-NEXT: s_mov_b32 s3, s5 3104; GCN-NEXT: v_mov_b32_e32 v3, s2 3105; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3106; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 3107; GCN-NEXT: s_mov_b32 s4, s6 3108; GCN-NEXT: v_mov_b32_e32 v4, s3 3109; GCN-NEXT: 
v_cndmask_b32_e32 v1, v1, v3, vcc 3110; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 3111; GCN-NEXT: s_mov_b32 s5, s7 3112; GCN-NEXT: v_mov_b32_e32 v5, s4 3113; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 3114; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 3115; GCN-NEXT: s_mov_b32 s6, s8 3116; GCN-NEXT: v_mov_b32_e32 v6, s5 3117; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 3118; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 3119; GCN-NEXT: s_mov_b32 s7, s9 3120; GCN-NEXT: v_mov_b32_e32 v7, s6 3121; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc 3122; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 3123; GCN-NEXT: s_mov_b32 s8, s10 3124; GCN-NEXT: v_mov_b32_e32 v8, s7 3125; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 3126; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 3127; GCN-NEXT: s_mov_b32 s9, s11 3128; GCN-NEXT: v_mov_b32_e32 v9, s8 3129; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc 3130; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v0 3131; GCN-NEXT: s_mov_b32 s10, s12 3132; GCN-NEXT: v_mov_b32_e32 v10, s9 3133; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 3134; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v0 3135; GCN-NEXT: v_mov_b32_e32 v11, s10 3136; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc 3137; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v0 3138; GCN-NEXT: v_mov_b32_e32 v12, s13 3139; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 3140; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v0 3141; GCN-NEXT: v_mov_b32_e32 v13, s14 3142; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc 3143; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 12, v0 3144; GCN-NEXT: v_mov_b32_e32 v14, s15 3145; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc 3146; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v0 3147; GCN-NEXT: v_mov_b32_e32 v15, s16 3148; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v14, vcc 3149; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v0 3150; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc 3151; GCN-NEXT: ; return to shader part epilog 3152; 3153; GFX10-LABEL: dyn_extract_v15f32_s_v: 3154; GFX10: ; %bb.0: ; %entry 3155; GFX10-NEXT: s_mov_b32 s1, s3 3156; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 
1, v0 3157; GFX10-NEXT: v_mov_b32_e32 v1, s1 3158; GFX10-NEXT: s_mov_b32 s0, s2 3159; GFX10-NEXT: s_mov_b32 s2, s4 3160; GFX10-NEXT: s_mov_b32 s3, s5 3161; GFX10-NEXT: s_mov_b32 s4, s6 3162; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo 3163; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 3164; GFX10-NEXT: s_mov_b32 s5, s7 3165; GFX10-NEXT: s_mov_b32 s6, s8 3166; GFX10-NEXT: s_mov_b32 s7, s9 3167; GFX10-NEXT: s_mov_b32 s8, s10 3168; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo 3169; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 3170; GFX10-NEXT: s_mov_b32 s9, s11 3171; GFX10-NEXT: s_mov_b32 s10, s12 3172; GFX10-NEXT: s_mov_b32 s11, s13 3173; GFX10-NEXT: s_mov_b32 s12, s14 3174; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo 3175; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 3176; GFX10-NEXT: s_mov_b32 s13, s15 3177; GFX10-NEXT: s_mov_b32 s14, s16 3178; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo 3179; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 3180; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo 3181; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 3182; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo 3183; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 3184; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s7, vcc_lo 3185; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0 3186; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo 3187; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0 3188; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s9, vcc_lo 3189; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0 3190; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo 3191; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0 3192; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s11, vcc_lo 3193; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0 3194; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo 3195; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0 3196; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s13, vcc_lo 3197; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0 3198; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s14, vcc_lo 3199; GFX10-NEXT: ; return to 
shader part epilog 3200entry: 3201 %ext = extractelement <15 x float> %vec, i32 %sel 3202 ret float %ext 3203} 3204 3205define float @dyn_extract_v15f32_v_v(<15 x float> %vec, i32 %sel) { 3206; GCN-LABEL: dyn_extract_v15f32_v_v: 3207; GCN: ; %bb.0: ; %entry 3208; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3209; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v15 3210; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3211; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v15 3212; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3213; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v15 3214; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 3215; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v15 3216; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 3217; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v15 3218; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 3219; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v15 3220; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 3221; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v15 3222; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 3223; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v15 3224; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 3225; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v15 3226; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc 3227; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v15 3228; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 3229; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v15 3230; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc 3231; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 12, v15 3232; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 3233; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v15 3234; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc 3235; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15 3236; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc 3237; GCN-NEXT: s_setpc_b64 s[30:31] 3238; 3239; GFX10-LABEL: dyn_extract_v15f32_v_v: 3240; GFX10: ; %bb.0: ; %entry 3241; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3242; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3243; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15 3244; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 3245; GFX10-NEXT: 
v_cmp_eq_u32_e32 vcc_lo, 2, v15 3246; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 3247; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15 3248; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 3249; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15 3250; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 3251; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15 3252; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 3253; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15 3254; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 3255; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15 3256; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo 3257; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15 3258; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 3259; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15 3260; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo 3261; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15 3262; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 3263; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15 3264; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo 3265; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15 3266; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo 3267; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15 3268; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo 3269; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15 3270; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo 3271; GFX10-NEXT: s_setpc_b64 s[30:31] 3272entry: 3273 %ext = extractelement <15 x float> %vec, i32 %sel 3274 ret float %ext 3275} 3276 3277define amdgpu_ps float @dyn_extract_v15f32_v_s(<15 x float> %vec, i32 inreg %sel) { 3278; GPRIDX-LABEL: dyn_extract_v15f32_v_s: 3279; GPRIDX: ; %bb.0: ; %entry 3280; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) 3281; GPRIDX-NEXT: v_mov_b32_e32 v0, v0 3282; GPRIDX-NEXT: s_set_gpr_idx_off 3283; GPRIDX-NEXT: ; return to shader part epilog 3284; 3285; MOVREL-LABEL: dyn_extract_v15f32_v_s: 3286; MOVREL: ; %bb.0: ; %entry 3287; MOVREL-NEXT: s_mov_b32 m0, s2 3288; MOVREL-NEXT: v_movrels_b32_e32 v0, v0 
3289; MOVREL-NEXT: ; return to shader part epilog 3290; 3291; GFX10-LABEL: dyn_extract_v15f32_v_s: 3292; GFX10: ; %bb.0: ; %entry 3293; GFX10-NEXT: s_mov_b32 m0, s2 3294; GFX10-NEXT: v_movrels_b32_e32 v0, v0 3295; GFX10-NEXT: ; return to shader part epilog 3296entry: 3297 %ext = extractelement <15 x float> %vec, i32 %sel 3298 ret float %ext 3299} 3300 3301define amdgpu_ps float @dyn_extract_v15f32_s_s(<15 x float> inreg %vec, i32 inreg %sel) { 3302; GCN-LABEL: dyn_extract_v15f32_s_s: 3303; GCN: ; %bb.0: ; %entry 3304; GCN-NEXT: s_mov_b32 s0, s2 3305; GCN-NEXT: s_mov_b32 m0, s17 3306; GCN-NEXT: s_mov_b32 s1, s3 3307; GCN-NEXT: s_mov_b32 s2, s4 3308; GCN-NEXT: s_mov_b32 s3, s5 3309; GCN-NEXT: s_mov_b32 s4, s6 3310; GCN-NEXT: s_mov_b32 s5, s7 3311; GCN-NEXT: s_mov_b32 s6, s8 3312; GCN-NEXT: s_mov_b32 s7, s9 3313; GCN-NEXT: s_mov_b32 s8, s10 3314; GCN-NEXT: s_mov_b32 s9, s11 3315; GCN-NEXT: s_mov_b32 s10, s12 3316; GCN-NEXT: s_mov_b32 s11, s13 3317; GCN-NEXT: s_mov_b32 s12, s14 3318; GCN-NEXT: s_mov_b32 s13, s15 3319; GCN-NEXT: s_mov_b32 s14, s16 3320; GCN-NEXT: s_movrels_b32 s0, s0 3321; GCN-NEXT: v_mov_b32_e32 v0, s0 3322; GCN-NEXT: ; return to shader part epilog 3323; 3324; GFX10-LABEL: dyn_extract_v15f32_s_s: 3325; GFX10: ; %bb.0: ; %entry 3326; GFX10-NEXT: s_mov_b32 s0, s2 3327; GFX10-NEXT: s_mov_b32 m0, s17 3328; GFX10-NEXT: s_mov_b32 s1, s3 3329; GFX10-NEXT: s_mov_b32 s2, s4 3330; GFX10-NEXT: s_mov_b32 s3, s5 3331; GFX10-NEXT: s_mov_b32 s4, s6 3332; GFX10-NEXT: s_mov_b32 s5, s7 3333; GFX10-NEXT: s_mov_b32 s6, s8 3334; GFX10-NEXT: s_mov_b32 s7, s9 3335; GFX10-NEXT: s_mov_b32 s8, s10 3336; GFX10-NEXT: s_mov_b32 s9, s11 3337; GFX10-NEXT: s_mov_b32 s10, s12 3338; GFX10-NEXT: s_mov_b32 s11, s13 3339; GFX10-NEXT: s_mov_b32 s12, s14 3340; GFX10-NEXT: s_mov_b32 s13, s15 3341; GFX10-NEXT: s_mov_b32 s14, s16 3342; GFX10-NEXT: s_movrels_b32 s0, s0 3343; GFX10-NEXT: v_mov_b32_e32 v0, s0 3344; GFX10-NEXT: ; return to shader part epilog 3345entry: 3346 %ext = extractelement 
<15 x float> %vec, i32 %sel 3347 ret float %ext 3348} 3349 3350define amdgpu_ps float @dyn_extract_v15f32_s_s_offset3(<15 x float> inreg %vec, i32 inreg %sel) { 3351; GCN-LABEL: dyn_extract_v15f32_s_s_offset3: 3352; GCN: ; %bb.0: ; %entry 3353; GCN-NEXT: s_mov_b32 s0, s2 3354; GCN-NEXT: s_mov_b32 s1, s3 3355; GCN-NEXT: s_mov_b32 s3, s5 3356; GCN-NEXT: s_mov_b32 m0, s17 3357; GCN-NEXT: s_mov_b32 s2, s4 3358; GCN-NEXT: s_mov_b32 s4, s6 3359; GCN-NEXT: s_mov_b32 s5, s7 3360; GCN-NEXT: s_mov_b32 s6, s8 3361; GCN-NEXT: s_mov_b32 s7, s9 3362; GCN-NEXT: s_mov_b32 s8, s10 3363; GCN-NEXT: s_mov_b32 s9, s11 3364; GCN-NEXT: s_mov_b32 s10, s12 3365; GCN-NEXT: s_mov_b32 s11, s13 3366; GCN-NEXT: s_mov_b32 s12, s14 3367; GCN-NEXT: s_mov_b32 s13, s15 3368; GCN-NEXT: s_mov_b32 s14, s16 3369; GCN-NEXT: s_movrels_b32 s0, s3 3370; GCN-NEXT: v_mov_b32_e32 v0, s0 3371; GCN-NEXT: ; return to shader part epilog 3372; 3373; GFX10-LABEL: dyn_extract_v15f32_s_s_offset3: 3374; GFX10: ; %bb.0: ; %entry 3375; GFX10-NEXT: s_mov_b32 s1, s3 3376; GFX10-NEXT: s_mov_b32 s3, s5 3377; GFX10-NEXT: s_mov_b32 m0, s17 3378; GFX10-NEXT: s_mov_b32 s0, s2 3379; GFX10-NEXT: s_mov_b32 s2, s4 3380; GFX10-NEXT: s_mov_b32 s4, s6 3381; GFX10-NEXT: s_mov_b32 s5, s7 3382; GFX10-NEXT: s_mov_b32 s6, s8 3383; GFX10-NEXT: s_mov_b32 s7, s9 3384; GFX10-NEXT: s_mov_b32 s8, s10 3385; GFX10-NEXT: s_mov_b32 s9, s11 3386; GFX10-NEXT: s_mov_b32 s10, s12 3387; GFX10-NEXT: s_mov_b32 s11, s13 3388; GFX10-NEXT: s_mov_b32 s12, s14 3389; GFX10-NEXT: s_mov_b32 s13, s15 3390; GFX10-NEXT: s_mov_b32 s14, s16 3391; GFX10-NEXT: s_movrels_b32 s0, s3 3392; GFX10-NEXT: v_mov_b32_e32 v0, s0 3393; GFX10-NEXT: ; return to shader part epilog 3394entry: 3395 %add = add i32 %sel, 3 3396 %ext = extractelement <15 x float> %vec, i32 %add 3397 ret float %ext 3398} 3399 3400define float @dyn_extract_v15f32_v_v_offset3(<15 x float> %vec, i32 %sel) { 3401; GPRIDX-LABEL: dyn_extract_v15f32_v_v_offset3: 3402; GPRIDX: ; %bb.0: ; %entry 3403; GPRIDX-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3404; GPRIDX-NEXT: v_add_u32_e32 v15, 3, v15 3405; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v15 3406; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3407; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v15 3408; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3409; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v15 3410; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 3411; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v15 3412; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 3413; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v15 3414; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 3415; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v15 3416; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 3417; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v15 3418; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 3419; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v15 3420; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 3421; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v15 3422; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc 3423; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 10, v15 3424; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 3425; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 11, v15 3426; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc 3427; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 12, v15 3428; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 3429; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 13, v15 3430; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc 3431; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15 3432; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc 3433; GPRIDX-NEXT: s_setpc_b64 s[30:31] 3434; 3435; MOVREL-LABEL: dyn_extract_v15f32_v_v_offset3: 3436; MOVREL: ; %bb.0: ; %entry 3437; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3438; MOVREL-NEXT: v_add_u32_e32 v15, vcc, 3, v15 3439; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v15 3440; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3441; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v15 3442; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3443; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v15 
3444; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 3445; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v15 3446; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 3447; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v15 3448; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 3449; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v15 3450; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 3451; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v15 3452; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 3453; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 8, v15 3454; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 3455; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 9, v15 3456; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc 3457; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 10, v15 3458; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 3459; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 11, v15 3460; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc 3461; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 12, v15 3462; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 3463; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 13, v15 3464; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc 3465; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15 3466; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc 3467; MOVREL-NEXT: s_setpc_b64 s[30:31] 3468; 3469; GFX10-LABEL: dyn_extract_v15f32_v_v_offset3: 3470; GFX10: ; %bb.0: ; %entry 3471; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3472; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3473; GFX10-NEXT: v_add_nc_u32_e32 v15, 3, v15 3474; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15 3475; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 3476; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15 3477; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 3478; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15 3479; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 3480; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15 3481; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 3482; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15 3483; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 3484; GFX10-NEXT: 
v_cmp_eq_u32_e32 vcc_lo, 6, v15 3485; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 3486; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15 3487; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo 3488; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15 3489; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 3490; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15 3491; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo 3492; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15 3493; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 3494; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15 3495; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo 3496; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15 3497; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo 3498; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15 3499; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo 3500; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15 3501; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo 3502; GFX10-NEXT: s_setpc_b64 s[30:31] 3503entry: 3504 %add = add i32 %sel, 3 3505 %ext = extractelement <15 x float> %vec, i32 %add 3506 ret float %ext 3507} 3508 3509define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(float addrspace(1)* %out, i32 %sel) { 3510; GPRIDX-LABEL: dyn_extract_v4f32_s_s_s: 3511; GPRIDX: .amd_kernel_code_t 3512; GPRIDX-NEXT: amd_code_version_major = 1 3513; GPRIDX-NEXT: amd_code_version_minor = 2 3514; GPRIDX-NEXT: amd_machine_kind = 1 3515; GPRIDX-NEXT: amd_machine_version_major = 9 3516; GPRIDX-NEXT: amd_machine_version_minor = 0 3517; GPRIDX-NEXT: amd_machine_version_stepping = 0 3518; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256 3519; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0 3520; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0 3521; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 0 3522; GPRIDX-NEXT: priority = 0 3523; GPRIDX-NEXT: float_mode = 240 3524; GPRIDX-NEXT: priv = 0 3525; GPRIDX-NEXT: enable_dx10_clamp = 1 3526; GPRIDX-NEXT: debug_mode = 0 3527; GPRIDX-NEXT: enable_ieee_mode = 1 3528; 
GPRIDX-NEXT: enable_wgp_mode = 0 3529; GPRIDX-NEXT: enable_mem_ordered = 0 3530; GPRIDX-NEXT: enable_fwd_progress = 0 3531; GPRIDX-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 3532; GPRIDX-NEXT: user_sgpr_count = 6 3533; GPRIDX-NEXT: enable_trap_handler = 0 3534; GPRIDX-NEXT: enable_sgpr_workgroup_id_x = 1 3535; GPRIDX-NEXT: enable_sgpr_workgroup_id_y = 0 3536; GPRIDX-NEXT: enable_sgpr_workgroup_id_z = 0 3537; GPRIDX-NEXT: enable_sgpr_workgroup_info = 0 3538; GPRIDX-NEXT: enable_vgpr_workitem_id = 0 3539; GPRIDX-NEXT: enable_exception_msb = 0 3540; GPRIDX-NEXT: granulated_lds_size = 0 3541; GPRIDX-NEXT: enable_exception = 0 3542; GPRIDX-NEXT: enable_sgpr_private_segment_buffer = 1 3543; GPRIDX-NEXT: enable_sgpr_dispatch_ptr = 0 3544; GPRIDX-NEXT: enable_sgpr_queue_ptr = 0 3545; GPRIDX-NEXT: enable_sgpr_kernarg_segment_ptr = 1 3546; GPRIDX-NEXT: enable_sgpr_dispatch_id = 0 3547; GPRIDX-NEXT: enable_sgpr_flat_scratch_init = 0 3548; GPRIDX-NEXT: enable_sgpr_private_segment_size = 0 3549; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_x = 0 3550; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_y = 0 3551; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_z = 0 3552; GPRIDX-NEXT: enable_wavefront_size32 = 0 3553; GPRIDX-NEXT: enable_ordered_append_gds = 0 3554; GPRIDX-NEXT: private_element_size = 1 3555; GPRIDX-NEXT: is_ptr64 = 1 3556; GPRIDX-NEXT: is_dynamic_callstack = 0 3557; GPRIDX-NEXT: is_debug_enabled = 0 3558; GPRIDX-NEXT: is_xnack_enabled = 1 3559; GPRIDX-NEXT: workitem_private_segment_byte_size = 0 3560; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0 3561; GPRIDX-NEXT: gds_segment_byte_size = 0 3562; GPRIDX-NEXT: kernarg_segment_byte_size = 28 3563; GPRIDX-NEXT: workgroup_fbarrier_count = 0 3564; GPRIDX-NEXT: wavefront_sgpr_count = 6 3565; GPRIDX-NEXT: workitem_vgpr_count = 2 3566; GPRIDX-NEXT: reserved_vgpr_first = 0 3567; GPRIDX-NEXT: reserved_vgpr_count = 0 3568; GPRIDX-NEXT: reserved_sgpr_first = 0 3569; GPRIDX-NEXT: reserved_sgpr_count = 0 3570; 
GPRIDX-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 3571; GPRIDX-NEXT: debug_private_segment_buffer_sgpr = 0 3572; GPRIDX-NEXT: kernarg_segment_alignment = 4 3573; GPRIDX-NEXT: group_segment_alignment = 4 3574; GPRIDX-NEXT: private_segment_alignment = 4 3575; GPRIDX-NEXT: wavefront_size = 6 3576; GPRIDX-NEXT: call_convention = -1 3577; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0 3578; GPRIDX-NEXT: .end_amd_kernel_code_t 3579; GPRIDX-NEXT: ; %bb.0: ; %entry 3580; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3581; GPRIDX-NEXT: s_load_dword s2, s[4:5], 0x8 3582; GPRIDX-NEXT: v_mov_b32_e32 v1, 0 3583; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) 3584; GPRIDX-NEXT: s_cmp_eq_u32 s2, 1 3585; GPRIDX-NEXT: s_cselect_b32 s3, 2.0, 1.0 3586; GPRIDX-NEXT: s_cmp_eq_u32 s2, 2 3587; GPRIDX-NEXT: s_cselect_b32 s3, 0x40400000, s3 3588; GPRIDX-NEXT: s_cmp_eq_u32 s2, 3 3589; GPRIDX-NEXT: s_cselect_b32 s2, 4.0, s3 3590; GPRIDX-NEXT: v_mov_b32_e32 v0, s2 3591; GPRIDX-NEXT: global_store_dword v1, v0, s[0:1] 3592; GPRIDX-NEXT: s_endpgm 3593; 3594; MOVREL-LABEL: dyn_extract_v4f32_s_s_s: 3595; MOVREL: .amd_kernel_code_t 3596; MOVREL-NEXT: amd_code_version_major = 1 3597; MOVREL-NEXT: amd_code_version_minor = 2 3598; MOVREL-NEXT: amd_machine_kind = 1 3599; MOVREL-NEXT: amd_machine_version_major = 8 3600; MOVREL-NEXT: amd_machine_version_minor = 0 3601; MOVREL-NEXT: amd_machine_version_stepping = 3 3602; MOVREL-NEXT: kernel_code_entry_byte_offset = 256 3603; MOVREL-NEXT: kernel_code_prefetch_byte_size = 0 3604; MOVREL-NEXT: granulated_workitem_vgpr_count = 0 3605; MOVREL-NEXT: granulated_wavefront_sgpr_count = 0 3606; MOVREL-NEXT: priority = 0 3607; MOVREL-NEXT: float_mode = 240 3608; MOVREL-NEXT: priv = 0 3609; MOVREL-NEXT: enable_dx10_clamp = 1 3610; MOVREL-NEXT: debug_mode = 0 3611; MOVREL-NEXT: enable_ieee_mode = 1 3612; MOVREL-NEXT: enable_wgp_mode = 0 3613; MOVREL-NEXT: enable_mem_ordered = 0 3614; MOVREL-NEXT: enable_fwd_progress = 0 3615; MOVREL-NEXT: 
enable_sgpr_private_segment_wave_byte_offset = 0 3616; MOVREL-NEXT: user_sgpr_count = 6 3617; MOVREL-NEXT: enable_trap_handler = 0 3618; MOVREL-NEXT: enable_sgpr_workgroup_id_x = 1 3619; MOVREL-NEXT: enable_sgpr_workgroup_id_y = 0 3620; MOVREL-NEXT: enable_sgpr_workgroup_id_z = 0 3621; MOVREL-NEXT: enable_sgpr_workgroup_info = 0 3622; MOVREL-NEXT: enable_vgpr_workitem_id = 0 3623; MOVREL-NEXT: enable_exception_msb = 0 3624; MOVREL-NEXT: granulated_lds_size = 0 3625; MOVREL-NEXT: enable_exception = 0 3626; MOVREL-NEXT: enable_sgpr_private_segment_buffer = 1 3627; MOVREL-NEXT: enable_sgpr_dispatch_ptr = 0 3628; MOVREL-NEXT: enable_sgpr_queue_ptr = 0 3629; MOVREL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 3630; MOVREL-NEXT: enable_sgpr_dispatch_id = 0 3631; MOVREL-NEXT: enable_sgpr_flat_scratch_init = 0 3632; MOVREL-NEXT: enable_sgpr_private_segment_size = 0 3633; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 3634; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 3635; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 3636; MOVREL-NEXT: enable_wavefront_size32 = 0 3637; MOVREL-NEXT: enable_ordered_append_gds = 0 3638; MOVREL-NEXT: private_element_size = 1 3639; MOVREL-NEXT: is_ptr64 = 1 3640; MOVREL-NEXT: is_dynamic_callstack = 0 3641; MOVREL-NEXT: is_debug_enabled = 0 3642; MOVREL-NEXT: is_xnack_enabled = 0 3643; MOVREL-NEXT: workitem_private_segment_byte_size = 0 3644; MOVREL-NEXT: workgroup_group_segment_byte_size = 0 3645; MOVREL-NEXT: gds_segment_byte_size = 0 3646; MOVREL-NEXT: kernarg_segment_byte_size = 28 3647; MOVREL-NEXT: workgroup_fbarrier_count = 0 3648; MOVREL-NEXT: wavefront_sgpr_count = 6 3649; MOVREL-NEXT: workitem_vgpr_count = 3 3650; MOVREL-NEXT: reserved_vgpr_first = 0 3651; MOVREL-NEXT: reserved_vgpr_count = 0 3652; MOVREL-NEXT: reserved_sgpr_first = 0 3653; MOVREL-NEXT: reserved_sgpr_count = 0 3654; MOVREL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 3655; MOVREL-NEXT: debug_private_segment_buffer_sgpr = 0 3656; MOVREL-NEXT: 
kernarg_segment_alignment = 4 3657; MOVREL-NEXT: group_segment_alignment = 4 3658; MOVREL-NEXT: private_segment_alignment = 4 3659; MOVREL-NEXT: wavefront_size = 6 3660; MOVREL-NEXT: call_convention = -1 3661; MOVREL-NEXT: runtime_loader_kernel_symbol = 0 3662; MOVREL-NEXT: .end_amd_kernel_code_t 3663; MOVREL-NEXT: ; %bb.0: ; %entry 3664; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3665; MOVREL-NEXT: s_load_dword s2, s[4:5], 0x8 3666; MOVREL-NEXT: s_waitcnt lgkmcnt(0) 3667; MOVREL-NEXT: v_mov_b32_e32 v0, s0 3668; MOVREL-NEXT: s_cmp_eq_u32 s2, 1 3669; MOVREL-NEXT: s_cselect_b32 s3, 2.0, 1.0 3670; MOVREL-NEXT: s_cmp_eq_u32 s2, 2 3671; MOVREL-NEXT: s_cselect_b32 s3, 0x40400000, s3 3672; MOVREL-NEXT: s_cmp_eq_u32 s2, 3 3673; MOVREL-NEXT: s_cselect_b32 s2, 4.0, s3 3674; MOVREL-NEXT: v_mov_b32_e32 v2, s2 3675; MOVREL-NEXT: v_mov_b32_e32 v1, s1 3676; MOVREL-NEXT: flat_store_dword v[0:1], v2 3677; MOVREL-NEXT: s_endpgm 3678; 3679; GFX10-LABEL: dyn_extract_v4f32_s_s_s: 3680; GFX10: .amd_kernel_code_t 3681; GFX10-NEXT: amd_code_version_major = 1 3682; GFX10-NEXT: amd_code_version_minor = 2 3683; GFX10-NEXT: amd_machine_kind = 1 3684; GFX10-NEXT: amd_machine_version_major = 10 3685; GFX10-NEXT: amd_machine_version_minor = 1 3686; GFX10-NEXT: amd_machine_version_stepping = 0 3687; GFX10-NEXT: kernel_code_entry_byte_offset = 256 3688; GFX10-NEXT: kernel_code_prefetch_byte_size = 0 3689; GFX10-NEXT: granulated_workitem_vgpr_count = 0 3690; GFX10-NEXT: granulated_wavefront_sgpr_count = 0 3691; GFX10-NEXT: priority = 0 3692; GFX10-NEXT: float_mode = 240 3693; GFX10-NEXT: priv = 0 3694; GFX10-NEXT: enable_dx10_clamp = 1 3695; GFX10-NEXT: debug_mode = 0 3696; GFX10-NEXT: enable_ieee_mode = 1 3697; GFX10-NEXT: enable_wgp_mode = 1 3698; GFX10-NEXT: enable_mem_ordered = 1 3699; GFX10-NEXT: enable_fwd_progress = 0 3700; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 3701; GFX10-NEXT: user_sgpr_count = 6 3702; GFX10-NEXT: enable_trap_handler = 0 3703; GFX10-NEXT: 
enable_sgpr_workgroup_id_x = 1 3704; GFX10-NEXT: enable_sgpr_workgroup_id_y = 0 3705; GFX10-NEXT: enable_sgpr_workgroup_id_z = 0 3706; GFX10-NEXT: enable_sgpr_workgroup_info = 0 3707; GFX10-NEXT: enable_vgpr_workitem_id = 0 3708; GFX10-NEXT: enable_exception_msb = 0 3709; GFX10-NEXT: granulated_lds_size = 0 3710; GFX10-NEXT: enable_exception = 0 3711; GFX10-NEXT: enable_sgpr_private_segment_buffer = 1 3712; GFX10-NEXT: enable_sgpr_dispatch_ptr = 0 3713; GFX10-NEXT: enable_sgpr_queue_ptr = 0 3714; GFX10-NEXT: enable_sgpr_kernarg_segment_ptr = 1 3715; GFX10-NEXT: enable_sgpr_dispatch_id = 0 3716; GFX10-NEXT: enable_sgpr_flat_scratch_init = 0 3717; GFX10-NEXT: enable_sgpr_private_segment_size = 0 3718; GFX10-NEXT: enable_sgpr_grid_workgroup_count_x = 0 3719; GFX10-NEXT: enable_sgpr_grid_workgroup_count_y = 0 3720; GFX10-NEXT: enable_sgpr_grid_workgroup_count_z = 0 3721; GFX10-NEXT: enable_wavefront_size32 = 1 3722; GFX10-NEXT: enable_ordered_append_gds = 0 3723; GFX10-NEXT: private_element_size = 1 3724; GFX10-NEXT: is_ptr64 = 1 3725; GFX10-NEXT: is_dynamic_callstack = 0 3726; GFX10-NEXT: is_debug_enabled = 0 3727; GFX10-NEXT: is_xnack_enabled = 1 3728; GFX10-NEXT: workitem_private_segment_byte_size = 0 3729; GFX10-NEXT: workgroup_group_segment_byte_size = 0 3730; GFX10-NEXT: gds_segment_byte_size = 0 3731; GFX10-NEXT: kernarg_segment_byte_size = 28 3732; GFX10-NEXT: workgroup_fbarrier_count = 0 3733; GFX10-NEXT: wavefront_sgpr_count = 6 3734; GFX10-NEXT: workitem_vgpr_count = 2 3735; GFX10-NEXT: reserved_vgpr_first = 0 3736; GFX10-NEXT: reserved_vgpr_count = 0 3737; GFX10-NEXT: reserved_sgpr_first = 0 3738; GFX10-NEXT: reserved_sgpr_count = 0 3739; GFX10-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 3740; GFX10-NEXT: debug_private_segment_buffer_sgpr = 0 3741; GFX10-NEXT: kernarg_segment_alignment = 4 3742; GFX10-NEXT: group_segment_alignment = 4 3743; GFX10-NEXT: private_segment_alignment = 4 3744; GFX10-NEXT: wavefront_size = 5 3745; GFX10-NEXT: 
call_convention = -1 3746; GFX10-NEXT: runtime_loader_kernel_symbol = 0 3747; GFX10-NEXT: .end_amd_kernel_code_t 3748; GFX10-NEXT: ; %bb.0: ; %entry 3749; GFX10-NEXT: s_clause 0x1 3750; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8 3751; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3752; GFX10-NEXT: v_mov_b32_e32 v1, 0 3753; GFX10-NEXT: s_waitcnt lgkmcnt(0) 3754; GFX10-NEXT: s_cmp_eq_u32 s2, 1 3755; GFX10-NEXT: s_cselect_b32 s3, 2.0, 1.0 3756; GFX10-NEXT: s_cmp_eq_u32 s2, 2 3757; GFX10-NEXT: s_cselect_b32 s3, 0x40400000, s3 3758; GFX10-NEXT: s_cmp_eq_u32 s2, 3 3759; GFX10-NEXT: s_cselect_b32 s2, 4.0, s3 3760; GFX10-NEXT: v_mov_b32_e32 v0, s2 3761; GFX10-NEXT: global_store_dword v1, v0, s[0:1] 3762; GFX10-NEXT: s_endpgm 3763entry: 3764 %ext = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %sel 3765 store float %ext, float addrspace(1)* %out 3766 ret void 3767} 3768 3769define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(double addrspace(1)* %out, i32 %sel) { 3770; GPRIDX-LABEL: dyn_extract_v4f64_s_s_s: 3771; GPRIDX: .amd_kernel_code_t 3772; GPRIDX-NEXT: amd_code_version_major = 1 3773; GPRIDX-NEXT: amd_code_version_minor = 2 3774; GPRIDX-NEXT: amd_machine_kind = 1 3775; GPRIDX-NEXT: amd_machine_version_major = 9 3776; GPRIDX-NEXT: amd_machine_version_minor = 0 3777; GPRIDX-NEXT: amd_machine_version_stepping = 0 3778; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256 3779; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0 3780; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0 3781; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 0 3782; GPRIDX-NEXT: priority = 0 3783; GPRIDX-NEXT: float_mode = 240 3784; GPRIDX-NEXT: priv = 0 3785; GPRIDX-NEXT: enable_dx10_clamp = 1 3786; GPRIDX-NEXT: debug_mode = 0 3787; GPRIDX-NEXT: enable_ieee_mode = 1 3788; GPRIDX-NEXT: enable_wgp_mode = 0 3789; GPRIDX-NEXT: enable_mem_ordered = 0 3790; GPRIDX-NEXT: enable_fwd_progress = 0 3791; GPRIDX-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 3792; GPRIDX-NEXT: 
user_sgpr_count = 6 3793; GPRIDX-NEXT: enable_trap_handler = 0 3794; GPRIDX-NEXT: enable_sgpr_workgroup_id_x = 1 3795; GPRIDX-NEXT: enable_sgpr_workgroup_id_y = 0 3796; GPRIDX-NEXT: enable_sgpr_workgroup_id_z = 0 3797; GPRIDX-NEXT: enable_sgpr_workgroup_info = 0 3798; GPRIDX-NEXT: enable_vgpr_workitem_id = 0 3799; GPRIDX-NEXT: enable_exception_msb = 0 3800; GPRIDX-NEXT: granulated_lds_size = 0 3801; GPRIDX-NEXT: enable_exception = 0 3802; GPRIDX-NEXT: enable_sgpr_private_segment_buffer = 1 3803; GPRIDX-NEXT: enable_sgpr_dispatch_ptr = 0 3804; GPRIDX-NEXT: enable_sgpr_queue_ptr = 0 3805; GPRIDX-NEXT: enable_sgpr_kernarg_segment_ptr = 1 3806; GPRIDX-NEXT: enable_sgpr_dispatch_id = 0 3807; GPRIDX-NEXT: enable_sgpr_flat_scratch_init = 0 3808; GPRIDX-NEXT: enable_sgpr_private_segment_size = 0 3809; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_x = 0 3810; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_y = 0 3811; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_z = 0 3812; GPRIDX-NEXT: enable_wavefront_size32 = 0 3813; GPRIDX-NEXT: enable_ordered_append_gds = 0 3814; GPRIDX-NEXT: private_element_size = 1 3815; GPRIDX-NEXT: is_ptr64 = 1 3816; GPRIDX-NEXT: is_dynamic_callstack = 0 3817; GPRIDX-NEXT: is_debug_enabled = 0 3818; GPRIDX-NEXT: is_xnack_enabled = 1 3819; GPRIDX-NEXT: workitem_private_segment_byte_size = 0 3820; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0 3821; GPRIDX-NEXT: gds_segment_byte_size = 0 3822; GPRIDX-NEXT: kernarg_segment_byte_size = 28 3823; GPRIDX-NEXT: workgroup_fbarrier_count = 0 3824; GPRIDX-NEXT: wavefront_sgpr_count = 7 3825; GPRIDX-NEXT: workitem_vgpr_count = 3 3826; GPRIDX-NEXT: reserved_vgpr_first = 0 3827; GPRIDX-NEXT: reserved_vgpr_count = 0 3828; GPRIDX-NEXT: reserved_sgpr_first = 0 3829; GPRIDX-NEXT: reserved_sgpr_count = 0 3830; GPRIDX-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 3831; GPRIDX-NEXT: debug_private_segment_buffer_sgpr = 0 3832; GPRIDX-NEXT: kernarg_segment_alignment = 4 3833; GPRIDX-NEXT: 
group_segment_alignment = 4 3834; GPRIDX-NEXT: private_segment_alignment = 4 3835; GPRIDX-NEXT: wavefront_size = 6 3836; GPRIDX-NEXT: call_convention = -1 3837; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0 3838; GPRIDX-NEXT: .end_amd_kernel_code_t 3839; GPRIDX-NEXT: ; %bb.0: ; %entry 3840; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3841; GPRIDX-NEXT: s_load_dword s6, s[4:5], 0x8 3842; GPRIDX-NEXT: s_mov_b32 s2, 0 3843; GPRIDX-NEXT: s_mov_b32 s3, 0x40080000 3844; GPRIDX-NEXT: v_mov_b32_e32 v2, 0 3845; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) 3846; GPRIDX-NEXT: s_cmp_eq_u32 s6, 1 3847; GPRIDX-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 3848; GPRIDX-NEXT: s_cmp_eq_u32 s6, 2 3849; GPRIDX-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] 3850; GPRIDX-NEXT: s_cmp_eq_u32 s6, 3 3851; GPRIDX-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] 3852; GPRIDX-NEXT: v_mov_b32_e32 v0, s2 3853; GPRIDX-NEXT: v_mov_b32_e32 v1, s3 3854; GPRIDX-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 3855; GPRIDX-NEXT: s_endpgm 3856; 3857; MOVREL-LABEL: dyn_extract_v4f64_s_s_s: 3858; MOVREL: .amd_kernel_code_t 3859; MOVREL-NEXT: amd_code_version_major = 1 3860; MOVREL-NEXT: amd_code_version_minor = 2 3861; MOVREL-NEXT: amd_machine_kind = 1 3862; MOVREL-NEXT: amd_machine_version_major = 8 3863; MOVREL-NEXT: amd_machine_version_minor = 0 3864; MOVREL-NEXT: amd_machine_version_stepping = 3 3865; MOVREL-NEXT: kernel_code_entry_byte_offset = 256 3866; MOVREL-NEXT: kernel_code_prefetch_byte_size = 0 3867; MOVREL-NEXT: granulated_workitem_vgpr_count = 0 3868; MOVREL-NEXT: granulated_wavefront_sgpr_count = 0 3869; MOVREL-NEXT: priority = 0 3870; MOVREL-NEXT: float_mode = 240 3871; MOVREL-NEXT: priv = 0 3872; MOVREL-NEXT: enable_dx10_clamp = 1 3873; MOVREL-NEXT: debug_mode = 0 3874; MOVREL-NEXT: enable_ieee_mode = 1 3875; MOVREL-NEXT: enable_wgp_mode = 0 3876; MOVREL-NEXT: enable_mem_ordered = 0 3877; MOVREL-NEXT: enable_fwd_progress = 0 3878; MOVREL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 3879; MOVREL-NEXT: 
user_sgpr_count = 6 3880; MOVREL-NEXT: enable_trap_handler = 0 3881; MOVREL-NEXT: enable_sgpr_workgroup_id_x = 1 3882; MOVREL-NEXT: enable_sgpr_workgroup_id_y = 0 3883; MOVREL-NEXT: enable_sgpr_workgroup_id_z = 0 3884; MOVREL-NEXT: enable_sgpr_workgroup_info = 0 3885; MOVREL-NEXT: enable_vgpr_workitem_id = 0 3886; MOVREL-NEXT: enable_exception_msb = 0 3887; MOVREL-NEXT: granulated_lds_size = 0 3888; MOVREL-NEXT: enable_exception = 0 3889; MOVREL-NEXT: enable_sgpr_private_segment_buffer = 1 3890; MOVREL-NEXT: enable_sgpr_dispatch_ptr = 0 3891; MOVREL-NEXT: enable_sgpr_queue_ptr = 0 3892; MOVREL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 3893; MOVREL-NEXT: enable_sgpr_dispatch_id = 0 3894; MOVREL-NEXT: enable_sgpr_flat_scratch_init = 0 3895; MOVREL-NEXT: enable_sgpr_private_segment_size = 0 3896; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 3897; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 3898; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 3899; MOVREL-NEXT: enable_wavefront_size32 = 0 3900; MOVREL-NEXT: enable_ordered_append_gds = 0 3901; MOVREL-NEXT: private_element_size = 1 3902; MOVREL-NEXT: is_ptr64 = 1 3903; MOVREL-NEXT: is_dynamic_callstack = 0 3904; MOVREL-NEXT: is_debug_enabled = 0 3905; MOVREL-NEXT: is_xnack_enabled = 0 3906; MOVREL-NEXT: workitem_private_segment_byte_size = 0 3907; MOVREL-NEXT: workgroup_group_segment_byte_size = 0 3908; MOVREL-NEXT: gds_segment_byte_size = 0 3909; MOVREL-NEXT: kernarg_segment_byte_size = 28 3910; MOVREL-NEXT: workgroup_fbarrier_count = 0 3911; MOVREL-NEXT: wavefront_sgpr_count = 7 3912; MOVREL-NEXT: workitem_vgpr_count = 4 3913; MOVREL-NEXT: reserved_vgpr_first = 0 3914; MOVREL-NEXT: reserved_vgpr_count = 0 3915; MOVREL-NEXT: reserved_sgpr_first = 0 3916; MOVREL-NEXT: reserved_sgpr_count = 0 3917; MOVREL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 3918; MOVREL-NEXT: debug_private_segment_buffer_sgpr = 0 3919; MOVREL-NEXT: kernarg_segment_alignment = 4 3920; MOVREL-NEXT: 
group_segment_alignment = 4 3921; MOVREL-NEXT: private_segment_alignment = 4 3922; MOVREL-NEXT: wavefront_size = 6 3923; MOVREL-NEXT: call_convention = -1 3924; MOVREL-NEXT: runtime_loader_kernel_symbol = 0 3925; MOVREL-NEXT: .end_amd_kernel_code_t 3926; MOVREL-NEXT: ; %bb.0: ; %entry 3927; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3928; MOVREL-NEXT: s_load_dword s6, s[4:5], 0x8 3929; MOVREL-NEXT: s_mov_b32 s2, 0 3930; MOVREL-NEXT: s_mov_b32 s3, 0x40080000 3931; MOVREL-NEXT: s_waitcnt lgkmcnt(0) 3932; MOVREL-NEXT: v_mov_b32_e32 v3, s1 3933; MOVREL-NEXT: s_cmp_eq_u32 s6, 1 3934; MOVREL-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 3935; MOVREL-NEXT: s_cmp_eq_u32 s6, 2 3936; MOVREL-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] 3937; MOVREL-NEXT: s_cmp_eq_u32 s6, 3 3938; MOVREL-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] 3939; MOVREL-NEXT: v_mov_b32_e32 v0, s2 3940; MOVREL-NEXT: v_mov_b32_e32 v1, s3 3941; MOVREL-NEXT: v_mov_b32_e32 v2, s0 3942; MOVREL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 3943; MOVREL-NEXT: s_endpgm 3944; 3945; GFX10-LABEL: dyn_extract_v4f64_s_s_s: 3946; GFX10: .amd_kernel_code_t 3947; GFX10-NEXT: amd_code_version_major = 1 3948; GFX10-NEXT: amd_code_version_minor = 2 3949; GFX10-NEXT: amd_machine_kind = 1 3950; GFX10-NEXT: amd_machine_version_major = 10 3951; GFX10-NEXT: amd_machine_version_minor = 1 3952; GFX10-NEXT: amd_machine_version_stepping = 0 3953; GFX10-NEXT: kernel_code_entry_byte_offset = 256 3954; GFX10-NEXT: kernel_code_prefetch_byte_size = 0 3955; GFX10-NEXT: granulated_workitem_vgpr_count = 0 3956; GFX10-NEXT: granulated_wavefront_sgpr_count = 0 3957; GFX10-NEXT: priority = 0 3958; GFX10-NEXT: float_mode = 240 3959; GFX10-NEXT: priv = 0 3960; GFX10-NEXT: enable_dx10_clamp = 1 3961; GFX10-NEXT: debug_mode = 0 3962; GFX10-NEXT: enable_ieee_mode = 1 3963; GFX10-NEXT: enable_wgp_mode = 1 3964; GFX10-NEXT: enable_mem_ordered = 1 3965; GFX10-NEXT: enable_fwd_progress = 0 3966; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 3967; 
GFX10-NEXT: user_sgpr_count = 6
; GFX10-NEXT: enable_trap_handler = 0
; GFX10-NEXT: enable_sgpr_workgroup_id_x = 1
; GFX10-NEXT: enable_sgpr_workgroup_id_y = 0
; GFX10-NEXT: enable_sgpr_workgroup_id_z = 0
; GFX10-NEXT: enable_sgpr_workgroup_info = 0
; GFX10-NEXT: enable_vgpr_workitem_id = 0
; GFX10-NEXT: enable_exception_msb = 0
; GFX10-NEXT: granulated_lds_size = 0
; GFX10-NEXT: enable_exception = 0
; GFX10-NEXT: enable_sgpr_private_segment_buffer = 1
; GFX10-NEXT: enable_sgpr_dispatch_ptr = 0
; GFX10-NEXT: enable_sgpr_queue_ptr = 0
; GFX10-NEXT: enable_sgpr_kernarg_segment_ptr = 1
; GFX10-NEXT: enable_sgpr_dispatch_id = 0
; GFX10-NEXT: enable_sgpr_flat_scratch_init = 0
; GFX10-NEXT: enable_sgpr_private_segment_size = 0
; GFX10-NEXT: enable_sgpr_grid_workgroup_count_x = 0
; GFX10-NEXT: enable_sgpr_grid_workgroup_count_y = 0
; GFX10-NEXT: enable_sgpr_grid_workgroup_count_z = 0
; GFX10-NEXT: enable_wavefront_size32 = 1
; GFX10-NEXT: enable_ordered_append_gds = 0
; GFX10-NEXT: private_element_size = 1
; GFX10-NEXT: is_ptr64 = 1
; GFX10-NEXT: is_dynamic_callstack = 0
; GFX10-NEXT: is_debug_enabled = 0
; GFX10-NEXT: is_xnack_enabled = 1
; GFX10-NEXT: workitem_private_segment_byte_size = 0
; GFX10-NEXT: workgroup_group_segment_byte_size = 0
; GFX10-NEXT: gds_segment_byte_size = 0
; GFX10-NEXT: kernarg_segment_byte_size = 28
; GFX10-NEXT: workgroup_fbarrier_count = 0
; GFX10-NEXT: wavefront_sgpr_count = 7
; GFX10-NEXT: workitem_vgpr_count = 3
; GFX10-NEXT: reserved_vgpr_first = 0
; GFX10-NEXT: reserved_vgpr_count = 0
; GFX10-NEXT: reserved_sgpr_first = 0
; GFX10-NEXT: reserved_sgpr_count = 0
; GFX10-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
; GFX10-NEXT: debug_private_segment_buffer_sgpr = 0
; GFX10-NEXT: kernarg_segment_alignment = 4
; GFX10-NEXT: group_segment_alignment = 4
; GFX10-NEXT: private_segment_alignment = 4
; GFX10-NEXT: wavefront_size = 5
; GFX10-NEXT: call_convention = -1
; GFX10-NEXT: runtime_loader_kernel_symbol = 0
; GFX10-NEXT: .end_amd_kernel_code_t
; GFX10-NEXT: ; %bb.0: ; %entry
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dword s6, s[4:5], 0x8
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-NEXT: s_mov_b32 s2, 0
; GFX10-NEXT: s_mov_b32 s3, 0x40080000
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s6, 1
; GFX10-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0
; GFX10-NEXT: s_cmp_eq_u32 s6, 2
; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
; GFX10-NEXT: s_cmp_eq_u32 s6, 3
; GFX10-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3]
; GFX10-NEXT: v_mov_b32_e32 v0, s2
; GFX10-NEXT: v_mov_b32_e32 v1, s3
; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX10-NEXT: s_endpgm
entry:
  %ext = extractelement <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, i32 %sel
  store double %ext, double addrspace(1)* %out
  ret void
}

; Constant-index extract of element 7 from a <64 x i32> that is loaded through
; a global (addrspace(1)) pointer.
;
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
;
; Expected output, as pinned by the assertions: only one 16-byte chunk of the
; vector is loaded, at byte offset 16 (i32 elements 4..7), and the return value
; is copied from the last register of that chunk. The gfx900 and gfx1010 runs
; fold the offset into a global load; the fiji run adds 16 to the 64-bit
; address with a VALU add/add-with-carry pair and issues a flat load.
define i32 @v_extract_v64i32_7(<64 x i32> addrspace(1)* %ptr) {
; GPRIDX-LABEL: v_extract_v64i32_7:
; GPRIDX: ; %bb.0:
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
; GPRIDX-NEXT: s_waitcnt vmcnt(0)
; GPRIDX-NEXT: v_mov_b32_e32 v0, v7
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: v_extract_v64i32_7:
; MOVREL: ; %bb.0:
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; MOVREL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
; MOVREL-NEXT: s_waitcnt vmcnt(0)
; MOVREL-NEXT: v_mov_b32_e32 v0, v7
; MOVREL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_extract_v64i32_7:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v7
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr
  %elt = extractelement <64 x i32> %vec, i32 7
  ret i32 %elt
}

; Constant-index extract of element 32 from a <64 x i32> loaded through a
; global (addrspace(1)) pointer.
;
; Expected output, as pinned by the autogenerated assertions: one 16-byte load
; at byte offset 128 (i32 elements 32..35) written directly to v[0:3], so the
; wanted element is already in v0 and no copy precedes the return. In the fiji
; run the 0x80 offset is first materialized in an SGPR pair, copied into
; VGPRs, and then added to the address ahead of a flat load.
define i32 @v_extract_v64i32_32(<64 x i32> addrspace(1)* %ptr) {
; GPRIDX-LABEL: v_extract_v64i32_32:
; GPRIDX: ; %bb.0:
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128
; GPRIDX-NEXT: s_waitcnt vmcnt(0)
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: v_extract_v64i32_32:
; MOVREL: ; %bb.0:
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: s_mov_b64 s[4:5], 0x80
; MOVREL-NEXT: v_mov_b32_e32 v2, s4
; MOVREL-NEXT: v_mov_b32_e32 v3, s5
; MOVREL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; MOVREL-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
; MOVREL-NEXT: s_waitcnt vmcnt(0)
; MOVREL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_extract_v64i32_32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr
  %elt = extractelement <64 x i32> %vec, i32 32
  ret i32 %elt
}

; Constant-index extract of element 33 from a <64 x i32> loaded through a
; global (addrspace(1)) pointer.
;
; Expected output, as pinned by the autogenerated assertions: the same 16-byte
; load at byte offset 128 as the element-32 case, followed by a copy of the
; second loaded register (v1) into the return register v0. The fiji run again
; builds the 0x80 offset in SGPRs, moves it to VGPRs, and adds it to the
; address before a flat load.
define i32 @v_extract_v64i32_33(<64 x i32> addrspace(1)* %ptr) {
; GPRIDX-LABEL: v_extract_v64i32_33:
; GPRIDX: ; %bb.0:
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128
; GPRIDX-NEXT: s_waitcnt vmcnt(0)
; GPRIDX-NEXT: v_mov_b32_e32 v0, v1
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: v_extract_v64i32_33:
; MOVREL: ; %bb.0:
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: s_mov_b64 s[4:5], 0x80
; MOVREL-NEXT: v_mov_b32_e32 v2, s4
; MOVREL-NEXT: v_mov_b32_e32 v3, s5
; MOVREL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; MOVREL-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
; MOVREL-NEXT: s_waitcnt vmcnt(0)
; MOVREL-NEXT: v_mov_b32_e32 v0, v1
; MOVREL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_extract_v64i32_33:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr
  %elt = extractelement <64 x i32> %vec, i32 33
  ret i32 %elt
}

; Constant-index extract of element 37 from a <64 x i32> loaded through a
; global (addrspace(1)) pointer.
;
; Expected output, as pinned by the autogenerated assertions: one 16-byte load
; at byte offset 144 (i32 elements 36..39) into v[4:7], with the return value
; copied from v5. Unlike the offset-128 cases, the fiji run here uses 0x90
; directly as an operand of the low address add instead of staging it through
; SGPRs.
define i32 @v_extract_v64i32_37(<64 x i32> addrspace(1)* %ptr) {
; GPRIDX-LABEL: v_extract_v64i32_37:
; GPRIDX: ; %bb.0:
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:144
; GPRIDX-NEXT: s_waitcnt vmcnt(0)
; GPRIDX-NEXT: v_mov_b32_e32 v0, v5
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: v_extract_v64i32_37:
; MOVREL: ; %bb.0:
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: v_add_u32_e32 v0, vcc, 0x90, v0
; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; MOVREL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
; MOVREL-NEXT: s_waitcnt vmcnt(0)
; MOVREL-NEXT: v_mov_b32_e32 v0, v5
; MOVREL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_extract_v64i32_37:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:144
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr
  %elt = extractelement <64 x i32> %vec, i32 37
  ret i32 %elt
}