; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s

; Covers insertelement on a very wide vector (<64 x i32>) with an index that
; is only known at run time. The lowering is expected to go through the stack
; rather than use register indexing: in the expected output below, all 64
; elements are written out with buffer_store_dword (offsets 256-508), the new
; value is stored at an offset derived from the index (masked with 63, shifted
; left by 2, added to 0x100), and all 64 elements are then read back with
; buffer_load_dword before the final global stores.

define amdgpu_kernel void @v_insert_v64i32_varidx(<64 x i32> addrspace(1)* %out.ptr, <64 x i32> addrspace(1)* %ptr, i32 %val, i32 %idx) #0 {
; GCN-LABEL: v_insert_v64i32_varidx:
; GCN: ; %bb.0:
; GCN-NEXT: s_add_u32 s0, s0, s7
; GCN-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x0
; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
; GCN-NEXT: s_addc_u32 s1, s1, 0
; GCN-NEXT: v_mov_b32_e32 v16, 0x100
; GCN-NEXT: v_mov_b32_e32 v64, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_load_dwordx16 s[36:51], s[10:11], 0x0
; GCN-NEXT: s_load_dwordx16 s[52:67], s[10:11], 0x40
; GCN-NEXT: s_load_dwordx16 s[12:27], s[10:11], 0x80
; GCN-NEXT: s_and_b32 s4, s7, 63
; GCN-NEXT: s_lshl_b32 s4, s4, 2
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s36
; GCN-NEXT: v_mov_b32_e32 v1, s37
; GCN-NEXT: v_mov_b32_e32 v2, s38
; GCN-NEXT: v_mov_b32_e32 v3, s39
; GCN-NEXT: v_mov_b32_e32 v4, s40
; GCN-NEXT: v_mov_b32_e32 v5, s41
; GCN-NEXT: v_mov_b32_e32 v6, s42
; GCN-NEXT: v_mov_b32_e32 v7, s43
; GCN-NEXT: v_mov_b32_e32 v8, s44
; GCN-NEXT: v_mov_b32_e32 v9, s45
; GCN-NEXT: v_mov_b32_e32 v10, s46
; GCN-NEXT: v_mov_b32_e32 v11, s47
; GCN-NEXT: v_mov_b32_e32 v12, s48
; GCN-NEXT: v_mov_b32_e32 v13, s49
; GCN-NEXT: v_mov_b32_e32 v14, s50
; GCN-NEXT: v_mov_b32_e32 v15, s51
; GCN-NEXT: s_load_dwordx16 s[36:51], s[10:11], 0xc0
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:256
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:260
; GCN-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:264
; GCN-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:268
; GCN-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:272
; GCN-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:276
; GCN-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:280
; GCN-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:284
; GCN-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:288
; GCN-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:292
; GCN-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:296
; GCN-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:300
; GCN-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:304
; GCN-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:308
; GCN-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:312
; GCN-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:316
; GCN-NEXT: v_mov_b32_e32 v0, s52
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:320
; GCN-NEXT: v_mov_b32_e32 v0, s53
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:324
; GCN-NEXT: v_mov_b32_e32 v0, s54
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:328
; GCN-NEXT: v_mov_b32_e32 v0, s55
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:332
; GCN-NEXT: v_mov_b32_e32 v0, s56
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:336
; GCN-NEXT: v_mov_b32_e32 v0, s57
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:340
; GCN-NEXT: v_mov_b32_e32 v0, s58
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:344
; GCN-NEXT: v_mov_b32_e32 v0, s59
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:348
; GCN-NEXT: v_mov_b32_e32 v0, s60
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:352
; GCN-NEXT: v_mov_b32_e32 v0, s61
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:356
; GCN-NEXT: v_mov_b32_e32 v0, s62
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:360
; GCN-NEXT: v_mov_b32_e32 v0, s63
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:364
; GCN-NEXT: v_mov_b32_e32 v0, s64
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:368
; GCN-NEXT: v_mov_b32_e32 v0, s65
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:372
; GCN-NEXT: v_mov_b32_e32 v0, s66
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:376
; GCN-NEXT: v_mov_b32_e32 v0, s67
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:380
; GCN-NEXT: v_mov_b32_e32 v0, s12
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:384
; GCN-NEXT: v_mov_b32_e32 v0, s13
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:388
; GCN-NEXT: v_mov_b32_e32 v0, s14
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:392
; GCN-NEXT: v_mov_b32_e32 v0, s15
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:396
; GCN-NEXT: v_mov_b32_e32 v0, s16
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:400
; GCN-NEXT: v_mov_b32_e32 v0, s17
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:404
; GCN-NEXT: v_mov_b32_e32 v0, s18
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:408
; GCN-NEXT: v_mov_b32_e32 v0, s19
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:412
; GCN-NEXT: v_mov_b32_e32 v0, s20
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:416
; GCN-NEXT: v_mov_b32_e32 v0, s21
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:420
; GCN-NEXT: v_mov_b32_e32 v0, s22
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:424
; GCN-NEXT: v_mov_b32_e32 v0, s23
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:428
; GCN-NEXT: v_mov_b32_e32 v0, s24
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:432
; GCN-NEXT: v_mov_b32_e32 v0, s25
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:436
; GCN-NEXT: v_mov_b32_e32 v0, s26
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:440
; GCN-NEXT: v_mov_b32_e32 v0, s27
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:444
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s36
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:448
; GCN-NEXT: v_mov_b32_e32 v0, s37
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:452
; GCN-NEXT: v_mov_b32_e32 v0, s38
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:456
; GCN-NEXT: v_mov_b32_e32 v0, s39
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:460
; GCN-NEXT: v_mov_b32_e32 v0, s40
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:464
; GCN-NEXT: v_mov_b32_e32 v0, s41
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:468
; GCN-NEXT: v_mov_b32_e32 v0, s42
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:472
; GCN-NEXT: v_mov_b32_e32 v0, s43
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:476
; GCN-NEXT: v_mov_b32_e32 v0, s44
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:480
; GCN-NEXT: v_mov_b32_e32 v0, s45
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:484
; GCN-NEXT: v_mov_b32_e32 v0, s46
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:488
; GCN-NEXT: v_mov_b32_e32 v0, s47
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:492
; GCN-NEXT: v_mov_b32_e32 v0, s48
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:496
; GCN-NEXT: v_mov_b32_e32 v0, s49
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:500
; GCN-NEXT: v_mov_b32_e32 v0, s50
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:504
; GCN-NEXT: v_mov_b32_e32 v0, s51
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:508
; GCN-NEXT: v_add_u32_e32 v0, s4, v16
; GCN-NEXT: v_mov_b32_e32 v1, s6
; GCN-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:256
; GCN-NEXT: s_nop 0
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:260
; GCN-NEXT: buffer_load_dword v2, off, s[0:3], 0 offset:264
; GCN-NEXT: buffer_load_dword v3, off, s[0:3], 0 offset:268
; GCN-NEXT: buffer_load_dword v4, off, s[0:3], 0 offset:272
; GCN-NEXT: buffer_load_dword v5, off, s[0:3], 0 offset:276
; GCN-NEXT: buffer_load_dword v6, off, s[0:3], 0 offset:280
; GCN-NEXT: buffer_load_dword v7, off, s[0:3], 0 offset:284
; GCN-NEXT: buffer_load_dword v8, off, s[0:3], 0 offset:288
; GCN-NEXT: buffer_load_dword v9, off, s[0:3], 0 offset:292
; GCN-NEXT: buffer_load_dword v10, off, s[0:3], 0 offset:296
; GCN-NEXT: buffer_load_dword v11, off, s[0:3], 0 offset:300
; GCN-NEXT: buffer_load_dword v12, off, s[0:3], 0 offset:304
; GCN-NEXT: buffer_load_dword v13, off, s[0:3], 0 offset:308
; GCN-NEXT: buffer_load_dword v14, off, s[0:3], 0 offset:312
; GCN-NEXT: buffer_load_dword v15, off, s[0:3], 0 offset:316
; GCN-NEXT: buffer_load_dword v16, off, s[0:3], 0 offset:320
; GCN-NEXT: buffer_load_dword v17, off, s[0:3], 0 offset:324
; GCN-NEXT: buffer_load_dword v18, off, s[0:3], 0 offset:328
; GCN-NEXT: buffer_load_dword v19, off, s[0:3], 0 offset:332
; GCN-NEXT: buffer_load_dword v20, off, s[0:3], 0 offset:336
; GCN-NEXT: buffer_load_dword v21, off, s[0:3], 0 offset:340
; GCN-NEXT: buffer_load_dword v22, off, s[0:3], 0 offset:344
; GCN-NEXT: buffer_load_dword v23, off, s[0:3], 0 offset:348
; GCN-NEXT: buffer_load_dword v24, off, s[0:3], 0 offset:352
; GCN-NEXT: buffer_load_dword v25, off, s[0:3], 0 offset:356
; GCN-NEXT: buffer_load_dword v26, off, s[0:3], 0 offset:360
; GCN-NEXT: buffer_load_dword v27, off, s[0:3], 0 offset:364
; GCN-NEXT: buffer_load_dword v28, off, s[0:3], 0 offset:368
; GCN-NEXT: buffer_load_dword v29, off, s[0:3], 0 offset:372
; GCN-NEXT: buffer_load_dword v30, off, s[0:3], 0 offset:376
; GCN-NEXT: buffer_load_dword v31, off, s[0:3], 0 offset:380
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], 0 offset:384
; GCN-NEXT: buffer_load_dword v33, off, s[0:3], 0 offset:388
; GCN-NEXT: buffer_load_dword v34, off, s[0:3], 0 offset:392
; GCN-NEXT: buffer_load_dword v35, off, s[0:3], 0 offset:396
; GCN-NEXT: buffer_load_dword v36, off, s[0:3], 0 offset:400
; GCN-NEXT: buffer_load_dword v37, off, s[0:3], 0 offset:404
; GCN-NEXT: buffer_load_dword v38, off, s[0:3], 0 offset:408
; GCN-NEXT: buffer_load_dword v39, off, s[0:3], 0 offset:412
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], 0 offset:416
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], 0 offset:420
; GCN-NEXT: buffer_load_dword v42, off, s[0:3], 0 offset:424
; GCN-NEXT: buffer_load_dword v43, off, s[0:3], 0 offset:428
; GCN-NEXT: buffer_load_dword v44, off, s[0:3], 0 offset:432
; GCN-NEXT: buffer_load_dword v45, off, s[0:3], 0 offset:436
; GCN-NEXT: buffer_load_dword v46, off, s[0:3], 0 offset:440
; GCN-NEXT: buffer_load_dword v47, off, s[0:3], 0 offset:444
; GCN-NEXT: buffer_load_dword v48, off, s[0:3], 0 offset:448
; GCN-NEXT: buffer_load_dword v49, off, s[0:3], 0 offset:452
; GCN-NEXT: buffer_load_dword v50, off, s[0:3], 0 offset:456
; GCN-NEXT: buffer_load_dword v51, off, s[0:3], 0 offset:460
; GCN-NEXT: buffer_load_dword v52, off, s[0:3], 0 offset:464
; GCN-NEXT: buffer_load_dword v53, off, s[0:3], 0 offset:468
; GCN-NEXT: buffer_load_dword v54, off, s[0:3], 0 offset:472
; GCN-NEXT: buffer_load_dword v55, off, s[0:3], 0 offset:476
; GCN-NEXT: buffer_load_dword v56, off, s[0:3], 0 offset:480
; GCN-NEXT: buffer_load_dword v57, off, s[0:3], 0 offset:484
; GCN-NEXT: buffer_load_dword v58, off, s[0:3], 0 offset:488
; GCN-NEXT: buffer_load_dword v59, off, s[0:3], 0 offset:492
; GCN-NEXT: buffer_load_dword v60, off, s[0:3], 0 offset:496
; GCN-NEXT: buffer_load_dword v61, off, s[0:3], 0 offset:500
; GCN-NEXT: buffer_load_dword v62, off, s[0:3], 0 offset:504
; GCN-NEXT: buffer_load_dword v63, off, s[0:3], 0 offset:508
; GCN-NEXT: s_waitcnt vmcnt(60)
; GCN-NEXT: global_store_dwordx4 v64, v[0:3], s[8:9]
; GCN-NEXT: s_waitcnt vmcnt(57)
; GCN-NEXT: global_store_dwordx4 v64, v[4:7], s[8:9] offset:16
; GCN-NEXT: s_waitcnt vmcnt(54)
; GCN-NEXT: global_store_dwordx4 v64, v[8:11], s[8:9] offset:32
; GCN-NEXT: s_waitcnt vmcnt(51)
; GCN-NEXT: global_store_dwordx4 v64, v[12:15], s[8:9] offset:48
; GCN-NEXT: s_waitcnt vmcnt(48)
; GCN-NEXT: global_store_dwordx4 v64, v[16:19], s[8:9] offset:64
; GCN-NEXT: s_waitcnt vmcnt(45)
; GCN-NEXT: global_store_dwordx4 v64, v[20:23], s[8:9] offset:80
; GCN-NEXT: s_waitcnt vmcnt(42)
; GCN-NEXT: global_store_dwordx4 v64, v[24:27], s[8:9] offset:96
; GCN-NEXT: s_waitcnt vmcnt(39)
; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[8:9] offset:112
; GCN-NEXT: s_waitcnt vmcnt(36)
; GCN-NEXT: global_store_dwordx4 v64, v[32:35], s[8:9] offset:128
; GCN-NEXT: s_waitcnt vmcnt(33)
; GCN-NEXT: global_store_dwordx4 v64, v[36:39], s[8:9] offset:144
; GCN-NEXT: s_waitcnt vmcnt(30)
; GCN-NEXT: global_store_dwordx4 v64, v[40:43], s[8:9] offset:160
; GCN-NEXT: s_waitcnt vmcnt(27)
; GCN-NEXT: global_store_dwordx4 v64, v[44:47], s[8:9] offset:176
; GCN-NEXT: s_waitcnt vmcnt(24)
; GCN-NEXT: global_store_dwordx4 v64, v[48:51], s[8:9] offset:192
; GCN-NEXT: s_waitcnt vmcnt(21)
; GCN-NEXT: global_store_dwordx4 v64, v[52:55], s[8:9] offset:208
; GCN-NEXT: s_waitcnt vmcnt(18)
; GCN-NEXT: global_store_dwordx4 v64, v[56:59], s[8:9] offset:224
; GCN-NEXT: s_waitcnt vmcnt(15)
; GCN-NEXT: global_store_dwordx4 v64, v[60:63], s[8:9] offset:240
; GCN-NEXT: s_endpgm
  ; Read the whole 64-element vector from %ptr.
  %src.vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr
  ; Replace the element selected by the run-time index %idx with %val.
  %patched.vec = insertelement <64 x i32> %src.vec, i32 %val, i32 %idx
  ; Write the updated vector to %out.ptr.
  store <64 x i32> %patched.vec, <64 x i32> addrspace(1)* %out.ptr
  ret void
}

attributes #0 = { "amdgpu-waves-per-eu"="1,10" }