; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -verify-machineinstrs < %s | FileCheck --check-prefix=GFX7 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s

; Exercises GlobalISel code generation for storing a <3 x i32> kernel argument
; through an addrspace(3) pointer. Every kernel below performs the same store
; and differs only in the alignment given on the store instruction; the check
; lines record the DS write sequence llc emits for gfx900, hawaii and gfx1010.

; FIXME:
; XUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefix=GFX6 %s

; Store without an explicit alignment: all three checked targets emit a single
; ds_write_b96.
define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
; GFX9-LABEL: store_lds_v3i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
; GFX9-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x34
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v3, s2
; GFX9-NEXT:    v_mov_b32_e32 v0, s12
; GFX9-NEXT:    v_mov_b32_e32 v1, s13
; GFX9-NEXT:    v_mov_b32_e32 v2, s14
; GFX9-NEXT:    ds_write_b96 v3, v[0:2]
; GFX9-NEXT:    s_endpgm
;
; GFX7-LABEL: store_lds_v3i32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v3, s4
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    ds_write_b96 v3, v[0:2]
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: store_lds_v3i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x34
; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s12
; GFX10-NEXT:    v_mov_b32_e32 v1, s13
; GFX10-NEXT:    v_mov_b32_e32 v2, s14
; GFX10-NEXT:    v_mov_b32_e32 v3, s2
; GFX10-NEXT:    ds_write_b96 v3, v[0:2]
; GFX10-NEXT:    s_endpgm
  store <3 x i32> %x, <3 x i32> addrspace(3)* %out
  ret void
}

; align 1: the value is written one byte at a time (twelve ds_write_b8 at
; offsets 0-11); s_lshr_b32 by 8, 16 and 24 extracts the upper bytes of each
; dword.
define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
; GFX9-LABEL: store_lds_v3i32_align1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
; GFX9-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x34
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v1, s2
; GFX9-NEXT:    s_lshr_b32 s0, s12, 8
; GFX9-NEXT:    v_mov_b32_e32 v0, s12
; GFX9-NEXT:    s_lshr_b32 s1, s12, 16
; GFX9-NEXT:    ds_write_b8 v1, v0
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    s_lshr_b32 s3, s12, 24
; GFX9-NEXT:    ds_write_b8 v1, v0 offset:1
; GFX9-NEXT:    v_mov_b32_e32 v0, s1
; GFX9-NEXT:    ds_write_b8 v1, v0 offset:2
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    ds_write_b8 v1, v0 offset:3
; GFX9-NEXT:    s_lshr_b32 s0, s13, 8
; GFX9-NEXT:    v_mov_b32_e32 v0, s13
; GFX9-NEXT:    s_lshr_b32 s1, s13, 16
; GFX9-NEXT:    ds_write_b8 v1, v0 offset:4
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    s_lshr_b32 s2, s13, 24
; GFX9-NEXT:    ds_write_b8 v1, v0 offset:5
; GFX9-NEXT:    v_mov_b32_e32 v0, s1
; GFX9-NEXT:    ds_write_b8 v1, v0 offset:6
; GFX9-NEXT:    v_mov_b32_e32 v0, s2
; GFX9-NEXT:    ds_write_b8 v1, v0 offset:7
; GFX9-NEXT:    s_lshr_b32 s0, s14, 8
; GFX9-NEXT:    v_mov_b32_e32 v0, s14
; GFX9-NEXT:    s_lshr_b32 s1, s14, 16
; GFX9-NEXT:    ds_write_b8 v1, v0 offset:8
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    s_lshr_b32 s2, s14, 24
; GFX9-NEXT:    ds_write_b8 v1, v0 offset:9
; GFX9-NEXT:    v_mov_b32_e32 v0, s1
; GFX9-NEXT:    ds_write_b8 v1, v0 offset:10
; GFX9-NEXT:    v_mov_b32_e32 v0, s2
; GFX9-NEXT:    ds_write_b8 v1, v0 offset:11
; GFX9-NEXT:    s_endpgm
;
; GFX7-LABEL: store_lds_v3i32_align1:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v1, s4
; GFX7-NEXT:    s_lshr_b32 s3, s0, 8
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    s_lshr_b32 s5, s0, 16
; GFX7-NEXT:    ds_write_b8 v1, v0
; GFX7-NEXT:    v_mov_b32_e32 v0, s3
; GFX7-NEXT:    s_lshr_b32 s6, s0, 24
; GFX7-NEXT:    ds_write_b8 v1, v0 offset:1
; GFX7-NEXT:    v_mov_b32_e32 v0, s5
; GFX7-NEXT:    ds_write_b8 v1, v0 offset:2
; GFX7-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-NEXT:    ds_write_b8 v1, v0 offset:3
; GFX7-NEXT:    s_lshr_b32 s0, s1, 8
; GFX7-NEXT:    v_mov_b32_e32 v0, s1
; GFX7-NEXT:    s_lshr_b32 s3, s1, 16
; GFX7-NEXT:    ds_write_b8 v1, v0 offset:4
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    s_lshr_b32 s4, s1, 24
; GFX7-NEXT:    ds_write_b8 v1, v0 offset:5
; GFX7-NEXT:    v_mov_b32_e32 v0, s3
; GFX7-NEXT:    ds_write_b8 v1, v0 offset:6
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    ds_write_b8 v1, v0 offset:7
; GFX7-NEXT:    s_lshr_b32 s0, s2, 8
; GFX7-NEXT:    v_mov_b32_e32 v0, s2
; GFX7-NEXT:    s_lshr_b32 s1, s2, 16
; GFX7-NEXT:    ds_write_b8 v1, v0 offset:8
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    s_lshr_b32 s3, s2, 24
; GFX7-NEXT:    ds_write_b8 v1, v0 offset:9
; GFX7-NEXT:    v_mov_b32_e32 v0, s1
; GFX7-NEXT:    ds_write_b8 v1, v0 offset:10
; GFX7-NEXT:    v_mov_b32_e32 v0, s3
; GFX7-NEXT:    ds_write_b8 v1, v0 offset:11
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: store_lds_v3i32_align1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x34
; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_lshr_b32 s0, s12, 8
; GFX10-NEXT:    v_mov_b32_e32 v0, s12
; GFX10-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-NEXT:    s_lshr_b32 s5, s13, 24
; GFX10-NEXT:    s_lshr_b32 s1, s12, 16
; GFX10-NEXT:    v_mov_b32_e32 v2, s13
; GFX10-NEXT:    s_lshr_b32 s3, s12, 24
; GFX10-NEXT:    s_lshr_b32 s6, s14, 8
; GFX10-NEXT:    v_mov_b32_e32 v4, s0
; GFX10-NEXT:    v_mov_b32_e32 v9, s5
; GFX10-NEXT:    s_lshr_b32 s2, s13, 8
; GFX10-NEXT:    s_lshr_b32 s4, s13, 16
; GFX10-NEXT:    s_lshr_b32 s7, s14, 16
; GFX10-NEXT:    v_mov_b32_e32 v3, s14
; GFX10-NEXT:    v_mov_b32_e32 v5, s1
; GFX10-NEXT:    s_lshr_b32 s8, s14, 24
; GFX10-NEXT:    v_mov_b32_e32 v6, s3
; GFX10-NEXT:    v_mov_b32_e32 v10, s6
; GFX10-NEXT:    v_mov_b32_e32 v7, s2
; GFX10-NEXT:    v_mov_b32_e32 v8, s4
; GFX10-NEXT:    ds_write_b8 v1, v0
; GFX10-NEXT:    ds_write_b8 v1, v2 offset:4
; GFX10-NEXT:    ds_write_b8 v1, v4 offset:1
; GFX10-NEXT:    ds_write_b8 v1, v5 offset:2
; GFX10-NEXT:    ds_write_b8 v1, v6 offset:3
; GFX10-NEXT:    ds_write_b8 v1, v7 offset:5
; GFX10-NEXT:    ds_write_b8 v1, v8 offset:6
; GFX10-NEXT:    v_mov_b32_e32 v0, s7
; GFX10-NEXT:    v_mov_b32_e32 v2, s8
; GFX10-NEXT:    ds_write_b8 v1, v9 offset:7
; GFX10-NEXT:    ds_write_b8 v1, v3 offset:8
; GFX10-NEXT:    ds_write_b8 v1, v10 offset:9
; GFX10-NEXT:    ds_write_b8 v1, v0 offset:10
; GFX10-NEXT:    ds_write_b8 v1, v2 offset:11
; GFX10-NEXT:    s_endpgm
  store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 1
  ret void
}

; align 2: the value is written as six halfwords (ds_write_b16 at offsets
; 0-10); s_lshr_b32 by 16 extracts the upper half of each dword.
define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
; GFX9-LABEL: store_lds_v3i32_align2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
; GFX9-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x34
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v1, s2
; GFX9-NEXT:    s_lshr_b32 s0, s12, 16
; GFX9-NEXT:    v_mov_b32_e32 v0, s12
; GFX9-NEXT:    ds_write_b16 v1, v0
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    ds_write_b16 v1, v0 offset:2
; GFX9-NEXT:    s_lshr_b32 s0, s13, 16
; GFX9-NEXT:    v_mov_b32_e32 v0, s13
; GFX9-NEXT:    ds_write_b16 v1, v0 offset:4
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    ds_write_b16 v1, v0 offset:6
; GFX9-NEXT:    s_lshr_b32 s0, s14, 16
; GFX9-NEXT:    v_mov_b32_e32 v0, s14
; GFX9-NEXT:    ds_write_b16 v1, v0 offset:8
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    ds_write_b16 v1, v0 offset:10
; GFX9-NEXT:    s_endpgm
;
; GFX7-LABEL: store_lds_v3i32_align2:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v1, s4
; GFX7-NEXT:    s_lshr_b32 s3, s0, 16
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    ds_write_b16 v1, v0
; GFX7-NEXT:    v_mov_b32_e32 v0, s3
; GFX7-NEXT:    ds_write_b16 v1, v0 offset:2
; GFX7-NEXT:    s_lshr_b32 s0, s1, 16
; GFX7-NEXT:    v_mov_b32_e32 v0, s1
; GFX7-NEXT:    ds_write_b16 v1, v0 offset:4
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    ds_write_b16 v1, v0 offset:6
; GFX7-NEXT:    s_lshr_b32 s0, s2, 16
; GFX7-NEXT:    v_mov_b32_e32 v0, s2
; GFX7-NEXT:    ds_write_b16 v1, v0 offset:8
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    ds_write_b16 v1, v0 offset:10
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: store_lds_v3i32_align2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x34
; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s12
; GFX10-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-NEXT:    s_lshr_b32 s0, s12, 16
; GFX10-NEXT:    v_mov_b32_e32 v2, s13
; GFX10-NEXT:    s_lshr_b32 s1, s13, 16
; GFX10-NEXT:    v_mov_b32_e32 v3, s14
; GFX10-NEXT:    s_lshr_b32 s2, s14, 16
; GFX10-NEXT:    v_mov_b32_e32 v4, s0
; GFX10-NEXT:    v_mov_b32_e32 v5, s1
; GFX10-NEXT:    v_mov_b32_e32 v6, s2
; GFX10-NEXT:    ds_write_b16 v1, v0
; GFX10-NEXT:    ds_write_b16 v1, v2 offset:4
; GFX10-NEXT:    ds_write_b16 v1, v3 offset:8
; GFX10-NEXT:    ds_write_b16 v1, v4 offset:2
; GFX10-NEXT:    ds_write_b16 v1, v5 offset:6
; GFX10-NEXT:    ds_write_b16 v1, v6 offset:10
; GFX10-NEXT:    s_endpgm
  store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 2
  ret void
}

; align 4: the first two dwords go out through one ds_write2_b32 and the third
; through a ds_write_b32 at offset 8.
define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
; GFX9-LABEL: store_lds_v3i32_align4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
; GFX9-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x34
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v2, s2
; GFX9-NEXT:    v_mov_b32_e32 v0, s12
; GFX9-NEXT:    v_mov_b32_e32 v1, s13
; GFX9-NEXT:    v_mov_b32_e32 v3, s14
; GFX9-NEXT:    ds_write2_b32 v2, v0, v1 offset1:1
; GFX9-NEXT:    ds_write_b32 v2, v3 offset:8
; GFX9-NEXT:    s_endpgm
;
; GFX7-LABEL: store_lds_v3i32_align4:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v2, s4
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    v_mov_b32_e32 v3, s2
; GFX7-NEXT:    ds_write2_b32 v2, v0, v1 offset1:1
; GFX7-NEXT:    ds_write_b32 v2, v3 offset:8
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: store_lds_v3i32_align4:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x34
; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s12
; GFX10-NEXT:    v_mov_b32_e32 v1, s13
; GFX10-NEXT:    v_mov_b32_e32 v2, s2
; GFX10-NEXT:    v_mov_b32_e32 v3, s14
; GFX10-NEXT:    ds_write2_b32 v2, v0, v1 offset1:1
; GFX10-NEXT:    ds_write_b32 v2, v3 offset:8
; GFX10-NEXT:    s_endpgm
  store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 4
  ret void
}

; align 8: the first two dwords go out through one ds_write_b64 and the third
; through a ds_write_b32 at offset 8.
define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
; GFX9-LABEL: store_lds_v3i32_align8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
; GFX9-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x34
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v2, s2
; GFX9-NEXT:    v_mov_b32_e32 v0, s12
; GFX9-NEXT:    v_mov_b32_e32 v1, s13
; GFX9-NEXT:    v_mov_b32_e32 v3, s14
; GFX9-NEXT:    ds_write_b64 v2, v[0:1]
; GFX9-NEXT:    ds_write_b32 v2, v3 offset:8
; GFX9-NEXT:    s_endpgm
;
; GFX7-LABEL: store_lds_v3i32_align8:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v2, s4
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    v_mov_b32_e32 v3, s2
; GFX7-NEXT:    ds_write_b64 v2, v[0:1]
; GFX7-NEXT:    ds_write_b32 v2, v3 offset:8
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: store_lds_v3i32_align8:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x34
; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s12
; GFX10-NEXT:    v_mov_b32_e32 v1, s13
; GFX10-NEXT:    v_mov_b32_e32 v2, s2
; GFX10-NEXT:    v_mov_b32_e32 v3, s14
; GFX10-NEXT:    ds_write_b64 v2, v[0:1]
; GFX10-NEXT:    ds_write_b32 v2, v3 offset:8
; GFX10-NEXT:    s_endpgm
  store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 8
  ret void
}

; align 16: all three checked targets emit a single ds_write_b96, the same
; sequence as the store without an explicit alignment.
define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
; GFX9-LABEL: store_lds_v3i32_align16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
; GFX9-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x34
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v3, s2
; GFX9-NEXT:    v_mov_b32_e32 v0, s12
; GFX9-NEXT:    v_mov_b32_e32 v1, s13
; GFX9-NEXT:    v_mov_b32_e32 v2, s14
; GFX9-NEXT:    ds_write_b96 v3, v[0:2]
; GFX9-NEXT:    s_endpgm
;
; GFX7-LABEL: store_lds_v3i32_align16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v3, s4
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    ds_write_b96 v3, v[0:2]
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: store_lds_v3i32_align16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x34
; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s12
; GFX10-NEXT:    v_mov_b32_e32 v1, s13
; GFX10-NEXT:    v_mov_b32_e32 v2, s14
; GFX10-NEXT:    v_mov_b32_e32 v3, s2
; GFX10-NEXT:    ds_write_b96 v3, v[0:2]
; GFX10-NEXT:    s_endpgm
  store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 16
  ret void
}