; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -verify-machineinstrs < %s | FileCheck --check-prefix=GFX7 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s

; FIXME:
; XUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefix=GFX6 %s
; NOTE(review): the FIXME above records no reason. The tahiti invocation is
; spelled XUN, so it is presumably not executed, and this file carries no
; assertions for its GFX6 prefix. Confirm why it is disabled before re-enabling.

; Overview: every function below loads a <4 x i32> through an addrspace(3)
; pointer and returns it. The functions differ only in the alignment given on
; the load (none, 1, 2, 4, 8, 16). The assertions pin the instruction sequence
; produced for gfx900, hawaii and gfx1010. They are generated by the script
; named in the first line; regenerate them rather than editing by hand.

; Load with no explicit alignment.
; Expected on all three targets: a single ds_read_b128 followed by a wait on
; lgkmcnt(0). The hawaii sequence additionally writes -1 to m0 before the read,
; and the gfx1010 sequence starts with an extra s_waitcnt_vscnt.
define <4 x i32> @load_lds_v4i32(<4 x i32> addrspace(3)* %ptr) {
; GFX9-LABEL: load_lds_v4i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_read_b128 v[0:3], v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v4i32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read_b128 v[0:3], v0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v4i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ds_read_b128 v[0:3], v0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr
  ret <4 x i32> %load
}

; Load with align 1.
; Expected on all three targets: sixteen ds_read_u8 byte reads (offsets 0..15)
; whose results are recombined into four dwords with shifts and ors.
; gfx900 and hawaii issue the reads in two groups of eight and combine each
; group before starting the next; gfx1010 issues all sixteen reads first.
; gfx900 and gfx1010 use v_lshl_or_b32 / v_or3_b32, while hawaii uses separate
; v_lshlrev_b32 and v_or_b32 instructions.
define <4 x i32> @load_lds_v4i32_align1(<4 x i32> addrspace(3)* %ptr) {
; GFX9-LABEL: load_lds_v4i32_align1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_read_u8 v1, v0
; GFX9-NEXT:    ds_read_u8 v2, v0 offset:1
; GFX9-NEXT:    ds_read_u8 v3, v0 offset:2
; GFX9-NEXT:    ds_read_u8 v4, v0 offset:3
; GFX9-NEXT:    ds_read_u8 v5, v0 offset:4
; GFX9-NEXT:    ds_read_u8 v6, v0 offset:5
; GFX9-NEXT:    ds_read_u8 v7, v0 offset:6
; GFX9-NEXT:    ds_read_u8 v8, v0 offset:7
; GFX9-NEXT:    s_waitcnt lgkmcnt(6)
; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 8, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(5)
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
; GFX9-NEXT:    s_waitcnt lgkmcnt(4)
; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 24, v4
; GFX9-NEXT:    v_or3_b32 v4, v1, v2, v3
; GFX9-NEXT:    s_waitcnt lgkmcnt(2)
; GFX9-NEXT:    v_lshl_or_b32 v1, v6, 8, v5
; GFX9-NEXT:    s_waitcnt lgkmcnt(1)
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v7
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 24, v8
; GFX9-NEXT:    v_or3_b32 v1, v1, v2, v3
; GFX9-NEXT:    ds_read_u8 v2, v0 offset:8
; GFX9-NEXT:    ds_read_u8 v3, v0 offset:9
; GFX9-NEXT:    ds_read_u8 v5, v0 offset:10
; GFX9-NEXT:    ds_read_u8 v6, v0 offset:11
; GFX9-NEXT:    ds_read_u8 v7, v0 offset:12
; GFX9-NEXT:    ds_read_u8 v8, v0 offset:13
; GFX9-NEXT:    ds_read_u8 v9, v0 offset:14
; GFX9-NEXT:    ds_read_u8 v0, v0 offset:15
; GFX9-NEXT:    s_waitcnt lgkmcnt(6)
; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 8, v2
; GFX9-NEXT:    s_waitcnt lgkmcnt(5)
; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
; GFX9-NEXT:    s_waitcnt lgkmcnt(4)
; GFX9-NEXT:    v_lshlrev_b32_e32 v5, 24, v6
; GFX9-NEXT:    v_or3_b32 v2, v2, v3, v5
; GFX9-NEXT:    s_waitcnt lgkmcnt(2)
; GFX9-NEXT:    v_lshl_or_b32 v3, v8, 8, v7
; GFX9-NEXT:    s_waitcnt lgkmcnt(1)
; GFX9-NEXT:    v_lshlrev_b32_e32 v5, 16, v9
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
; GFX9-NEXT:    v_or3_b32 v3, v3, v5, v0
; GFX9-NEXT:    v_mov_b32_e32 v0, v4
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v4i32_align1:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    ds_read_u8 v1, v0
; GFX7-NEXT:    ds_read_u8 v2, v0 offset:1
; GFX7-NEXT:    ds_read_u8 v3, v0 offset:2
; GFX7-NEXT:    ds_read_u8 v4, v0 offset:3
; GFX7-NEXT:    ds_read_u8 v5, v0 offset:4
; GFX7-NEXT:    ds_read_u8 v6, v0 offset:5
; GFX7-NEXT:    ds_read_u8 v7, v0 offset:6
; GFX7-NEXT:    ds_read_u8 v8, v0 offset:7
; GFX7-NEXT:    s_waitcnt lgkmcnt(6)
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX7-NEXT:    s_waitcnt lgkmcnt(5)
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX7-NEXT:    s_waitcnt lgkmcnt(4)
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 24, v4
; GFX7-NEXT:    v_or_b32_e32 v4, v1, v2
; GFX7-NEXT:    s_waitcnt lgkmcnt(2)
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 8, v6
; GFX7-NEXT:    v_or_b32_e32 v1, v5, v1
; GFX7-NEXT:    s_waitcnt lgkmcnt(1)
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v7
; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 24, v8
; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX7-NEXT:    ds_read_u8 v2, v0 offset:8
; GFX7-NEXT:    ds_read_u8 v3, v0 offset:9
; GFX7-NEXT:    ds_read_u8 v5, v0 offset:10
; GFX7-NEXT:    ds_read_u8 v6, v0 offset:11
; GFX7-NEXT:    ds_read_u8 v7, v0 offset:12
; GFX7-NEXT:    ds_read_u8 v8, v0 offset:13
; GFX7-NEXT:    ds_read_u8 v9, v0 offset:14
; GFX7-NEXT:    ds_read_u8 v0, v0 offset:15
; GFX7-NEXT:    s_waitcnt lgkmcnt(6)
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX7-NEXT:    s_waitcnt lgkmcnt(5)
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX7-NEXT:    s_waitcnt lgkmcnt(4)
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 24, v6
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX7-NEXT:    s_waitcnt lgkmcnt(2)
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v8
; GFX7-NEXT:    v_or_b32_e32 v3, v7, v3
; GFX7-NEXT:    s_waitcnt lgkmcnt(1)
; GFX7-NEXT:    v_lshlrev_b32_e32 v5, 16, v9
; GFX7-NEXT:    v_or_b32_e32 v3, v3, v5
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
; GFX7-NEXT:    v_or_b32_e32 v3, v3, v0
; GFX7-NEXT:    v_mov_b32_e32 v0, v4
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v4i32_align1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ds_read_u8 v1, v0
; GFX10-NEXT:    ds_read_u8 v2, v0 offset:1
; GFX10-NEXT:    ds_read_u8 v3, v0 offset:2
; GFX10-NEXT:    ds_read_u8 v4, v0 offset:3
; GFX10-NEXT:    ds_read_u8 v5, v0 offset:4
; GFX10-NEXT:    ds_read_u8 v6, v0 offset:5
; GFX10-NEXT:    ds_read_u8 v7, v0 offset:6
; GFX10-NEXT:    ds_read_u8 v8, v0 offset:7
; GFX10-NEXT:    ds_read_u8 v9, v0 offset:8
; GFX10-NEXT:    ds_read_u8 v10, v0 offset:9
; GFX10-NEXT:    ds_read_u8 v11, v0 offset:10
; GFX10-NEXT:    ds_read_u8 v12, v0 offset:11
; GFX10-NEXT:    ds_read_u8 v13, v0 offset:12
; GFX10-NEXT:    ds_read_u8 v14, v0 offset:13
; GFX10-NEXT:    ds_read_u8 v15, v0 offset:14
; GFX10-NEXT:    ds_read_u8 v0, v0 offset:15
; GFX10-NEXT:    s_waitcnt lgkmcnt(14)
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 8, v1
; GFX10-NEXT:    s_waitcnt lgkmcnt(13)
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
; GFX10-NEXT:    s_waitcnt lgkmcnt(12)
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 24, v4
; GFX10-NEXT:    s_waitcnt lgkmcnt(10)
; GFX10-NEXT:    v_lshl_or_b32 v4, v6, 8, v5
; GFX10-NEXT:    s_waitcnt lgkmcnt(9)
; GFX10-NEXT:    v_lshlrev_b32_e32 v5, 16, v7
; GFX10-NEXT:    s_waitcnt lgkmcnt(8)
; GFX10-NEXT:    v_lshlrev_b32_e32 v6, 24, v8
; GFX10-NEXT:    s_waitcnt lgkmcnt(6)
; GFX10-NEXT:    v_lshl_or_b32 v7, v10, 8, v9
; GFX10-NEXT:    s_waitcnt lgkmcnt(5)
; GFX10-NEXT:    v_lshlrev_b32_e32 v8, 16, v11
; GFX10-NEXT:    s_waitcnt lgkmcnt(4)
; GFX10-NEXT:    v_lshlrev_b32_e32 v9, 24, v12
; GFX10-NEXT:    s_waitcnt lgkmcnt(2)
; GFX10-NEXT:    v_lshl_or_b32 v10, v14, 8, v13
; GFX10-NEXT:    s_waitcnt lgkmcnt(1)
; GFX10-NEXT:    v_lshlrev_b32_e32 v11, 16, v15
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_lshlrev_b32_e32 v12, 24, v0
; GFX10-NEXT:    v_or3_b32 v0, v1, v2, v3
; GFX10-NEXT:    v_or3_b32 v1, v4, v5, v6
; GFX10-NEXT:    v_or3_b32 v2, v7, v8, v9
; GFX10-NEXT:    v_or3_b32 v3, v10, v11, v12
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 1
  ret <4 x i32> %load
}

; Load with align 2.
; Expected on all three targets: eight ds_read_u16 reads (offsets 0..14 in
; steps of 2), with each pair of halves merged into one dword by a 16-bit
; shift and or. gfx900 and gfx1010 merge with v_lshl_or_b32; hawaii uses
; v_lshlrev_b32 followed by v_or_b32.
define <4 x i32> @load_lds_v4i32_align2(<4 x i32> addrspace(3)* %ptr) {
; GFX9-LABEL: load_lds_v4i32_align2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_read_u16 v1, v0
; GFX9-NEXT:    ds_read_u16 v2, v0 offset:2
; GFX9-NEXT:    ds_read_u16 v3, v0 offset:4
; GFX9-NEXT:    ds_read_u16 v4, v0 offset:6
; GFX9-NEXT:    ds_read_u16 v5, v0 offset:8
; GFX9-NEXT:    ds_read_u16 v6, v0 offset:10
; GFX9-NEXT:    ds_read_u16 v7, v0 offset:12
; GFX9-NEXT:    ds_read_u16 v8, v0 offset:14
; GFX9-NEXT:    s_waitcnt lgkmcnt(6)
; GFX9-NEXT:    v_lshl_or_b32 v0, v2, 16, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(4)
; GFX9-NEXT:    v_lshl_or_b32 v1, v4, 16, v3
; GFX9-NEXT:    s_waitcnt lgkmcnt(2)
; GFX9-NEXT:    v_lshl_or_b32 v2, v6, 16, v5
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_lshl_or_b32 v3, v8, 16, v7
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v4i32_align2:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    ds_read_u16 v1, v0
; GFX7-NEXT:    ds_read_u16 v2, v0 offset:2
; GFX7-NEXT:    ds_read_u16 v3, v0 offset:4
; GFX7-NEXT:    ds_read_u16 v4, v0 offset:6
; GFX7-NEXT:    ds_read_u16 v5, v0 offset:8
; GFX7-NEXT:    ds_read_u16 v6, v0 offset:10
; GFX7-NEXT:    ds_read_u16 v7, v0 offset:12
; GFX7-NEXT:    ds_read_u16 v8, v0 offset:14
; GFX7-NEXT:    s_waitcnt lgkmcnt(6)
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v2
; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX7-NEXT:    s_waitcnt lgkmcnt(4)
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v4
; GFX7-NEXT:    v_or_b32_e32 v1, v3, v1
; GFX7-NEXT:    s_waitcnt lgkmcnt(2)
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v6
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v8
; GFX7-NEXT:    v_or_b32_e32 v2, v5, v2
; GFX7-NEXT:    v_or_b32_e32 v3, v7, v3
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v4i32_align2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ds_read_u16 v1, v0
; GFX10-NEXT:    ds_read_u16 v2, v0 offset:2
; GFX10-NEXT:    ds_read_u16 v3, v0 offset:4
; GFX10-NEXT:    ds_read_u16 v4, v0 offset:6
; GFX10-NEXT:    ds_read_u16 v5, v0 offset:8
; GFX10-NEXT:    ds_read_u16 v6, v0 offset:10
; GFX10-NEXT:    ds_read_u16 v7, v0 offset:12
; GFX10-NEXT:    ds_read_u16 v8, v0 offset:14
; GFX10-NEXT:    s_waitcnt lgkmcnt(6)
; GFX10-NEXT:    v_lshl_or_b32 v0, v2, 16, v1
; GFX10-NEXT:    s_waitcnt lgkmcnt(4)
; GFX10-NEXT:    v_lshl_or_b32 v1, v4, 16, v3
; GFX10-NEXT:    s_waitcnt lgkmcnt(2)
; GFX10-NEXT:    v_lshl_or_b32 v2, v6, 16, v5
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_lshl_or_b32 v3, v8, 16, v7
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 2
  ret <4 x i32> %load
}

; Load with align 4.
; Expected on all three targets: the address is copied to v2 and two
; ds_read2_b32 reads fill v[0:1] and v[2:3]. The hawaii sequence additionally
; writes -1 to m0 before the reads.
define <4 x i32> @load_lds_v4i32_align4(<4 x i32> addrspace(3)* %ptr) {
; GFX9-LABEL: load_lds_v4i32_align4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v2, v0
; GFX9-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
; GFX9-NEXT:    ds_read2_b32 v[2:3], v2 offset0:2 offset1:3
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v4i32_align4:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v2, v0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
; GFX7-NEXT:    ds_read2_b32 v[2:3], v2 offset0:2 offset1:3
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v4i32_align4:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_mov_b32_e32 v2, v0
; GFX10-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
; GFX10-NEXT:    ds_read2_b32 v[2:3], v2 offset0:2 offset1:3
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 4
  ret <4 x i32> %load
}

; Load with align 8.
; Expected on all three targets: a single ds_read2_b64 into v[0:3]. The hawaii
; sequence additionally writes -1 to m0 before the read.
define <4 x i32> @load_lds_v4i32_align8(<4 x i32> addrspace(3)* %ptr) {
; GFX9-LABEL: load_lds_v4i32_align8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_read2_b64 v[0:3], v0 offset1:1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v4i32_align8:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read2_b64 v[0:3], v0 offset1:1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v4i32_align8:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ds_read2_b64 v[0:3], v0 offset1:1
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 8
  ret <4 x i32> %load
}

; Load with align 16.
; Expected on all three targets: a single ds_read_b128 into v[0:3], the same
; sequences as asserted for @load_lds_v4i32 (the load without an explicit
; alignment).
define <4 x i32> @load_lds_v4i32_align16(<4 x i32> addrspace(3)* %ptr) {
; GFX9-LABEL: load_lds_v4i32_align16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_read_b128 v[0:3], v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v4i32_align16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read_b128 v[0:3], v0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v4i32_align16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ds_read_b128 v[0:3], v0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 16
  ret <4 x i32> %load
}