; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,GFX900-MUBUF %s
; RUN: llc -march=amdgcn -mcpu=gfx906 -amdgpu-sroa=0 -mattr=-promote-alloca,+sram-ecc -verify-machineinstrs < %s | FileCheck --check-prefix=GFX906 %s
; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck --check-prefix=GFX803 %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs --amdgpu-enable-flat-scratch < %s | FileCheck -check-prefixes=GFX900,GFX900-FLATSCR %s

; Load one i16 through an addrspace(3) pointer and place it in element 0 of a
; <2 x i16> whose element 1 is left undef. The expected output for the gfx900
; runs is a single ds_read_u16_d16; the gfx906 (+sram-ecc) and fiji runs expect
; a plain ds_read_u16 instead.
define <2 x i16> @load_local_lo_v2i16_undeflo(i16 addrspace(3)* %in) #0 {
; GFX900-LABEL: load_local_lo_v2i16_undeflo:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ds_read_u16_d16 v0, v0
; GFX900-NEXT:    s_waitcnt lgkmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX906-LABEL: load_local_lo_v2i16_undeflo:
; GFX906:       ; %bb.0: ; %entry
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    ds_read_u16 v0, v0
; GFX906-NEXT:    s_waitcnt lgkmcnt(0)
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX803-LABEL: load_local_lo_v2i16_undeflo:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT:    s_mov_b32 m0, -1
; GFX803-NEXT:    ds_read_u16 v0, v0
; GFX803-NEXT:    s_waitcnt lgkmcnt(0)
; GFX803-NEXT:    s_setpc_b64 s[30:31]
entry:
  ; The loaded value becomes the low (index 0) lane; index 1 stays undef.
  %lo.val = load i16, i16 addrspace(3)* %in
  %vec = insertelement <2 x i16> undef, i16 %lo.val, i32 0
  ret <2 x i16> %vec
}

; Same low-lane load as above, but element 1 is first filled from the i16
; argument %reg. Every run line here expects an ordinary ds_read_u16 followed
; by explicit packing (and + v_lshl_or on gfx900/gfx906, shift + or on fiji).
define <2 x i16> @load_local_lo_v2i16_reglo(i16 addrspace(3)* %in, i16 %reg) #0 {
; GFX900-LABEL: load_local_lo_v2i16_reglo:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ds_read_u16 v0, v0
; GFX900-NEXT:    s_waitcnt lgkmcnt(0)
; GFX900-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX900-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX906-LABEL: load_local_lo_v2i16_reglo:
; GFX906:       ; %bb.0: ; %entry
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    ds_read_u16 v0, v0
; GFX906-NEXT:    s_waitcnt lgkmcnt(0)
; GFX906-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX906-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX803-LABEL: load_local_lo_v2i16_reglo:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT:    s_mov_b32 m0, -1
; GFX803-NEXT:    ds_read_u16 v0, v0
; GFX803-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX803-NEXT:    s_waitcnt lgkmcnt(0)
; GFX803-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX803-NEXT:    s_setpc_b64 s[30:31]
entry:
  %lo.val = load i16, i16 addrspace(3)* %in
  ; Element 1 comes from the register argument, element 0 from the load.
  %with.hi = insertelement <2 x i16> undef, i16 %reg, i32 1
  %packed = insertelement <2 x i16> %with.hi, i16 %lo.val, i32 0
  ret <2 x i16> %packed
}

; Show that we get reasonable regalloc without physreg constraints.
71define void @load_local_lo_v2i16_reglo_vreg(i16 addrspace(3)* %in, i16 %reg) #0 { 72; GFX900-LABEL: load_local_lo_v2i16_reglo_vreg: 73; GFX900: ; %bb.0: ; %entry 74; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 75; GFX900-NEXT: ds_read_u16 v0, v0 76; GFX900-NEXT: s_waitcnt lgkmcnt(0) 77; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0 78; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0 79; GFX900-NEXT: global_store_dword v[0:1], v0, off 80; GFX900-NEXT: s_waitcnt vmcnt(0) 81; GFX900-NEXT: s_setpc_b64 s[30:31] 82; 83; GFX906-LABEL: load_local_lo_v2i16_reglo_vreg: 84; GFX906: ; %bb.0: ; %entry 85; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 86; GFX906-NEXT: ds_read_u16 v0, v0 87; GFX906-NEXT: s_waitcnt lgkmcnt(0) 88; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 89; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 90; GFX906-NEXT: global_store_dword v[0:1], v0, off 91; GFX906-NEXT: s_waitcnt vmcnt(0) 92; GFX906-NEXT: s_setpc_b64 s[30:31] 93; 94; GFX803-LABEL: load_local_lo_v2i16_reglo_vreg: 95; GFX803: ; %bb.0: ; %entry 96; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 97; GFX803-NEXT: s_mov_b32 m0, -1 98; GFX803-NEXT: ds_read_u16 v0, v0 99; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1 100; GFX803-NEXT: s_waitcnt lgkmcnt(0) 101; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 102; GFX803-NEXT: flat_store_dword v[0:1], v0 103; GFX803-NEXT: s_waitcnt vmcnt(0) 104; GFX803-NEXT: s_setpc_b64 s[30:31] 105entry: 106 %load = load i16, i16 addrspace(3)* %in 107 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1 108 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0 109 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 110 ret void 111} 112 113define <2 x i16> @load_local_lo_v2i16_zerolo(i16 addrspace(3)* %in) #0 { 114; GFX900-LABEL: load_local_lo_v2i16_zerolo: 115; GFX900: ; %bb.0: ; %entry 116; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 117; GFX900-NEXT: v_mov_b32_e32 v1, 0 118; GFX900-NEXT: ds_read_u16_d16 v1, v0 119; GFX900-NEXT: s_waitcnt lgkmcnt(0) 
120; GFX900-NEXT: v_mov_b32_e32 v0, v1 121; GFX900-NEXT: s_setpc_b64 s[30:31] 122; 123; GFX906-LABEL: load_local_lo_v2i16_zerolo: 124; GFX906: ; %bb.0: ; %entry 125; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 126; GFX906-NEXT: ds_read_u16 v0, v0 127; GFX906-NEXT: s_waitcnt lgkmcnt(0) 128; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 129; GFX906-NEXT: s_setpc_b64 s[30:31] 130; 131; GFX803-LABEL: load_local_lo_v2i16_zerolo: 132; GFX803: ; %bb.0: ; %entry 133; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 134; GFX803-NEXT: s_mov_b32 m0, -1 135; GFX803-NEXT: ds_read_u16 v0, v0 136; GFX803-NEXT: s_waitcnt lgkmcnt(0) 137; GFX803-NEXT: s_setpc_b64 s[30:31] 138entry: 139 %load = load i16, i16 addrspace(3)* %in 140 %build = insertelement <2 x i16> zeroinitializer, i16 %load, i32 0 141 ret <2 x i16> %build 142} 143 144define <2 x half> @load_local_lo_v2f16_fpimm(half addrspace(3)* %in) #0 { 145; GFX900-LABEL: load_local_lo_v2f16_fpimm: 146; GFX900: ; %bb.0: ; %entry 147; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 148; GFX900-NEXT: v_mov_b32_e32 v1, 2.0 149; GFX900-NEXT: ds_read_u16_d16 v1, v0 150; GFX900-NEXT: s_waitcnt lgkmcnt(0) 151; GFX900-NEXT: v_mov_b32_e32 v0, v1 152; GFX900-NEXT: s_setpc_b64 s[30:31] 153; 154; GFX906-LABEL: load_local_lo_v2f16_fpimm: 155; GFX906: ; %bb.0: ; %entry 156; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 157; GFX906-NEXT: ds_read_u16 v0, v0 158; GFX906-NEXT: s_movk_i32 s4, 0x4000 159; GFX906-NEXT: s_waitcnt lgkmcnt(0) 160; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 161; GFX906-NEXT: v_lshl_or_b32 v0, s4, 16, v0 162; GFX906-NEXT: s_setpc_b64 s[30:31] 163; 164; GFX803-LABEL: load_local_lo_v2f16_fpimm: 165; GFX803: ; %bb.0: ; %entry 166; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 167; GFX803-NEXT: s_mov_b32 m0, -1 168; GFX803-NEXT: ds_read_u16 v0, v0 169; GFX803-NEXT: s_waitcnt lgkmcnt(0) 170; GFX803-NEXT: v_or_b32_e32 v0, 2.0, v0 171; GFX803-NEXT: s_setpc_b64 s[30:31] 172entry: 173 %load = load half, 
half addrspace(3)* %in 174 %build = insertelement <2 x half> <half 0.0, half 2.0>, half %load, i32 0 175 ret <2 x half> %build 176} 177 178define void @load_local_lo_v2f16_reghi_vreg(half addrspace(3)* %in, i32 %reg) #0 { 179; GFX900-LABEL: load_local_lo_v2f16_reghi_vreg: 180; GFX900: ; %bb.0: ; %entry 181; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 182; GFX900-NEXT: ds_read_u16_d16 v1, v0 183; GFX900-NEXT: s_waitcnt lgkmcnt(0) 184; GFX900-NEXT: global_store_dword v[0:1], v1, off 185; GFX900-NEXT: s_waitcnt vmcnt(0) 186; GFX900-NEXT: s_setpc_b64 s[30:31] 187; 188; GFX906-LABEL: load_local_lo_v2f16_reghi_vreg: 189; GFX906: ; %bb.0: ; %entry 190; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 191; GFX906-NEXT: ds_read_u16 v0, v0 192; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v1 193; GFX906-NEXT: s_waitcnt lgkmcnt(0) 194; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 195; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 196; GFX906-NEXT: global_store_dword v[0:1], v0, off 197; GFX906-NEXT: s_waitcnt vmcnt(0) 198; GFX906-NEXT: s_setpc_b64 s[30:31] 199; 200; GFX803-LABEL: load_local_lo_v2f16_reghi_vreg: 201; GFX803: ; %bb.0: ; %entry 202; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 203; GFX803-NEXT: s_mov_b32 m0, -1 204; GFX803-NEXT: ds_read_u16 v0, v0 205; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 206; GFX803-NEXT: s_waitcnt lgkmcnt(0) 207; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 208; GFX803-NEXT: flat_store_dword v[0:1], v0 209; GFX803-NEXT: s_waitcnt vmcnt(0) 210; GFX803-NEXT: s_setpc_b64 s[30:31] 211entry: 212 %reg.bc = bitcast i32 %reg to <2 x half> 213 %load = load half, half addrspace(3)* %in 214 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0 215 store <2 x half> %build1, <2 x half> addrspace(1)* undef 216 ret void 217} 218 219define void @load_local_lo_v2f16_reglo_vreg(half addrspace(3)* %in, half %reg) #0 { 220; GFX900-LABEL: load_local_lo_v2f16_reglo_vreg: 221; GFX900: ; %bb.0: ; %entry 222; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 223; GFX900-NEXT: ds_read_u16 v0, v0 224; GFX900-NEXT: s_waitcnt lgkmcnt(0) 225; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0 226; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0 227; GFX900-NEXT: global_store_dword v[0:1], v0, off 228; GFX900-NEXT: s_waitcnt vmcnt(0) 229; GFX900-NEXT: s_setpc_b64 s[30:31] 230; 231; GFX906-LABEL: load_local_lo_v2f16_reglo_vreg: 232; GFX906: ; %bb.0: ; %entry 233; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 234; GFX906-NEXT: ds_read_u16 v0, v0 235; GFX906-NEXT: s_waitcnt lgkmcnt(0) 236; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 237; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 238; GFX906-NEXT: global_store_dword v[0:1], v0, off 239; GFX906-NEXT: s_waitcnt vmcnt(0) 240; GFX906-NEXT: s_setpc_b64 s[30:31] 241; 242; GFX803-LABEL: load_local_lo_v2f16_reglo_vreg: 243; GFX803: ; %bb.0: ; %entry 244; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 245; GFX803-NEXT: s_mov_b32 m0, -1 246; GFX803-NEXT: ds_read_u16 v0, v0 247; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1 248; GFX803-NEXT: s_waitcnt lgkmcnt(0) 249; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 250; GFX803-NEXT: flat_store_dword v[0:1], v0 251; GFX803-NEXT: s_waitcnt vmcnt(0) 252; GFX803-NEXT: s_setpc_b64 s[30:31] 253entry: 254 %load = load half, half addrspace(3)* %in 255 %build0 = insertelement <2 x half> undef, half %reg, i32 1 256 %build1 = insertelement <2 x half> %build0, half %load, i32 0 257 store <2 x half> %build1, <2 x half> addrspace(1)* undef 258 ret void 259} 260 261define void @load_local_lo_v2i16_reghi_vreg_zexti8(i8 addrspace(3)* %in, i32 %reg) #0 { 262; GFX900-LABEL: load_local_lo_v2i16_reghi_vreg_zexti8: 263; GFX900: ; %bb.0: ; %entry 264; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 265; GFX900-NEXT: ds_read_u8_d16 v1, v0 266; GFX900-NEXT: s_waitcnt lgkmcnt(0) 267; GFX900-NEXT: global_store_dword v[0:1], v1, off 268; GFX900-NEXT: s_waitcnt vmcnt(0) 269; GFX900-NEXT: s_setpc_b64 s[30:31] 270; 271; GFX906-LABEL: load_local_lo_v2i16_reghi_vreg_zexti8: 
272; GFX906: ; %bb.0: ; %entry 273; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 274; GFX906-NEXT: ds_read_u8 v0, v0 275; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff 276; GFX906-NEXT: s_waitcnt lgkmcnt(0) 277; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1 278; GFX906-NEXT: global_store_dword v[0:1], v0, off 279; GFX906-NEXT: s_waitcnt vmcnt(0) 280; GFX906-NEXT: s_setpc_b64 s[30:31] 281; 282; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_zexti8: 283; GFX803: ; %bb.0: ; %entry 284; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 285; GFX803-NEXT: s_mov_b32 m0, -1 286; GFX803-NEXT: ds_read_u8 v0, v0 287; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v1 288; GFX803-NEXT: s_mov_b32 s4, 0x5040c00 289; GFX803-NEXT: s_waitcnt lgkmcnt(0) 290; GFX803-NEXT: v_perm_b32 v0, v1, v0, s4 291; GFX803-NEXT: flat_store_dword v[0:1], v0 292; GFX803-NEXT: s_waitcnt vmcnt(0) 293; GFX803-NEXT: s_setpc_b64 s[30:31] 294entry: 295 %reg.bc = bitcast i32 %reg to <2 x i16> 296 %load = load i8, i8 addrspace(3)* %in 297 %ext = zext i8 %load to i16 298 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0 299 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 300 ret void 301} 302 303define void @load_local_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(3)* %in, i16 %reg) #0 { 304; GFX900-LABEL: load_local_lo_v2i16_reglo_vreg_zexti8: 305; GFX900: ; %bb.0: ; %entry 306; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 307; GFX900-NEXT: ds_read_u8 v0, v0 308; GFX900-NEXT: s_waitcnt lgkmcnt(0) 309; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0 310; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0 311; GFX900-NEXT: global_store_dword v[0:1], v0, off 312; GFX900-NEXT: s_waitcnt vmcnt(0) 313; GFX900-NEXT: s_setpc_b64 s[30:31] 314; 315; GFX906-LABEL: load_local_lo_v2i16_reglo_vreg_zexti8: 316; GFX906: ; %bb.0: ; %entry 317; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 318; GFX906-NEXT: ds_read_u8 v0, v0 319; GFX906-NEXT: s_waitcnt lgkmcnt(0) 320; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 321; GFX906-NEXT: 
v_lshl_or_b32 v0, v1, 16, v0 322; GFX906-NEXT: global_store_dword v[0:1], v0, off 323; GFX906-NEXT: s_waitcnt vmcnt(0) 324; GFX906-NEXT: s_setpc_b64 s[30:31] 325; 326; GFX803-LABEL: load_local_lo_v2i16_reglo_vreg_zexti8: 327; GFX803: ; %bb.0: ; %entry 328; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 329; GFX803-NEXT: s_mov_b32 m0, -1 330; GFX803-NEXT: ds_read_u8 v0, v0 331; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1 332; GFX803-NEXT: s_waitcnt lgkmcnt(0) 333; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 334; GFX803-NEXT: flat_store_dword v[0:1], v0 335; GFX803-NEXT: s_waitcnt vmcnt(0) 336; GFX803-NEXT: s_setpc_b64 s[30:31] 337entry: 338 %load = load i8, i8 addrspace(3)* %in 339 %ext = zext i8 %load to i16 340 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1 341 %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 0 342 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 343 ret void 344} 345 346define void @load_local_lo_v2i16_reghi_vreg_sexti8(i8 addrspace(3)* %in, i32 %reg) #0 { 347; GFX900-LABEL: load_local_lo_v2i16_reghi_vreg_sexti8: 348; GFX900: ; %bb.0: ; %entry 349; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 350; GFX900-NEXT: ds_read_i8_d16 v1, v0 351; GFX900-NEXT: s_waitcnt lgkmcnt(0) 352; GFX900-NEXT: global_store_dword v[0:1], v1, off 353; GFX900-NEXT: s_waitcnt vmcnt(0) 354; GFX900-NEXT: s_setpc_b64 s[30:31] 355; 356; GFX906-LABEL: load_local_lo_v2i16_reghi_vreg_sexti8: 357; GFX906: ; %bb.0: ; %entry 358; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 359; GFX906-NEXT: ds_read_i8 v0, v0 360; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff 361; GFX906-NEXT: s_waitcnt lgkmcnt(0) 362; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1 363; GFX906-NEXT: global_store_dword v[0:1], v0, off 364; GFX906-NEXT: s_waitcnt vmcnt(0) 365; GFX906-NEXT: s_setpc_b64 s[30:31] 366; 367; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_sexti8: 368; GFX803: ; %bb.0: ; %entry 369; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 370; GFX803-NEXT: s_mov_b32 m0, -1 371; 
GFX803-NEXT: ds_read_i8 v0, v0 372; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 373; GFX803-NEXT: s_waitcnt lgkmcnt(0) 374; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 375; GFX803-NEXT: flat_store_dword v[0:1], v0 376; GFX803-NEXT: s_waitcnt vmcnt(0) 377; GFX803-NEXT: s_setpc_b64 s[30:31] 378entry: 379 %reg.bc = bitcast i32 %reg to <2 x i16> 380 %load = load i8, i8 addrspace(3)* %in 381 %ext = sext i8 %load to i16 382 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0 383 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 384 ret void 385} 386 387define void @load_local_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(3)* %in, i16 %reg) #0 { 388; GFX900-LABEL: load_local_lo_v2i16_reglo_vreg_sexti8: 389; GFX900: ; %bb.0: ; %entry 390; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 391; GFX900-NEXT: ds_read_i8 v0, v0 392; GFX900-NEXT: s_waitcnt lgkmcnt(0) 393; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0 394; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0 395; GFX900-NEXT: global_store_dword v[0:1], v0, off 396; GFX900-NEXT: s_waitcnt vmcnt(0) 397; GFX900-NEXT: s_setpc_b64 s[30:31] 398; 399; GFX906-LABEL: load_local_lo_v2i16_reglo_vreg_sexti8: 400; GFX906: ; %bb.0: ; %entry 401; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 402; GFX906-NEXT: ds_read_i8 v0, v0 403; GFX906-NEXT: s_waitcnt lgkmcnt(0) 404; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 405; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 406; GFX906-NEXT: global_store_dword v[0:1], v0, off 407; GFX906-NEXT: s_waitcnt vmcnt(0) 408; GFX906-NEXT: s_setpc_b64 s[30:31] 409; 410; GFX803-LABEL: load_local_lo_v2i16_reglo_vreg_sexti8: 411; GFX803: ; %bb.0: ; %entry 412; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 413; GFX803-NEXT: s_mov_b32 m0, -1 414; GFX803-NEXT: ds_read_i8 v0, v0 415; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1 416; GFX803-NEXT: s_waitcnt lgkmcnt(0) 417; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:WORD_0 src1_sel:DWORD 418; GFX803-NEXT: flat_store_dword v[0:1], v0 419; GFX803-NEXT: s_waitcnt vmcnt(0) 420; GFX803-NEXT: s_setpc_b64 s[30:31] 421entry: 422 %load = load i8, i8 addrspace(3)* %in 423 %ext = sext i8 %load to i16 424 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1 425 %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 0 426 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 427 ret void 428} 429 430define void @load_local_lo_v2f16_reglo_vreg_zexti8(i8 addrspace(3)* %in, half %reg) #0 { 431; GFX900-LABEL: load_local_lo_v2f16_reglo_vreg_zexti8: 432; GFX900: ; %bb.0: ; %entry 433; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 434; GFX900-NEXT: ds_read_u8 v0, v0 435; GFX900-NEXT: s_waitcnt lgkmcnt(0) 436; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0 437; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0 438; GFX900-NEXT: global_store_dword v[0:1], v0, off 439; GFX900-NEXT: s_waitcnt vmcnt(0) 440; GFX900-NEXT: s_setpc_b64 s[30:31] 441; 442; GFX906-LABEL: load_local_lo_v2f16_reglo_vreg_zexti8: 443; GFX906: ; %bb.0: ; %entry 444; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 445; GFX906-NEXT: ds_read_u8 v0, v0 446; GFX906-NEXT: s_waitcnt lgkmcnt(0) 447; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 448; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 449; GFX906-NEXT: global_store_dword v[0:1], v0, off 450; GFX906-NEXT: s_waitcnt vmcnt(0) 451; GFX906-NEXT: s_setpc_b64 s[30:31] 452; 453; GFX803-LABEL: load_local_lo_v2f16_reglo_vreg_zexti8: 454; GFX803: ; %bb.0: ; %entry 455; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 456; GFX803-NEXT: s_mov_b32 m0, -1 457; GFX803-NEXT: ds_read_u8 v0, v0 458; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1 459; GFX803-NEXT: s_waitcnt lgkmcnt(0) 460; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 461; GFX803-NEXT: flat_store_dword v[0:1], v0 462; GFX803-NEXT: s_waitcnt vmcnt(0) 463; GFX803-NEXT: s_setpc_b64 s[30:31] 464entry: 465 %load = load i8, i8 addrspace(3)* %in 466 %ext = zext i8 %load to i16 467 %bitcast 
= bitcast i16 %ext to half 468 %build0 = insertelement <2 x half> undef, half %reg, i32 1 469 %build1 = insertelement <2 x half> %build0, half %bitcast, i32 0 470 store <2 x half> %build1, <2 x half> addrspace(1)* undef 471 ret void 472} 473 474define void @load_local_lo_v2f16_reglo_vreg_sexti8(i8 addrspace(3)* %in, half %reg) #0 { 475; GFX900-LABEL: load_local_lo_v2f16_reglo_vreg_sexti8: 476; GFX900: ; %bb.0: ; %entry 477; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 478; GFX900-NEXT: ds_read_i8 v0, v0 479; GFX900-NEXT: s_waitcnt lgkmcnt(0) 480; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0 481; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0 482; GFX900-NEXT: global_store_dword v[0:1], v0, off 483; GFX900-NEXT: s_waitcnt vmcnt(0) 484; GFX900-NEXT: s_setpc_b64 s[30:31] 485; 486; GFX906-LABEL: load_local_lo_v2f16_reglo_vreg_sexti8: 487; GFX906: ; %bb.0: ; %entry 488; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 489; GFX906-NEXT: ds_read_i8 v0, v0 490; GFX906-NEXT: s_waitcnt lgkmcnt(0) 491; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 492; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 493; GFX906-NEXT: global_store_dword v[0:1], v0, off 494; GFX906-NEXT: s_waitcnt vmcnt(0) 495; GFX906-NEXT: s_setpc_b64 s[30:31] 496; 497; GFX803-LABEL: load_local_lo_v2f16_reglo_vreg_sexti8: 498; GFX803: ; %bb.0: ; %entry 499; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 500; GFX803-NEXT: s_mov_b32 m0, -1 501; GFX803-NEXT: ds_read_i8 v0, v0 502; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1 503; GFX803-NEXT: s_waitcnt lgkmcnt(0) 504; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 505; GFX803-NEXT: flat_store_dword v[0:1], v0 506; GFX803-NEXT: s_waitcnt vmcnt(0) 507; GFX803-NEXT: s_setpc_b64 s[30:31] 508entry: 509 %load = load i8, i8 addrspace(3)* %in 510 %ext = sext i8 %load to i16 511 %bitcast = bitcast i16 %ext to half 512 %build0 = insertelement <2 x half> undef, half %reg, i32 1 513 %build1 = insertelement <2 x half> 
%build0, half %bitcast, i32 0 514 store <2 x half> %build1, <2 x half> addrspace(1)* undef 515 ret void 516} 517 518define void @load_local_lo_v2i16_reghi_vreg_multi_use_lo(i16 addrspace(3)* %in, <2 x i16> %reg) #0 { 519; GFX900-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lo: 520; GFX900: ; %bb.0: ; %entry 521; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 522; GFX900-NEXT: ds_read_u16 v0, v0 523; GFX900-NEXT: v_mov_b32_e32 v2, 0 524; GFX900-NEXT: v_mov_b32_e32 v3, 0xffff 525; GFX900-NEXT: s_waitcnt lgkmcnt(0) 526; GFX900-NEXT: ds_write_b16 v2, v0 527; GFX900-NEXT: v_bfi_b32 v0, v3, v0, v1 528; GFX900-NEXT: global_store_dword v[0:1], v0, off 529; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 530; GFX900-NEXT: s_setpc_b64 s[30:31] 531; 532; GFX906-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lo: 533; GFX906: ; %bb.0: ; %entry 534; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 535; GFX906-NEXT: ds_read_u16 v0, v0 536; GFX906-NEXT: v_mov_b32_e32 v2, 0 537; GFX906-NEXT: v_mov_b32_e32 v3, 0xffff 538; GFX906-NEXT: s_waitcnt lgkmcnt(0) 539; GFX906-NEXT: ds_write_b16 v2, v0 540; GFX906-NEXT: v_bfi_b32 v0, v3, v0, v1 541; GFX906-NEXT: global_store_dword v[0:1], v0, off 542; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 543; GFX906-NEXT: s_setpc_b64 s[30:31] 544; 545; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lo: 546; GFX803: ; %bb.0: ; %entry 547; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 548; GFX803-NEXT: s_mov_b32 m0, -1 549; GFX803-NEXT: ds_read_u16 v0, v0 550; GFX803-NEXT: v_mov_b32_e32 v2, 0 551; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 552; GFX803-NEXT: s_waitcnt lgkmcnt(0) 553; GFX803-NEXT: ds_write_b16 v2, v0 554; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 555; GFX803-NEXT: flat_store_dword v[0:1], v0 556; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 557; GFX803-NEXT: s_setpc_b64 s[30:31] 558entry: 559 %load = load i16, i16 addrspace(3)* %in 560 %elt1 = 
extractelement <2 x i16> %reg, i32 1 561 store i16 %load, i16 addrspace(3)* null 562 %build1 = insertelement <2 x i16> %reg, i16 %load, i32 0 563 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 564 ret void 565} 566 567define void @load_local_lo_v2i16_reghi_vreg_multi_use_hi(i16 addrspace(3)* %in, <2 x i16> %reg) #0 { 568; GFX900-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_hi: 569; GFX900: ; %bb.0: ; %entry 570; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 571; GFX900-NEXT: v_lshrrev_b32_e32 v2, 16, v1 572; GFX900-NEXT: ds_read_u16_d16 v1, v0 573; GFX900-NEXT: v_mov_b32_e32 v0, 0 574; GFX900-NEXT: ds_write_b16 v0, v2 575; GFX900-NEXT: s_waitcnt lgkmcnt(1) 576; GFX900-NEXT: global_store_dword v[0:1], v1, off 577; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 578; GFX900-NEXT: s_setpc_b64 s[30:31] 579; 580; GFX906-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_hi: 581; GFX906: ; %bb.0: ; %entry 582; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 583; GFX906-NEXT: ds_read_u16 v0, v0 584; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v1 585; GFX906-NEXT: v_mov_b32_e32 v3, 0 586; GFX906-NEXT: ds_write_b16 v3, v2 587; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff 588; GFX906-NEXT: s_waitcnt lgkmcnt(1) 589; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1 590; GFX906-NEXT: global_store_dword v[0:1], v0, off 591; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 592; GFX906-NEXT: s_setpc_b64 s[30:31] 593; 594; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_hi: 595; GFX803: ; %bb.0: ; %entry 596; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 597; GFX803-NEXT: s_mov_b32 m0, -1 598; GFX803-NEXT: ds_read_u16 v0, v0 599; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v1 600; GFX803-NEXT: v_mov_b32_e32 v2, 0 601; GFX803-NEXT: ds_write_b16 v2, v1 602; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1 603; GFX803-NEXT: s_waitcnt lgkmcnt(1) 604; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 605; GFX803-NEXT: flat_store_dword v[0:1], v0 606; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 607; GFX803-NEXT: 
s_setpc_b64 s[30:31] 608entry: 609 %load = load i16, i16 addrspace(3)* %in 610 %elt1 = extractelement <2 x i16> %reg, i32 1 611 store i16 %elt1, i16 addrspace(3)* null 612 %build1 = insertelement <2 x i16> %reg, i16 %load, i32 0 613 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 614 ret void 615} 616 617define void @load_local_lo_v2i16_reghi_vreg_multi_use_lohi(i16 addrspace(3)* noalias %in, <2 x i16> %reg, i16 addrspace(3)* noalias %out0, i16 addrspace(3)* noalias %out1) #0 { 618; GFX900-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lohi: 619; GFX900: ; %bb.0: ; %entry 620; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 621; GFX900-NEXT: ds_read_u16 v0, v0 622; GFX900-NEXT: v_lshrrev_b32_e32 v4, 16, v1 623; GFX900-NEXT: s_waitcnt lgkmcnt(0) 624; GFX900-NEXT: ds_write_b16 v2, v0 625; GFX900-NEXT: ds_write_b16 v3, v4 626; GFX900-NEXT: v_mov_b32_e32 v2, 0xffff 627; GFX900-NEXT: v_bfi_b32 v0, v2, v0, v1 628; GFX900-NEXT: global_store_dword v[0:1], v0, off 629; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 630; GFX900-NEXT: s_setpc_b64 s[30:31] 631; 632; GFX906-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lohi: 633; GFX906: ; %bb.0: ; %entry 634; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 635; GFX906-NEXT: ds_read_u16 v0, v0 636; GFX906-NEXT: v_lshrrev_b32_e32 v4, 16, v1 637; GFX906-NEXT: s_waitcnt lgkmcnt(0) 638; GFX906-NEXT: ds_write_b16 v2, v0 639; GFX906-NEXT: ds_write_b16 v3, v4 640; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff 641; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1 642; GFX906-NEXT: global_store_dword v[0:1], v0, off 643; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 644; GFX906-NEXT: s_setpc_b64 s[30:31] 645; 646; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lohi: 647; GFX803: ; %bb.0: ; %entry 648; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 649; GFX803-NEXT: s_mov_b32 m0, -1 650; GFX803-NEXT: ds_read_u16 v0, v0 651; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v1 652; GFX803-NEXT: s_waitcnt lgkmcnt(0) 653; GFX803-NEXT: 
ds_write_b16 v2, v0 654; GFX803-NEXT: ds_write_b16 v3, v1 655; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1 656; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 657; GFX803-NEXT: flat_store_dword v[0:1], v0 658; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 659; GFX803-NEXT: s_setpc_b64 s[30:31] 660entry: 661 %load = load i16, i16 addrspace(3)* %in 662 %elt1 = extractelement <2 x i16> %reg, i32 1 663 store i16 %load, i16 addrspace(3)* %out0 664 store i16 %elt1, i16 addrspace(3)* %out1 665 %build1 = insertelement <2 x i16> %reg, i16 %load, i32 0 666 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 667 ret void 668} 669 670define void @load_global_lo_v2i16_reglo_vreg(i16 addrspace(1)* %in, i32 %reg) #0 { 671; GFX900-LABEL: load_global_lo_v2i16_reglo_vreg: 672; GFX900: ; %bb.0: ; %entry 673; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 674; GFX900-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094 675; GFX900-NEXT: s_waitcnt vmcnt(0) 676; GFX900-NEXT: global_store_dword v[0:1], v2, off 677; GFX900-NEXT: s_waitcnt vmcnt(0) 678; GFX900-NEXT: s_setpc_b64 s[30:31] 679; 680; GFX906-LABEL: load_global_lo_v2i16_reglo_vreg: 681; GFX906: ; %bb.0: ; %entry 682; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 683; GFX906-NEXT: global_load_ushort v0, v[0:1], off offset:-4094 684; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff 685; GFX906-NEXT: s_waitcnt vmcnt(0) 686; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2 687; GFX906-NEXT: global_store_dword v[0:1], v0, off 688; GFX906-NEXT: s_waitcnt vmcnt(0) 689; GFX906-NEXT: s_setpc_b64 s[30:31] 690; 691; GFX803-LABEL: load_global_lo_v2i16_reglo_vreg: 692; GFX803: ; %bb.0: ; %entry 693; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 694; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff002, v0 695; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc 696; GFX803-NEXT: flat_load_ushort v0, v[0:1] 697; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 698; GFX803-NEXT: s_waitcnt vmcnt(0) 
699; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 700; GFX803-NEXT: flat_store_dword v[0:1], v0 701; GFX803-NEXT: s_waitcnt vmcnt(0) 702; GFX803-NEXT: s_setpc_b64 s[30:31] 703entry: 704 %reg.bc = bitcast i32 %reg to <2 x i16> 705 %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 -2047 706 %load = load i16, i16 addrspace(1)* %gep 707 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0 708 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 709 ret void 710} 711 712define void @load_global_lo_v2f16_reglo_vreg(half addrspace(1)* %in, i32 %reg) #0 { 713; GFX900-LABEL: load_global_lo_v2f16_reglo_vreg: 714; GFX900: ; %bb.0: ; %entry 715; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 716; GFX900-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094 717; GFX900-NEXT: s_waitcnt vmcnt(0) 718; GFX900-NEXT: global_store_dword v[0:1], v2, off 719; GFX900-NEXT: s_waitcnt vmcnt(0) 720; GFX900-NEXT: s_setpc_b64 s[30:31] 721; 722; GFX906-LABEL: load_global_lo_v2f16_reglo_vreg: 723; GFX906: ; %bb.0: ; %entry 724; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 725; GFX906-NEXT: global_load_ushort v0, v[0:1], off offset:-4094 726; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2 727; GFX906-NEXT: s_waitcnt vmcnt(0) 728; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 729; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 730; GFX906-NEXT: global_store_dword v[0:1], v0, off 731; GFX906-NEXT: s_waitcnt vmcnt(0) 732; GFX906-NEXT: s_setpc_b64 s[30:31] 733; 734; GFX803-LABEL: load_global_lo_v2f16_reglo_vreg: 735; GFX803: ; %bb.0: ; %entry 736; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 737; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff002, v0 738; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc 739; GFX803-NEXT: flat_load_ushort v0, v[0:1] 740; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 741; GFX803-NEXT: s_waitcnt vmcnt(0) 742; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 743; GFX803-NEXT: flat_store_dword v[0:1], v0 744; GFX803-NEXT: s_waitcnt vmcnt(0) 745; GFX803-NEXT: 
s_setpc_b64 s[30:31] 746entry: 747 %reg.bc = bitcast i32 %reg to <2 x half> 748 %gep = getelementptr inbounds half, half addrspace(1)* %in, i64 -2047 749 %load = load half, half addrspace(1)* %gep 750 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0 751 store <2 x half> %build1, <2 x half> addrspace(1)* undef 752 ret void 753} 754 755define void @load_global_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(1)* %in, i32 %reg) #0 { 756; GFX900-LABEL: load_global_lo_v2i16_reglo_vreg_zexti8: 757; GFX900: ; %bb.0: ; %entry 758; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 759; GFX900-NEXT: global_load_ubyte_d16 v2, v[0:1], off offset:-4095 760; GFX900-NEXT: s_waitcnt vmcnt(0) 761; GFX900-NEXT: global_store_dword v[0:1], v2, off 762; GFX900-NEXT: s_waitcnt vmcnt(0) 763; GFX900-NEXT: s_setpc_b64 s[30:31] 764; 765; GFX906-LABEL: load_global_lo_v2i16_reglo_vreg_zexti8: 766; GFX906: ; %bb.0: ; %entry 767; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 768; GFX906-NEXT: global_load_ubyte v0, v[0:1], off offset:-4095 769; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff 770; GFX906-NEXT: s_waitcnt vmcnt(0) 771; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2 772; GFX906-NEXT: global_store_dword v[0:1], v0, off 773; GFX906-NEXT: s_waitcnt vmcnt(0) 774; GFX906-NEXT: s_setpc_b64 s[30:31] 775; 776; GFX803-LABEL: load_global_lo_v2i16_reglo_vreg_zexti8: 777; GFX803: ; %bb.0: ; %entry 778; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 779; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0 780; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc 781; GFX803-NEXT: flat_load_ubyte v0, v[0:1] 782; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v2 783; GFX803-NEXT: s_mov_b32 s4, 0x5040c00 784; GFX803-NEXT: s_waitcnt vmcnt(0) 785; GFX803-NEXT: v_perm_b32 v0, v1, v0, s4 786; GFX803-NEXT: flat_store_dword v[0:1], v0 787; GFX803-NEXT: s_waitcnt vmcnt(0) 788; GFX803-NEXT: s_setpc_b64 s[30:31] 789entry: 790 %reg.bc = bitcast i32 %reg to <2 x i16> 791 %gep = getelementptr inbounds i8, i8 addrspace(1)* 
%in, i64 -4095 792 %load = load i8, i8 addrspace(1)* %gep 793 %ext = zext i8 %load to i16 794 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0 795 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 796 ret void 797} 798 799define void @load_global_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(1)* %in, i32 %reg) #0 { 800; GFX900-LABEL: load_global_lo_v2i16_reglo_vreg_sexti8: 801; GFX900: ; %bb.0: ; %entry 802; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 803; GFX900-NEXT: global_load_sbyte_d16 v2, v[0:1], off offset:-4095 804; GFX900-NEXT: s_waitcnt vmcnt(0) 805; GFX900-NEXT: global_store_dword v[0:1], v2, off 806; GFX900-NEXT: s_waitcnt vmcnt(0) 807; GFX900-NEXT: s_setpc_b64 s[30:31] 808; 809; GFX906-LABEL: load_global_lo_v2i16_reglo_vreg_sexti8: 810; GFX906: ; %bb.0: ; %entry 811; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 812; GFX906-NEXT: global_load_sbyte v0, v[0:1], off offset:-4095 813; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff 814; GFX906-NEXT: s_waitcnt vmcnt(0) 815; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2 816; GFX906-NEXT: global_store_dword v[0:1], v0, off 817; GFX906-NEXT: s_waitcnt vmcnt(0) 818; GFX906-NEXT: s_setpc_b64 s[30:31] 819; 820; GFX803-LABEL: load_global_lo_v2i16_reglo_vreg_sexti8: 821; GFX803: ; %bb.0: ; %entry 822; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 823; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0 824; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc 825; GFX803-NEXT: flat_load_sbyte v0, v[0:1] 826; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 827; GFX803-NEXT: s_waitcnt vmcnt(0) 828; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 829; GFX803-NEXT: flat_store_dword v[0:1], v0 830; GFX803-NEXT: s_waitcnt vmcnt(0) 831; GFX803-NEXT: s_setpc_b64 s[30:31] 832entry: 833 %reg.bc = bitcast i32 %reg to <2 x i16> 834 %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095 835 %load = load i8, i8 addrspace(1)* %gep 836 %ext = sext i8 %load 
to i16 837 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0 838 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 839 ret void 840} 841 842define void @load_global_lo_v2f16_reglo_vreg_zexti8(i8 addrspace(1)* %in, i32 %reg) #0 { 843; GFX900-LABEL: load_global_lo_v2f16_reglo_vreg_zexti8: 844; GFX900: ; %bb.0: ; %entry 845; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 846; GFX900-NEXT: global_load_ubyte_d16 v2, v[0:1], off offset:-4095 847; GFX900-NEXT: s_waitcnt vmcnt(0) 848; GFX900-NEXT: global_store_dword v[0:1], v2, off 849; GFX900-NEXT: s_waitcnt vmcnt(0) 850; GFX900-NEXT: s_setpc_b64 s[30:31] 851; 852; GFX906-LABEL: load_global_lo_v2f16_reglo_vreg_zexti8: 853; GFX906: ; %bb.0: ; %entry 854; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 855; GFX906-NEXT: global_load_ubyte v0, v[0:1], off offset:-4095 856; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2 857; GFX906-NEXT: s_waitcnt vmcnt(0) 858; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 859; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 860; GFX906-NEXT: global_store_dword v[0:1], v0, off 861; GFX906-NEXT: s_waitcnt vmcnt(0) 862; GFX906-NEXT: s_setpc_b64 s[30:31] 863; 864; GFX803-LABEL: load_global_lo_v2f16_reglo_vreg_zexti8: 865; GFX803: ; %bb.0: ; %entry 866; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 867; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0 868; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc 869; GFX803-NEXT: flat_load_ubyte v0, v[0:1] 870; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v2 871; GFX803-NEXT: s_mov_b32 s4, 0x5040c00 872; GFX803-NEXT: s_waitcnt vmcnt(0) 873; GFX803-NEXT: v_perm_b32 v0, v1, v0, s4 874; GFX803-NEXT: flat_store_dword v[0:1], v0 875; GFX803-NEXT: s_waitcnt vmcnt(0) 876; GFX803-NEXT: s_setpc_b64 s[30:31] 877entry: 878 %reg.bc = bitcast i32 %reg to <2 x half> 879 %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095 880 %load = load i8, i8 addrspace(1)* %gep 881 %ext = zext i8 %load to i16 882 %bitcast = bitcast i16 %ext to half 883 
%build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0 884 store <2 x half> %build1, <2 x half> addrspace(1)* undef 885 ret void 886} 887 888define void @load_global_lo_v2f16_reglo_vreg_sexti8(i8 addrspace(1)* %in, i32 %reg) #0 { 889; GFX900-LABEL: load_global_lo_v2f16_reglo_vreg_sexti8: 890; GFX900: ; %bb.0: ; %entry 891; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 892; GFX900-NEXT: global_load_sbyte_d16 v2, v[0:1], off offset:-4095 893; GFX900-NEXT: s_waitcnt vmcnt(0) 894; GFX900-NEXT: global_store_dword v[0:1], v2, off 895; GFX900-NEXT: s_waitcnt vmcnt(0) 896; GFX900-NEXT: s_setpc_b64 s[30:31] 897; 898; GFX906-LABEL: load_global_lo_v2f16_reglo_vreg_sexti8: 899; GFX906: ; %bb.0: ; %entry 900; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 901; GFX906-NEXT: global_load_sbyte v0, v[0:1], off offset:-4095 902; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2 903; GFX906-NEXT: s_waitcnt vmcnt(0) 904; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 905; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 906; GFX906-NEXT: global_store_dword v[0:1], v0, off 907; GFX906-NEXT: s_waitcnt vmcnt(0) 908; GFX906-NEXT: s_setpc_b64 s[30:31] 909; 910; GFX803-LABEL: load_global_lo_v2f16_reglo_vreg_sexti8: 911; GFX803: ; %bb.0: ; %entry 912; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 913; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0 914; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc 915; GFX803-NEXT: flat_load_sbyte v0, v[0:1] 916; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 917; GFX803-NEXT: s_waitcnt vmcnt(0) 918; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 919; GFX803-NEXT: flat_store_dword v[0:1], v0 920; GFX803-NEXT: s_waitcnt vmcnt(0) 921; GFX803-NEXT: s_setpc_b64 s[30:31] 922entry: 923 %reg.bc = bitcast i32 %reg to <2 x half> 924 %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095 925 %load = load i8, i8 addrspace(1)* %gep 926 %ext = sext i8 %load to i16 927 %bitcast = bitcast 
i16 %ext to half 928 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0 929 store <2 x half> %build1, <2 x half> addrspace(1)* undef 930 ret void 931} 932 933define void @load_flat_lo_v2i16_reghi_vreg(i16* %in, i32 %reg) #0 { 934; GFX900-LABEL: load_flat_lo_v2i16_reghi_vreg: 935; GFX900: ; %bb.0: ; %entry 936; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 937; GFX900-NEXT: flat_load_short_d16 v2, v[0:1] 938; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 939; GFX900-NEXT: global_store_dword v[0:1], v2, off 940; GFX900-NEXT: s_waitcnt vmcnt(0) 941; GFX900-NEXT: s_setpc_b64 s[30:31] 942; 943; GFX906-LABEL: load_flat_lo_v2i16_reghi_vreg: 944; GFX906: ; %bb.0: ; %entry 945; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 946; GFX906-NEXT: flat_load_ushort v0, v[0:1] 947; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff 948; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 949; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2 950; GFX906-NEXT: global_store_dword v[0:1], v0, off 951; GFX906-NEXT: s_waitcnt vmcnt(0) 952; GFX906-NEXT: s_setpc_b64 s[30:31] 953; 954; GFX803-LABEL: load_flat_lo_v2i16_reghi_vreg: 955; GFX803: ; %bb.0: ; %entry 956; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 957; GFX803-NEXT: flat_load_ushort v0, v[0:1] 958; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 959; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 960; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 961; GFX803-NEXT: flat_store_dword v[0:1], v0 962; GFX803-NEXT: s_waitcnt vmcnt(0) 963; GFX803-NEXT: s_setpc_b64 s[30:31] 964entry: 965 %reg.bc = bitcast i32 %reg to <2 x i16> 966 %load = load i16, i16* %in 967 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0 968 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 969 ret void 970} 971 972define void @load_flat_lo_v2f16_reghi_vreg(half* %in, i32 %reg) #0 { 973; GFX900-LABEL: load_flat_lo_v2f16_reghi_vreg: 974; GFX900: ; %bb.0: ; %entry 975; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 976; GFX900-NEXT: flat_load_short_d16 v2, 
v[0:1] 977; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 978; GFX900-NEXT: global_store_dword v[0:1], v2, off 979; GFX900-NEXT: s_waitcnt vmcnt(0) 980; GFX900-NEXT: s_setpc_b64 s[30:31] 981; 982; GFX906-LABEL: load_flat_lo_v2f16_reghi_vreg: 983; GFX906: ; %bb.0: ; %entry 984; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 985; GFX906-NEXT: flat_load_ushort v0, v[0:1] 986; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2 987; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 988; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 989; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 990; GFX906-NEXT: global_store_dword v[0:1], v0, off 991; GFX906-NEXT: s_waitcnt vmcnt(0) 992; GFX906-NEXT: s_setpc_b64 s[30:31] 993; 994; GFX803-LABEL: load_flat_lo_v2f16_reghi_vreg: 995; GFX803: ; %bb.0: ; %entry 996; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 997; GFX803-NEXT: flat_load_ushort v0, v[0:1] 998; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 999; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1000; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 1001; GFX803-NEXT: flat_store_dword v[0:1], v0 1002; GFX803-NEXT: s_waitcnt vmcnt(0) 1003; GFX803-NEXT: s_setpc_b64 s[30:31] 1004 1005; FIXME: the v_and_b32 above should be removable 1006entry: 1007 %reg.bc = bitcast i32 %reg to <2 x half> 1008 %load = load half, half* %in 1009 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0 1010 store <2 x half> %build1, <2 x half> addrspace(1)* undef 1011 ret void 1012} 1013 1014define void @load_flat_lo_v2i16_reglo_vreg_zexti8(i8* %in, i32 %reg) #0 { 1015; GFX900-LABEL: load_flat_lo_v2i16_reglo_vreg_zexti8: 1016; GFX900: ; %bb.0: ; %entry 1017; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1018; GFX900-NEXT: flat_load_ubyte_d16 v2, v[0:1] 1019; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1020; GFX900-NEXT: global_store_dword v[0:1], v2, off 1021; GFX900-NEXT: s_waitcnt vmcnt(0) 1022; GFX900-NEXT: s_setpc_b64 s[30:31] 1023; 1024; GFX906-LABEL: load_flat_lo_v2i16_reglo_vreg_zexti8: 1025; GFX906: ; %bb.0: ; %entry 
1026; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1027; GFX906-NEXT: flat_load_ubyte v0, v[0:1] 1028; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff 1029; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1030; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2 1031; GFX906-NEXT: global_store_dword v[0:1], v0, off 1032; GFX906-NEXT: s_waitcnt vmcnt(0) 1033; GFX906-NEXT: s_setpc_b64 s[30:31] 1034; 1035; GFX803-LABEL: load_flat_lo_v2i16_reglo_vreg_zexti8: 1036; GFX803: ; %bb.0: ; %entry 1037; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1038; GFX803-NEXT: flat_load_ubyte v0, v[0:1] 1039; GFX803-NEXT: v_lshrrev_b32_e32 v2, 16, v2 1040; GFX803-NEXT: s_mov_b32 s4, 0x5040c00 1041; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1042; GFX803-NEXT: v_perm_b32 v0, v2, v0, s4 1043; GFX803-NEXT: flat_store_dword v[0:1], v0 1044; GFX803-NEXT: s_waitcnt vmcnt(0) 1045; GFX803-NEXT: s_setpc_b64 s[30:31] 1046entry: 1047 %reg.bc = bitcast i32 %reg to <2 x i16> 1048 %load = load i8, i8* %in 1049 %ext = zext i8 %load to i16 1050 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0 1051 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 1052 ret void 1053} 1054 1055define void @load_flat_lo_v2i16_reglo_vreg_sexti8(i8* %in, i32 %reg) #0 { 1056; GFX900-LABEL: load_flat_lo_v2i16_reglo_vreg_sexti8: 1057; GFX900: ; %bb.0: ; %entry 1058; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1059; GFX900-NEXT: flat_load_sbyte_d16 v2, v[0:1] 1060; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1061; GFX900-NEXT: global_store_dword v[0:1], v2, off 1062; GFX900-NEXT: s_waitcnt vmcnt(0) 1063; GFX900-NEXT: s_setpc_b64 s[30:31] 1064; 1065; GFX906-LABEL: load_flat_lo_v2i16_reglo_vreg_sexti8: 1066; GFX906: ; %bb.0: ; %entry 1067; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1068; GFX906-NEXT: flat_load_sbyte v0, v[0:1] 1069; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff 1070; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1071; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2 1072; GFX906-NEXT: global_store_dword v[0:1], v0, 
off 1073; GFX906-NEXT: s_waitcnt vmcnt(0) 1074; GFX906-NEXT: s_setpc_b64 s[30:31] 1075; 1076; GFX803-LABEL: load_flat_lo_v2i16_reglo_vreg_sexti8: 1077; GFX803: ; %bb.0: ; %entry 1078; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1079; GFX803-NEXT: flat_load_sbyte v0, v[0:1] 1080; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 1081; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1082; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1083; GFX803-NEXT: flat_store_dword v[0:1], v0 1084; GFX803-NEXT: s_waitcnt vmcnt(0) 1085; GFX803-NEXT: s_setpc_b64 s[30:31] 1086entry: 1087 %reg.bc = bitcast i32 %reg to <2 x i16> 1088 %load = load i8, i8* %in 1089 %ext = sext i8 %load to i16 1090 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0 1091 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 1092 ret void 1093} 1094 1095define void @load_flat_lo_v2f16_reglo_vreg_zexti8(i8* %in, i32 %reg) #0 { 1096; GFX900-LABEL: load_flat_lo_v2f16_reglo_vreg_zexti8: 1097; GFX900: ; %bb.0: ; %entry 1098; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1099; GFX900-NEXT: flat_load_ubyte_d16 v2, v[0:1] 1100; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1101; GFX900-NEXT: global_store_dword v[0:1], v2, off 1102; GFX900-NEXT: s_waitcnt vmcnt(0) 1103; GFX900-NEXT: s_setpc_b64 s[30:31] 1104; 1105; GFX906-LABEL: load_flat_lo_v2f16_reglo_vreg_zexti8: 1106; GFX906: ; %bb.0: ; %entry 1107; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1108; GFX906-NEXT: flat_load_ubyte v0, v[0:1] 1109; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2 1110; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1111; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 1112; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 1113; GFX906-NEXT: global_store_dword v[0:1], v0, off 1114; GFX906-NEXT: s_waitcnt vmcnt(0) 1115; GFX906-NEXT: s_setpc_b64 s[30:31] 1116; 1117; GFX803-LABEL: load_flat_lo_v2f16_reglo_vreg_zexti8: 1118; GFX803: ; %bb.0: ; %entry 1119; GFX803-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 1120; GFX803-NEXT: flat_load_ubyte v0, v[0:1] 1121; GFX803-NEXT: v_lshrrev_b32_e32 v2, 16, v2 1122; GFX803-NEXT: s_mov_b32 s4, 0x5040c00 1123; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1124; GFX803-NEXT: v_perm_b32 v0, v2, v0, s4 1125; GFX803-NEXT: flat_store_dword v[0:1], v0 1126; GFX803-NEXT: s_waitcnt vmcnt(0) 1127; GFX803-NEXT: s_setpc_b64 s[30:31] 1128entry: 1129 %reg.bc = bitcast i32 %reg to <2 x half> 1130 %load = load i8, i8* %in 1131 %ext = zext i8 %load to i16 1132 %bitcast = bitcast i16 %ext to half 1133 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0 1134 store <2 x half> %build1, <2 x half> addrspace(1)* undef 1135 ret void 1136} 1137 1138define void @load_flat_lo_v2f16_reglo_vreg_sexti8(i8* %in, i32 %reg) #0 { 1139; GFX900-LABEL: load_flat_lo_v2f16_reglo_vreg_sexti8: 1140; GFX900: ; %bb.0: ; %entry 1141; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1142; GFX900-NEXT: flat_load_sbyte_d16 v2, v[0:1] 1143; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1144; GFX900-NEXT: global_store_dword v[0:1], v2, off 1145; GFX900-NEXT: s_waitcnt vmcnt(0) 1146; GFX900-NEXT: s_setpc_b64 s[30:31] 1147; 1148; GFX906-LABEL: load_flat_lo_v2f16_reglo_vreg_sexti8: 1149; GFX906: ; %bb.0: ; %entry 1150; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1151; GFX906-NEXT: flat_load_sbyte v0, v[0:1] 1152; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2 1153; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1154; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 1155; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 1156; GFX906-NEXT: global_store_dword v[0:1], v0, off 1157; GFX906-NEXT: s_waitcnt vmcnt(0) 1158; GFX906-NEXT: s_setpc_b64 s[30:31] 1159; 1160; GFX803-LABEL: load_flat_lo_v2f16_reglo_vreg_sexti8: 1161; GFX803: ; %bb.0: ; %entry 1162; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1163; GFX803-NEXT: flat_load_sbyte v0, v[0:1] 1164; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 1165; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1166; 
GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1167; GFX803-NEXT: flat_store_dword v[0:1], v0 1168; GFX803-NEXT: s_waitcnt vmcnt(0) 1169; GFX803-NEXT: s_setpc_b64 s[30:31] 1170entry: 1171 %reg.bc = bitcast i32 %reg to <2 x half> 1172 %load = load i8, i8* %in 1173 %ext = sext i8 %load to i16 1174 %bitcast = bitcast i16 %ext to half 1175 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0 1176 store <2 x half> %build1, <2 x half> addrspace(1)* undef 1177 ret void 1178} 1179 1180define void @load_private_lo_v2i16_reglo_vreg(i16 addrspace(5)* byval(i16) %in, i32 %reg) #0 { 1181; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg: 1182; GFX900-MUBUF: ; %bb.0: ; %entry 1183; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1184; GFX900-MUBUF-NEXT: buffer_load_short_d16 v0, off, s[0:3], s32 offset:4094 1185; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1186; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off 1187; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1188; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 1189; 1190; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg: 1191; GFX906: ; %bb.0: ; %entry 1192; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1193; GFX906-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094 1194; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff 1195; GFX906-NEXT: s_waitcnt vmcnt(0) 1196; GFX906-NEXT: v_bfi_b32 v0, v2, v1, v0 1197; GFX906-NEXT: global_store_dword v[0:1], v0, off 1198; GFX906-NEXT: s_waitcnt vmcnt(0) 1199; GFX906-NEXT: s_setpc_b64 s[30:31] 1200; 1201; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg: 1202; GFX803: ; %bb.0: ; %entry 1203; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1204; GFX803-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094 1205; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 1206; GFX803-NEXT: s_waitcnt vmcnt(0) 1207; GFX803-NEXT: v_or_b32_e32 v0, v1, v0 1208; GFX803-NEXT: flat_store_dword v[0:1], v0 1209; GFX803-NEXT: s_waitcnt 
vmcnt(0) 1210; GFX803-NEXT: s_setpc_b64 s[30:31] 1211; 1212; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg: 1213; GFX900-FLATSCR: ; %bb.0: ; %entry 1214; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1215; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v0, off, s32 offset:4094 1216; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1217; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off 1218; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1219; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 1220entry: 1221 %reg.bc = bitcast i32 %reg to <2 x i16> 1222 %gep = getelementptr inbounds i16, i16 addrspace(5)* %in, i64 2047 1223 %load = load i16, i16 addrspace(5)* %gep 1224 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0 1225 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 1226 ret void 1227} 1228 1229define void @load_private_lo_v2i16_reghi_vreg(i16 addrspace(5)* byval(i16) %in, i16 %reg) #0 { 1230; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reghi_vreg: 1231; GFX900-MUBUF: ; %bb.0: ; %entry 1232; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1233; GFX900-MUBUF-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094 1234; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1235; GFX900-MUBUF-NEXT: v_and_b32_e32 v1, 0xffff, v1 1236; GFX900-MUBUF-NEXT: v_lshl_or_b32 v0, v0, 16, v1 1237; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off 1238; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1239; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 1240; 1241; GFX906-LABEL: load_private_lo_v2i16_reghi_vreg: 1242; GFX906: ; %bb.0: ; %entry 1243; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1244; GFX906-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094 1245; GFX906-NEXT: s_waitcnt vmcnt(0) 1246; GFX906-NEXT: v_and_b32_e32 v1, 0xffff, v1 1247; GFX906-NEXT: v_lshl_or_b32 v0, v0, 16, v1 1248; GFX906-NEXT: global_store_dword v[0:1], v0, off 1249; GFX906-NEXT: s_waitcnt vmcnt(0) 1250; GFX906-NEXT: s_setpc_b64 s[30:31] 1251; 1252; GFX803-LABEL: 
load_private_lo_v2i16_reghi_vreg: 1253; GFX803: ; %bb.0: ; %entry 1254; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1255; GFX803-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094 1256; GFX803-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1257; GFX803-NEXT: s_waitcnt vmcnt(0) 1258; GFX803-NEXT: v_or_b32_e32 v0, v1, v0 1259; GFX803-NEXT: flat_store_dword v[0:1], v0 1260; GFX803-NEXT: s_waitcnt vmcnt(0) 1261; GFX803-NEXT: s_setpc_b64 s[30:31] 1262; 1263; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reghi_vreg: 1264; GFX900-FLATSCR: ; %bb.0: ; %entry 1265; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1266; GFX900-FLATSCR-NEXT: scratch_load_ushort v1, off, s32 offset:4094 1267; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1268; GFX900-FLATSCR-NEXT: v_and_b32_e32 v1, 0xffff, v1 1269; GFX900-FLATSCR-NEXT: v_lshl_or_b32 v0, v0, 16, v1 1270; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off 1271; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1272; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 1273entry: 1274 %gep = getelementptr inbounds i16, i16 addrspace(5)* %in, i64 2047 1275 %load = load i16, i16 addrspace(5)* %gep 1276 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1 1277 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0 1278 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 1279 ret void 1280} 1281 1282define void @load_private_lo_v2f16_reglo_vreg(half addrspace(5)* byval(half) %in, i32 %reg) #0 { 1283; GFX900-MUBUF-LABEL: load_private_lo_v2f16_reglo_vreg: 1284; GFX900-MUBUF: ; %bb.0: ; %entry 1285; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1286; GFX900-MUBUF-NEXT: buffer_load_short_d16 v0, off, s[0:3], s32 offset:4094 1287; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1288; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off 1289; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1290; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 1291; 1292; GFX906-LABEL: load_private_lo_v2f16_reglo_vreg: 1293; GFX906: ; %bb.0: ; %entry 1294; 
GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1295; GFX906-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094 1296; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v0 1297; GFX906-NEXT: s_waitcnt vmcnt(0) 1298; GFX906-NEXT: v_and_b32_e32 v1, 0xffff, v1 1299; GFX906-NEXT: v_lshl_or_b32 v0, v0, 16, v1 1300; GFX906-NEXT: global_store_dword v[0:1], v0, off 1301; GFX906-NEXT: s_waitcnt vmcnt(0) 1302; GFX906-NEXT: s_setpc_b64 s[30:31] 1303; 1304; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg: 1305; GFX803: ; %bb.0: ; %entry 1306; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1307; GFX803-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094 1308; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 1309; GFX803-NEXT: s_waitcnt vmcnt(0) 1310; GFX803-NEXT: v_or_b32_e32 v0, v1, v0 1311; GFX803-NEXT: flat_store_dword v[0:1], v0 1312; GFX803-NEXT: s_waitcnt vmcnt(0) 1313; GFX803-NEXT: s_setpc_b64 s[30:31] 1314; 1315; GFX900-FLATSCR-LABEL: load_private_lo_v2f16_reglo_vreg: 1316; GFX900-FLATSCR: ; %bb.0: ; %entry 1317; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1318; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v0, off, s32 offset:4094 1319; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1320; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off 1321; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1322; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 1323entry: 1324 %reg.bc = bitcast i32 %reg to <2 x half> 1325 %gep = getelementptr inbounds half, half addrspace(5)* %in, i64 2047 1326 %load = load half, half addrspace(5)* %gep 1327 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0 1328 store <2 x half> %build1, <2 x half> addrspace(1)* undef 1329 ret void 1330} 1331 1332define void @load_private_lo_v2i16_reglo_vreg_nooff(i16 addrspace(5)* %in, i32 %reg) #0 { 1333; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_nooff: 1334; GFX900-MUBUF: ; %bb.0: ; %entry 1335; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1336; GFX900-MUBUF-NEXT: 
buffer_load_short_d16 v1, off, s[0:3], 0 offset:4094 glc 1337; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1338; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v1, off 1339; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1340; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 1341; 1342; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_nooff: 1343; GFX906: ; %bb.0: ; %entry 1344; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1345; GFX906-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094 glc 1346; GFX906-NEXT: s_waitcnt vmcnt(0) 1347; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff 1348; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1 1349; GFX906-NEXT: global_store_dword v[0:1], v0, off 1350; GFX906-NEXT: s_waitcnt vmcnt(0) 1351; GFX906-NEXT: s_setpc_b64 s[30:31] 1352; 1353; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_nooff: 1354; GFX803: ; %bb.0: ; %entry 1355; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1356; GFX803-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094 glc 1357; GFX803-NEXT: s_waitcnt vmcnt(0) 1358; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 1359; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 1360; GFX803-NEXT: flat_store_dword v[0:1], v0 1361; GFX803-NEXT: s_waitcnt vmcnt(0) 1362; GFX803-NEXT: s_setpc_b64 s[30:31] 1363; 1364; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_nooff: 1365; GFX900-FLATSCR: ; %bb.0: ; %entry 1366; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1367; GFX900-FLATSCR-NEXT: s_movk_i32 s0, 0xffe 1368; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v1, off, s0 glc 1369; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1370; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v1, off 1371; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1372; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 1373entry: 1374 %reg.bc = bitcast i32 %reg to <2 x i16> 1375 %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 4094 to i16 addrspace(5)*) 1376 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0 1377 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 
1378 ret void 1379} 1380 1381define void @load_private_lo_v2i16_reghi_vreg_nooff(i16 addrspace(5)* %in, i32 %reg) #0 { 1382; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reghi_vreg_nooff: 1383; GFX900-MUBUF: ; %bb.0: ; %entry 1384; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1385; GFX900-MUBUF-NEXT: buffer_load_short_d16 v1, off, s[0:3], 0 offset:4094 glc 1386; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1387; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v1, off 1388; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1389; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 1390; 1391; GFX906-LABEL: load_private_lo_v2i16_reghi_vreg_nooff: 1392; GFX906: ; %bb.0: ; %entry 1393; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1394; GFX906-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094 glc 1395; GFX906-NEXT: s_waitcnt vmcnt(0) 1396; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff 1397; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1 1398; GFX906-NEXT: global_store_dword v[0:1], v0, off 1399; GFX906-NEXT: s_waitcnt vmcnt(0) 1400; GFX906-NEXT: s_setpc_b64 s[30:31] 1401; 1402; GFX803-LABEL: load_private_lo_v2i16_reghi_vreg_nooff: 1403; GFX803: ; %bb.0: ; %entry 1404; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1405; GFX803-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094 glc 1406; GFX803-NEXT: s_waitcnt vmcnt(0) 1407; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 1408; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 1409; GFX803-NEXT: flat_store_dword v[0:1], v0 1410; GFX803-NEXT: s_waitcnt vmcnt(0) 1411; GFX803-NEXT: s_setpc_b64 s[30:31] 1412; 1413; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reghi_vreg_nooff: 1414; GFX900-FLATSCR: ; %bb.0: ; %entry 1415; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1416; GFX900-FLATSCR-NEXT: s_movk_i32 s0, 0xffe 1417; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v1, off, s0 glc 1418; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1419; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v1, off 1420; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1421; 
GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 1422entry: 1423 %reg.bc = bitcast i32 %reg to <2 x i16> 1424 %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 4094 to i16 addrspace(5)*) 1425 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0 1426 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 1427 ret void 1428} 1429 1430define void @load_private_lo_v2f16_reglo_vreg_nooff(half addrspace(5)* %in, i32 %reg) #0 { 1431; GFX900-MUBUF-LABEL: load_private_lo_v2f16_reglo_vreg_nooff: 1432; GFX900-MUBUF: ; %bb.0: ; %entry 1433; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1434; GFX900-MUBUF-NEXT: buffer_load_short_d16 v1, off, s[0:3], 0 offset:4094 glc 1435; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1436; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v1, off 1437; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1438; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 1439; 1440; GFX906-LABEL: load_private_lo_v2f16_reglo_vreg_nooff: 1441; GFX906: ; %bb.0: ; %entry 1442; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1443; GFX906-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094 glc 1444; GFX906-NEXT: s_waitcnt vmcnt(0) 1445; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v1 1446; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 1447; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 1448; GFX906-NEXT: global_store_dword v[0:1], v0, off 1449; GFX906-NEXT: s_waitcnt vmcnt(0) 1450; GFX906-NEXT: s_setpc_b64 s[30:31] 1451; 1452; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg_nooff: 1453; GFX803: ; %bb.0: ; %entry 1454; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1455; GFX803-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094 glc 1456; GFX803-NEXT: s_waitcnt vmcnt(0) 1457; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 1458; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 1459; GFX803-NEXT: flat_store_dword v[0:1], v0 1460; GFX803-NEXT: s_waitcnt vmcnt(0) 1461; GFX803-NEXT: s_setpc_b64 s[30:31] 1462; 1463; GFX900-FLATSCR-LABEL: load_private_lo_v2f16_reglo_vreg_nooff: 1464; 
GFX900-FLATSCR: ; %bb.0: ; %entry 1465; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1466; GFX900-FLATSCR-NEXT: s_movk_i32 s0, 0xffe 1467; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v1, off, s0 glc 1468; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1469; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v1, off 1470; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1471; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 1472entry: 1473 %reg.bc = bitcast i32 %reg to <2 x half> 1474 %load = load volatile half, half addrspace(5)* inttoptr (i32 4094 to half addrspace(5)*) 1475 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0 1476 store <2 x half> %build1, <2 x half> addrspace(1)* undef 1477 ret void 1478} 1479 1480define void @load_private_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(5)* byval(i8) %in, i32 %reg) #0 { 1481; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8: 1482; GFX900-MUBUF: ; %bb.0: ; %entry 1483; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1484; GFX900-MUBUF-NEXT: buffer_load_ubyte_d16 v0, off, s[0:3], s32 offset:4095 1485; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1486; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off 1487; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1488; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 1489; 1490; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8: 1491; GFX906: ; %bb.0: ; %entry 1492; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1493; GFX906-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095 1494; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff 1495; GFX906-NEXT: s_waitcnt vmcnt(0) 1496; GFX906-NEXT: v_bfi_b32 v0, v2, v1, v0 1497; GFX906-NEXT: global_store_dword v[0:1], v0, off 1498; GFX906-NEXT: s_waitcnt vmcnt(0) 1499; GFX906-NEXT: s_setpc_b64 s[30:31] 1500; 1501; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8: 1502; GFX803: ; %bb.0: ; %entry 1503; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1504; GFX803-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095 1505; 
GFX803-NEXT: v_lshrrev_b32_e32 v0, 16, v0 1506; GFX803-NEXT: s_mov_b32 s4, 0x5040c00 1507; GFX803-NEXT: s_waitcnt vmcnt(0) 1508; GFX803-NEXT: v_perm_b32 v0, v0, v1, s4 1509; GFX803-NEXT: flat_store_dword v[0:1], v0 1510; GFX803-NEXT: s_waitcnt vmcnt(0) 1511; GFX803-NEXT: s_setpc_b64 s[30:31] 1512; 1513; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8: 1514; GFX900-FLATSCR: ; %bb.0: ; %entry 1515; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1516; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v0, off, s32 offset:4095 1517; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1518; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off 1519; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1520; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 1521entry: 1522 %reg.bc = bitcast i32 %reg to <2 x i16> 1523 %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 4095 1524 %load = load i8, i8 addrspace(5)* %gep 1525 %ext = zext i8 %load to i16 1526 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0 1527 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 1528 ret void 1529} 1530 1531define void @load_private_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(5)* byval(i8) %in, i32 %reg) #0 { 1532; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8: 1533; GFX900-MUBUF: ; %bb.0: ; %entry 1534; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1535; GFX900-MUBUF-NEXT: buffer_load_sbyte_d16 v0, off, s[0:3], s32 offset:4095 1536; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1537; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off 1538; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1539; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 1540; 1541; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8: 1542; GFX906: ; %bb.0: ; %entry 1543; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1544; GFX906-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095 1545; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff 1546; GFX906-NEXT: s_waitcnt vmcnt(0) 1547; GFX906-NEXT: v_bfi_b32 v0, v2, 
v1, v0 1548; GFX906-NEXT: global_store_dword v[0:1], v0, off 1549; GFX906-NEXT: s_waitcnt vmcnt(0) 1550; GFX906-NEXT: s_setpc_b64 s[30:31] 1551; 1552; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8: 1553; GFX803: ; %bb.0: ; %entry 1554; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1555; GFX803-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095 1556; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 1557; GFX803-NEXT: s_waitcnt vmcnt(0) 1558; GFX803-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1559; GFX803-NEXT: flat_store_dword v[0:1], v0 1560; GFX803-NEXT: s_waitcnt vmcnt(0) 1561; GFX803-NEXT: s_setpc_b64 s[30:31] 1562; 1563; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8: 1564; GFX900-FLATSCR: ; %bb.0: ; %entry 1565; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1566; GFX900-FLATSCR-NEXT: scratch_load_sbyte_d16 v0, off, s32 offset:4095 1567; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1568; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off 1569; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1570; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 1571entry: 1572 %reg.bc = bitcast i32 %reg to <2 x i16> 1573 %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 4095 1574 %load = load i8, i8 addrspace(5)* %gep 1575 %ext = sext i8 %load to i16 1576 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0 1577 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 1578 ret void 1579} 1580 1581define void @load_private_lo_v2i16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, i32 %reg) #0 { 1582; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_zexti8: 1583; GFX900-MUBUF: ; %bb.0: ; %entry 1584; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1585; GFX900-MUBUF-NEXT: buffer_load_ubyte_d16 v1, off, s[0:3], 0 offset:4094 glc 1586; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1587; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v1, off 1588; GFX900-MUBUF-NEXT: 
s_waitcnt vmcnt(0) 1589; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 1590; 1591; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_zexti8: 1592; GFX906: ; %bb.0: ; %entry 1593; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1594; GFX906-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:4094 glc 1595; GFX906-NEXT: s_waitcnt vmcnt(0) 1596; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff 1597; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1 1598; GFX906-NEXT: global_store_dword v[0:1], v0, off 1599; GFX906-NEXT: s_waitcnt vmcnt(0) 1600; GFX906-NEXT: s_setpc_b64 s[30:31] 1601; 1602; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_zexti8: 1603; GFX803: ; %bb.0: ; %entry 1604; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1605; GFX803-NEXT: v_lshrrev_b32_e32 v0, 16, v1 1606; GFX803-NEXT: buffer_load_ubyte v1, off, s[0:3], 0 offset:4094 glc 1607; GFX803-NEXT: s_waitcnt vmcnt(0) 1608; GFX803-NEXT: s_mov_b32 s4, 0x5040c00 1609; GFX803-NEXT: v_perm_b32 v0, v0, v1, s4 1610; GFX803-NEXT: flat_store_dword v[0:1], v0 1611; GFX803-NEXT: s_waitcnt vmcnt(0) 1612; GFX803-NEXT: s_setpc_b64 s[30:31] 1613; 1614; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_zexti8: 1615; GFX900-FLATSCR: ; %bb.0: ; %entry 1616; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1617; GFX900-FLATSCR-NEXT: s_movk_i32 s0, 0xffe 1618; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v1, off, s0 glc 1619; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1620; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v1, off 1621; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1622; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 1623entry: 1624 %reg.bc = bitcast i32 %reg to <2 x i16> 1625 %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*) 1626 %ext = zext i8 %load to i16 1627 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0 1628 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 1629 ret void 1630} 1631 1632define void @load_private_lo_v2i16_reglo_vreg_nooff_sexti8(i8 
addrspace(5)* %in, i32 %reg) #0 { 1633; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_sexti8: 1634; GFX900-MUBUF: ; %bb.0: ; %entry 1635; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1636; GFX900-MUBUF-NEXT: buffer_load_sbyte_d16 v1, off, s[0:3], 0 offset:4094 glc 1637; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1638; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v1, off 1639; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1640; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 1641; 1642; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_sexti8: 1643; GFX906: ; %bb.0: ; %entry 1644; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1645; GFX906-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 offset:4094 glc 1646; GFX906-NEXT: s_waitcnt vmcnt(0) 1647; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff 1648; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1 1649; GFX906-NEXT: global_store_dword v[0:1], v0, off 1650; GFX906-NEXT: s_waitcnt vmcnt(0) 1651; GFX906-NEXT: s_setpc_b64 s[30:31] 1652; 1653; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_sexti8: 1654; GFX803: ; %bb.0: ; %entry 1655; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1656; GFX803-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 offset:4094 glc 1657; GFX803-NEXT: s_waitcnt vmcnt(0) 1658; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 1659; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1660; GFX803-NEXT: flat_store_dword v[0:1], v0 1661; GFX803-NEXT: s_waitcnt vmcnt(0) 1662; GFX803-NEXT: s_setpc_b64 s[30:31] 1663; 1664; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_sexti8: 1665; GFX900-FLATSCR: ; %bb.0: ; %entry 1666; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1667; GFX900-FLATSCR-NEXT: s_movk_i32 s0, 0xffe 1668; GFX900-FLATSCR-NEXT: scratch_load_sbyte_d16 v1, off, s0 glc 1669; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1670; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v1, off 1671; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 
1672; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 1673entry: 1674 %reg.bc = bitcast i32 %reg to <2 x i16> 1675 %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*) 1676 %ext = sext i8 %load to i16 1677 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0 1678 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 1679 ret void 1680} 1681 1682define void @load_private_lo_v2f16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, i32 %reg) #0 { 1683; GFX900-MUBUF-LABEL: load_private_lo_v2f16_reglo_vreg_nooff_zexti8: 1684; GFX900-MUBUF: ; %bb.0: ; %entry 1685; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1686; GFX900-MUBUF-NEXT: buffer_load_ubyte_d16 v1, off, s[0:3], 0 offset:4094 glc 1687; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1688; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v1, off 1689; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1690; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 1691; 1692; GFX906-LABEL: load_private_lo_v2f16_reglo_vreg_nooff_zexti8: 1693; GFX906: ; %bb.0: ; %entry 1694; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1695; GFX906-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:4094 glc 1696; GFX906-NEXT: s_waitcnt vmcnt(0) 1697; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v1 1698; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 1699; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 1700; GFX906-NEXT: global_store_dword v[0:1], v0, off 1701; GFX906-NEXT: s_waitcnt vmcnt(0) 1702; GFX906-NEXT: s_setpc_b64 s[30:31] 1703; 1704; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg_nooff_zexti8: 1705; GFX803: ; %bb.0: ; %entry 1706; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1707; GFX803-NEXT: v_lshrrev_b32_e32 v0, 16, v1 1708; GFX803-NEXT: buffer_load_ubyte v1, off, s[0:3], 0 offset:4094 glc 1709; GFX803-NEXT: s_waitcnt vmcnt(0) 1710; GFX803-NEXT: s_mov_b32 s4, 0x5040c00 1711; GFX803-NEXT: v_perm_b32 v0, v0, v1, s4 1712; GFX803-NEXT: flat_store_dword v[0:1], v0 1713; GFX803-NEXT: s_waitcnt vmcnt(0) 1714; GFX803-NEXT: 
s_setpc_b64 s[30:31] 1715; 1716; GFX900-FLATSCR-LABEL: load_private_lo_v2f16_reglo_vreg_nooff_zexti8: 1717; GFX900-FLATSCR: ; %bb.0: ; %entry 1718; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1719; GFX900-FLATSCR-NEXT: s_movk_i32 s0, 0xffe 1720; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v1, off, s0 glc 1721; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1722; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v1, off 1723; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1724; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 1725entry: 1726 %reg.bc = bitcast i32 %reg to <2 x half> 1727 %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*) 1728 %ext = zext i8 %load to i16 1729 %bc.ext = bitcast i16 %ext to half 1730 %build1 = insertelement <2 x half> %reg.bc, half %bc.ext, i32 0 1731 store <2 x half> %build1, <2 x half> addrspace(1)* undef 1732 ret void 1733} 1734 1735define void @load_constant_lo_v2i16_reglo_vreg(i16 addrspace(4)* %in, i32 %reg) #0 { 1736; GFX900-LABEL: load_constant_lo_v2i16_reglo_vreg: 1737; GFX900: ; %bb.0: ; %entry 1738; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1739; GFX900-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094 1740; GFX900-NEXT: s_waitcnt vmcnt(0) 1741; GFX900-NEXT: global_store_dword v[0:1], v2, off 1742; GFX900-NEXT: s_waitcnt vmcnt(0) 1743; GFX900-NEXT: s_setpc_b64 s[30:31] 1744; 1745; GFX906-LABEL: load_constant_lo_v2i16_reglo_vreg: 1746; GFX906: ; %bb.0: ; %entry 1747; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1748; GFX906-NEXT: global_load_ushort v0, v[0:1], off offset:-4094 1749; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff 1750; GFX906-NEXT: s_waitcnt vmcnt(0) 1751; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2 1752; GFX906-NEXT: global_store_dword v[0:1], v0, off 1753; GFX906-NEXT: s_waitcnt vmcnt(0) 1754; GFX906-NEXT: s_setpc_b64 s[30:31] 1755; 1756; GFX803-LABEL: load_constant_lo_v2i16_reglo_vreg: 1757; GFX803: ; %bb.0: ; %entry 1758; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 1759; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff002, v0 1760; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc 1761; GFX803-NEXT: flat_load_ushort v0, v[0:1] 1762; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 1763; GFX803-NEXT: s_waitcnt vmcnt(0) 1764; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 1765; GFX803-NEXT: flat_store_dword v[0:1], v0 1766; GFX803-NEXT: s_waitcnt vmcnt(0) 1767; GFX803-NEXT: s_setpc_b64 s[30:31] 1768entry: 1769 %reg.bc = bitcast i32 %reg to <2 x i16> 1770 %gep = getelementptr inbounds i16, i16 addrspace(4)* %in, i64 -2047 1771 %load = load i16, i16 addrspace(4)* %gep 1772 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0 1773 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 1774 ret void 1775} 1776 1777define void @load_constant_lo_v2f16_reglo_vreg(half addrspace(4)* %in, i32 %reg) #0 { 1778; GFX900-LABEL: load_constant_lo_v2f16_reglo_vreg: 1779; GFX900: ; %bb.0: ; %entry 1780; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1781; GFX900-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094 1782; GFX900-NEXT: s_waitcnt vmcnt(0) 1783; GFX900-NEXT: global_store_dword v[0:1], v2, off 1784; GFX900-NEXT: s_waitcnt vmcnt(0) 1785; GFX900-NEXT: s_setpc_b64 s[30:31] 1786; 1787; GFX906-LABEL: load_constant_lo_v2f16_reglo_vreg: 1788; GFX906: ; %bb.0: ; %entry 1789; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1790; GFX906-NEXT: global_load_ushort v0, v[0:1], off offset:-4094 1791; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2 1792; GFX906-NEXT: s_waitcnt vmcnt(0) 1793; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 1794; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 1795; GFX906-NEXT: global_store_dword v[0:1], v0, off 1796; GFX906-NEXT: s_waitcnt vmcnt(0) 1797; GFX906-NEXT: s_setpc_b64 s[30:31] 1798; 1799; GFX803-LABEL: load_constant_lo_v2f16_reglo_vreg: 1800; GFX803: ; %bb.0: ; %entry 1801; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1802; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff002, v0 1803; GFX803-NEXT: 
v_addc_u32_e32 v1, vcc, -1, v1, vcc 1804; GFX803-NEXT: flat_load_ushort v0, v[0:1] 1805; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 1806; GFX803-NEXT: s_waitcnt vmcnt(0) 1807; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 1808; GFX803-NEXT: flat_store_dword v[0:1], v0 1809; GFX803-NEXT: s_waitcnt vmcnt(0) 1810; GFX803-NEXT: s_setpc_b64 s[30:31] 1811entry: 1812 %reg.bc = bitcast i32 %reg to <2 x half> 1813 %gep = getelementptr inbounds half, half addrspace(4)* %in, i64 -2047 1814 %load = load half, half addrspace(4)* %gep 1815 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0 1816 store <2 x half> %build1, <2 x half> addrspace(1)* undef 1817 ret void 1818} 1819 1820define void @load_constant_lo_v2f16_reglo_vreg_zexti8(i8 addrspace(4)* %in, i32 %reg) #0 { 1821; GFX900-LABEL: load_constant_lo_v2f16_reglo_vreg_zexti8: 1822; GFX900: ; %bb.0: ; %entry 1823; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1824; GFX900-NEXT: global_load_ubyte_d16 v2, v[0:1], off offset:-4095 1825; GFX900-NEXT: s_waitcnt vmcnt(0) 1826; GFX900-NEXT: global_store_dword v[0:1], v2, off 1827; GFX900-NEXT: s_waitcnt vmcnt(0) 1828; GFX900-NEXT: s_setpc_b64 s[30:31] 1829; 1830; GFX906-LABEL: load_constant_lo_v2f16_reglo_vreg_zexti8: 1831; GFX906: ; %bb.0: ; %entry 1832; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1833; GFX906-NEXT: global_load_ubyte v0, v[0:1], off offset:-4095 1834; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2 1835; GFX906-NEXT: s_waitcnt vmcnt(0) 1836; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 1837; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 1838; GFX906-NEXT: global_store_dword v[0:1], v0, off 1839; GFX906-NEXT: s_waitcnt vmcnt(0) 1840; GFX906-NEXT: s_setpc_b64 s[30:31] 1841; 1842; GFX803-LABEL: load_constant_lo_v2f16_reglo_vreg_zexti8: 1843; GFX803: ; %bb.0: ; %entry 1844; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1845; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0 1846; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc 1847; GFX803-NEXT: 
flat_load_ubyte v0, v[0:1] 1848; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v2 1849; GFX803-NEXT: s_mov_b32 s4, 0x5040c00 1850; GFX803-NEXT: s_waitcnt vmcnt(0) 1851; GFX803-NEXT: v_perm_b32 v0, v1, v0, s4 1852; GFX803-NEXT: flat_store_dword v[0:1], v0 1853; GFX803-NEXT: s_waitcnt vmcnt(0) 1854; GFX803-NEXT: s_setpc_b64 s[30:31] 1855entry: 1856 %reg.bc = bitcast i32 %reg to <2 x half> 1857 %gep = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 -4095 1858 %load = load i8, i8 addrspace(4)* %gep 1859 %ext = zext i8 %load to i16 1860 %bitcast = bitcast i16 %ext to half 1861 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0 1862 store <2 x half> %build1, <2 x half> addrspace(1)* undef 1863 ret void 1864} 1865 1866define void @load_constant_lo_v2f16_reglo_vreg_sexti8(i8 addrspace(4)* %in, i32 %reg) #0 { 1867; GFX900-LABEL: load_constant_lo_v2f16_reglo_vreg_sexti8: 1868; GFX900: ; %bb.0: ; %entry 1869; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1870; GFX900-NEXT: global_load_sbyte_d16 v2, v[0:1], off offset:-4095 1871; GFX900-NEXT: s_waitcnt vmcnt(0) 1872; GFX900-NEXT: global_store_dword v[0:1], v2, off 1873; GFX900-NEXT: s_waitcnt vmcnt(0) 1874; GFX900-NEXT: s_setpc_b64 s[30:31] 1875; 1876; GFX906-LABEL: load_constant_lo_v2f16_reglo_vreg_sexti8: 1877; GFX906: ; %bb.0: ; %entry 1878; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1879; GFX906-NEXT: global_load_sbyte v0, v[0:1], off offset:-4095 1880; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2 1881; GFX906-NEXT: s_waitcnt vmcnt(0) 1882; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 1883; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 1884; GFX906-NEXT: global_store_dword v[0:1], v0, off 1885; GFX906-NEXT: s_waitcnt vmcnt(0) 1886; GFX906-NEXT: s_setpc_b64 s[30:31] 1887; 1888; GFX803-LABEL: load_constant_lo_v2f16_reglo_vreg_sexti8: 1889; GFX803: ; %bb.0: ; %entry 1890; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1891; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0 1892; GFX803-NEXT: 
v_addc_u32_e32 v1, vcc, -1, v1, vcc 1893; GFX803-NEXT: flat_load_sbyte v0, v[0:1] 1894; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 1895; GFX803-NEXT: s_waitcnt vmcnt(0) 1896; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1897; GFX803-NEXT: flat_store_dword v[0:1], v0 1898; GFX803-NEXT: s_waitcnt vmcnt(0) 1899; GFX803-NEXT: s_setpc_b64 s[30:31] 1900entry: 1901 %reg.bc = bitcast i32 %reg to <2 x half> 1902 %gep = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 -4095 1903 %load = load i8, i8 addrspace(4)* %gep 1904 %ext = sext i8 %load to i16 1905 %bitcast = bitcast i16 %ext to half 1906 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0 1907 store <2 x half> %build1, <2 x half> addrspace(1)* undef 1908 ret void 1909} 1910 1911define void @load_private_lo_v2i16_reglo_vreg_to_offset(i32 %reg) #0 { 1912; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_to_offset: 1913; GFX900-MUBUF: ; %bb.0: ; %entry 1914; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1915; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 0x7b 1916; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32 1917; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1918; GFX900-MUBUF-NEXT: buffer_load_short_d16 v0, off, s[0:3], s32 offset:4094 glc 1919; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1920; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off 1921; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1922; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 1923; 1924; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_to_offset: 1925; GFX906: ; %bb.0: ; %entry 1926; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1927; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b 1928; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32 1929; GFX906-NEXT: s_waitcnt vmcnt(0) 1930; GFX906-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094 glc 1931; GFX906-NEXT: s_waitcnt vmcnt(0) 1932; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff 1933; GFX906-NEXT: v_bfi_b32 v0, v2, v1, v0 
1934; GFX906-NEXT: global_store_dword v[0:1], v0, off 1935; GFX906-NEXT: s_waitcnt vmcnt(0) 1936; GFX906-NEXT: s_setpc_b64 s[30:31] 1937; 1938; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_to_offset: 1939; GFX803: ; %bb.0: ; %entry 1940; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1941; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b 1942; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32 1943; GFX803-NEXT: s_waitcnt vmcnt(0) 1944; GFX803-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094 glc 1945; GFX803-NEXT: s_waitcnt vmcnt(0) 1946; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 1947; GFX803-NEXT: v_or_b32_e32 v0, v1, v0 1948; GFX803-NEXT: flat_store_dword v[0:1], v0 1949; GFX803-NEXT: s_waitcnt vmcnt(0) 1950; GFX803-NEXT: s_setpc_b64 s[30:31] 1951; 1952; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_to_offset: 1953; GFX900-FLATSCR: ; %bb.0: ; %entry 1954; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1955; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b 1956; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32 1957; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1958; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v0, off, s32 offset:4094 glc 1959; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1960; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off 1961; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1962; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 1963entry: 1964 %obj0 = alloca [10 x i32], align 4, addrspace(5) 1965 %obj1 = alloca [4096 x i16], align 2, addrspace(5) 1966 %reg.bc = bitcast i32 %reg to <2 x i16> 1967 %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)* 1968 store volatile i32 123, i32 addrspace(5)* %bc 1969 %gep = getelementptr inbounds [4096 x i16], [4096 x i16] addrspace(5)* %obj1, i32 0, i32 2027 1970 %load = load volatile i16, i16 addrspace(5)* %gep 1971 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0 1972 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 1973 ret void 1974} 1975 1976define void 
@load_private_lo_v2i16_reglo_vreg_sexti8_to_offset(i32 %reg) #0 { 1977; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8_to_offset: 1978; GFX900-MUBUF: ; %bb.0: ; %entry 1979; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1980; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 0x7b 1981; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32 1982; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1983; GFX900-MUBUF-NEXT: buffer_load_sbyte_d16 v0, off, s[0:3], s32 offset:4095 glc 1984; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1985; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off 1986; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1987; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 1988; 1989; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8_to_offset: 1990; GFX906: ; %bb.0: ; %entry 1991; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1992; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b 1993; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32 1994; GFX906-NEXT: s_waitcnt vmcnt(0) 1995; GFX906-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095 glc 1996; GFX906-NEXT: s_waitcnt vmcnt(0) 1997; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff 1998; GFX906-NEXT: v_bfi_b32 v0, v2, v1, v0 1999; GFX906-NEXT: global_store_dword v[0:1], v0, off 2000; GFX906-NEXT: s_waitcnt vmcnt(0) 2001; GFX906-NEXT: s_setpc_b64 s[30:31] 2002; 2003; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8_to_offset: 2004; GFX803: ; %bb.0: ; %entry 2005; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2006; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b 2007; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32 2008; GFX803-NEXT: s_waitcnt vmcnt(0) 2009; GFX803-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095 glc 2010; GFX803-NEXT: s_waitcnt vmcnt(0) 2011; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 2012; GFX803-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 2013; GFX803-NEXT: flat_store_dword v[0:1], v0 2014; GFX803-NEXT: s_waitcnt vmcnt(0) 2015; 
GFX803-NEXT: s_setpc_b64 s[30:31] 2016; 2017; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8_to_offset: 2018; GFX900-FLATSCR: ; %bb.0: ; %entry 2019; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2020; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b 2021; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32 2022; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 2023; GFX900-FLATSCR-NEXT: scratch_load_sbyte_d16 v0, off, s32 offset:4095 glc 2024; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 2025; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off 2026; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 2027; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 2028entry: 2029 %obj0 = alloca [10 x i32], align 4, addrspace(5) 2030 %obj1 = alloca [4096 x i8], align 2, addrspace(5) 2031 %reg.bc = bitcast i32 %reg to <2 x i16> 2032 %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)* 2033 store volatile i32 123, i32 addrspace(5)* %bc 2034 %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4055 2035 %load = load volatile i8, i8 addrspace(5)* %gep 2036 %load.ext = sext i8 %load to i16 2037 %build1 = insertelement <2 x i16> %reg.bc, i16 %load.ext, i32 0 2038 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 2039 ret void 2040} 2041 2042define void @load_private_lo_v2i16_reglo_vreg_zexti8_to_offset(i32 %reg) #0 { 2043; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8_to_offset: 2044; GFX900-MUBUF: ; %bb.0: ; %entry 2045; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2046; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 0x7b 2047; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32 2048; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 2049; GFX900-MUBUF-NEXT: buffer_load_ubyte_d16 v0, off, s[0:3], s32 offset:4095 glc 2050; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 2051; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off 2052; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 2053; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 2054; 
2055; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8_to_offset: 2056; GFX906: ; %bb.0: ; %entry 2057; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2058; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b 2059; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32 2060; GFX906-NEXT: s_waitcnt vmcnt(0) 2061; GFX906-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095 glc 2062; GFX906-NEXT: s_waitcnt vmcnt(0) 2063; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff 2064; GFX906-NEXT: v_bfi_b32 v0, v2, v1, v0 2065; GFX906-NEXT: global_store_dword v[0:1], v0, off 2066; GFX906-NEXT: s_waitcnt vmcnt(0) 2067; GFX906-NEXT: s_setpc_b64 s[30:31] 2068; 2069; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8_to_offset: 2070; GFX803: ; %bb.0: ; %entry 2071; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2072; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b 2073; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32 2074; GFX803-NEXT: s_waitcnt vmcnt(0) 2075; GFX803-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095 glc 2076; GFX803-NEXT: s_waitcnt vmcnt(0) 2077; GFX803-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2078; GFX803-NEXT: s_mov_b32 s4, 0x5040c00 2079; GFX803-NEXT: v_perm_b32 v0, v0, v1, s4 2080; GFX803-NEXT: flat_store_dword v[0:1], v0 2081; GFX803-NEXT: s_waitcnt vmcnt(0) 2082; GFX803-NEXT: s_setpc_b64 s[30:31] 2083; 2084; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8_to_offset: 2085; GFX900-FLATSCR: ; %bb.0: ; %entry 2086; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2087; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b 2088; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32 2089; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 2090; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v0, off, s32 offset:4095 glc 2091; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 2092; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off 2093; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 2094; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 2095entry: 2096 %obj0 = alloca [10 x i32], align 4, 
addrspace(5) 2097 %obj1 = alloca [4096 x i8], align 2, addrspace(5) 2098 %reg.bc = bitcast i32 %reg to <2 x i16> 2099 %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)* 2100 store volatile i32 123, i32 addrspace(5)* %bc 2101 %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4055 2102 %load = load volatile i8, i8 addrspace(5)* %gep 2103 %load.ext = zext i8 %load to i16 2104 %build1 = insertelement <2 x i16> %reg.bc, i16 %load.ext, i32 0 2105 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 2106 ret void 2107} 2108 2109define void @load_private_lo_v2f16_reglo_vreg_sexti8_to_offset(i32 %reg) #0 { 2110; GFX900-MUBUF-LABEL: load_private_lo_v2f16_reglo_vreg_sexti8_to_offset: 2111; GFX900-MUBUF: ; %bb.0: ; %entry 2112; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2113; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 0x7b 2114; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32 2115; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 2116; GFX900-MUBUF-NEXT: buffer_load_sbyte_d16 v0, off, s[0:3], s32 offset:4095 glc 2117; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 2118; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off 2119; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 2120; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 2121; 2122; GFX906-LABEL: load_private_lo_v2f16_reglo_vreg_sexti8_to_offset: 2123; GFX906: ; %bb.0: ; %entry 2124; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2125; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b 2126; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32 2127; GFX906-NEXT: s_waitcnt vmcnt(0) 2128; GFX906-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095 glc 2129; GFX906-NEXT: s_waitcnt vmcnt(0) 2130; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2131; GFX906-NEXT: v_and_b32_e32 v1, 0xffff, v1 2132; GFX906-NEXT: v_lshl_or_b32 v0, v0, 16, v1 2133; GFX906-NEXT: global_store_dword v[0:1], v0, off 2134; GFX906-NEXT: s_waitcnt vmcnt(0) 2135; GFX906-NEXT: s_setpc_b64 s[30:31] 2136; 2137; GFX803-LABEL: 
load_private_lo_v2f16_reglo_vreg_sexti8_to_offset: 2138; GFX803: ; %bb.0: ; %entry 2139; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2140; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b 2141; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32 2142; GFX803-NEXT: s_waitcnt vmcnt(0) 2143; GFX803-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095 glc 2144; GFX803-NEXT: s_waitcnt vmcnt(0) 2145; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 2146; GFX803-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 2147; GFX803-NEXT: flat_store_dword v[0:1], v0 2148; GFX803-NEXT: s_waitcnt vmcnt(0) 2149; GFX803-NEXT: s_setpc_b64 s[30:31] 2150; 2151; GFX900-FLATSCR-LABEL: load_private_lo_v2f16_reglo_vreg_sexti8_to_offset: 2152; GFX900-FLATSCR: ; %bb.0: ; %entry 2153; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2154; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b 2155; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32 2156; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 2157; GFX900-FLATSCR-NEXT: scratch_load_sbyte_d16 v0, off, s32 offset:4095 glc 2158; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 2159; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off 2160; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 2161; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 2162entry: 2163 %obj0 = alloca [10 x i32], align 4, addrspace(5) 2164 %obj1 = alloca [4096 x i8], align 2, addrspace(5) 2165 %reg.bc = bitcast i32 %reg to <2 x half> 2166 %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)* 2167 store volatile i32 123, i32 addrspace(5)* %bc 2168 %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4055 2169 %load = load volatile i8, i8 addrspace(5)* %gep 2170 %load.ext = sext i8 %load to i16 2171 %bitcast = bitcast i16 %load.ext to half 2172 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0 2173 store <2 x half> %build1, <2 x half> addrspace(1)* undef 2174 ret void 2175} 2176 2177define 
void @load_private_lo_v2f16_reglo_vreg_zexti8_to_offset(i32 %reg) #0 { 2178; GFX900-MUBUF-LABEL: load_private_lo_v2f16_reglo_vreg_zexti8_to_offset: 2179; GFX900-MUBUF: ; %bb.0: ; %entry 2180; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2181; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 0x7b 2182; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32 2183; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 2184; GFX900-MUBUF-NEXT: buffer_load_ubyte_d16 v0, off, s[0:3], s32 offset:4095 glc 2185; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 2186; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off 2187; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 2188; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 2189; 2190; GFX906-LABEL: load_private_lo_v2f16_reglo_vreg_zexti8_to_offset: 2191; GFX906: ; %bb.0: ; %entry 2192; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2193; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b 2194; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32 2195; GFX906-NEXT: s_waitcnt vmcnt(0) 2196; GFX906-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095 glc 2197; GFX906-NEXT: s_waitcnt vmcnt(0) 2198; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2199; GFX906-NEXT: v_and_b32_e32 v1, 0xffff, v1 2200; GFX906-NEXT: v_lshl_or_b32 v0, v0, 16, v1 2201; GFX906-NEXT: global_store_dword v[0:1], v0, off 2202; GFX906-NEXT: s_waitcnt vmcnt(0) 2203; GFX906-NEXT: s_setpc_b64 s[30:31] 2204; 2205; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg_zexti8_to_offset: 2206; GFX803: ; %bb.0: ; %entry 2207; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2208; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b 2209; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32 2210; GFX803-NEXT: s_waitcnt vmcnt(0) 2211; GFX803-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095 glc 2212; GFX803-NEXT: s_waitcnt vmcnt(0) 2213; GFX803-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2214; GFX803-NEXT: s_mov_b32 s4, 0x5040c00 2215; GFX803-NEXT: v_perm_b32 v0, v0, v1, s4 2216; GFX803-NEXT: flat_store_dword v[0:1], v0 2217; 
GFX803-NEXT: s_waitcnt vmcnt(0) 2218; GFX803-NEXT: s_setpc_b64 s[30:31] 2219; 2220; GFX900-FLATSCR-LABEL: load_private_lo_v2f16_reglo_vreg_zexti8_to_offset: 2221; GFX900-FLATSCR: ; %bb.0: ; %entry 2222; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2223; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b 2224; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32 2225; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 2226; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v0, off, s32 offset:4095 glc 2227; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 2228; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off 2229; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 2230; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 2231entry: 2232 %obj0 = alloca [10 x i32], align 4, addrspace(5) 2233 %obj1 = alloca [4096 x i8], align 2, addrspace(5) 2234 %reg.bc = bitcast i32 %reg to <2 x half> 2235 %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)* 2236 store volatile i32 123, i32 addrspace(5)* %bc 2237 %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4055 2238 %load = load volatile i8, i8 addrspace(5)* %gep 2239 %load.ext = zext i8 %load to i16 2240 %bitcast = bitcast i16 %load.ext to half 2241 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0 2242 store <2 x half> %build1, <2 x half> addrspace(1)* undef 2243 ret void 2244} 2245 2246attributes #0 = { nounwind } 2247