; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8-UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8-PACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s

; Exercises GlobalISel selection of half-typed (d16) llvm.amdgcn.image.load.1d
; calls for several dmask values, with and without the tfe modifier. Each llc
; invocation above is checked under its own prefix. In the expected output the
; tonga run receives one VGPR per loaded component and repacks the halves with
; VALU ops, while the other three runs receive already-packed halves.
; The assertion lines are generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Scalar half load, dmask 0x1 (x).
define amdgpu_ps half @load_1d_f16_x(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_f16_x:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 unorm d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_f16_x:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 unorm d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_f16_x:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 unorm d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f16_x:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret half %v
}

; Scalar half load, dmask 0x2 (y).
define amdgpu_ps half @load_1d_f16_y(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_f16_y:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 unorm d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_f16_y:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 unorm d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_f16_y:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 unorm d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f16_y:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret half %v
}

; Scalar half load, dmask 0x4 (z).
define amdgpu_ps half @load_1d_f16_z(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_f16_z:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 unorm d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_f16_z:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 unorm d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_f16_z:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 unorm d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f16_z:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 4, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret half %v
}

; Scalar half load, dmask 0x8 (w).
define amdgpu_ps half @load_1d_f16_w(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_f16_w:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_f16_w:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_f16_w:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f16_w:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret half %v
}

; Two-component load, dmask 0x3 (x and y), returned as <2 x half>.
define amdgpu_ps <2 x half> @load_1d_v2f16_xy(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v2f16_xy:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 unorm d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v2f16_xy:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x3 unorm d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v2f16_xy:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x3 unorm d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f16_xy:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x half> %v
}

; Two-component load, dmask 0x5 (x and z), returned as <2 x half>.
define amdgpu_ps <2 x half> @load_1d_v2f16_xz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v2f16_xz:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x5 unorm d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v2f16_xz:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x5 unorm d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v2f16_xz:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x5 unorm d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f16_xz:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x5 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 5, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x half> %v
}

; Two-component load, dmask 0x9 (x and w), returned as <2 x half>.
define amdgpu_ps <2 x half> @load_1d_v2f16_xw(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v2f16_xw:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 unorm d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v2f16_xw:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x9 unorm d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v2f16_xw:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x9 unorm d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f16_xw:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x half> %v
}

; Two-component load, dmask 0x6 (y and z), returned as <2 x half>.
define amdgpu_ps <2 x half> @load_1d_v2f16_yz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v2f16_yz:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x6 unorm d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v2f16_yz:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x6 unorm d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v2f16_yz:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x6 unorm d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f16_yz:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 6, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x half> %v
}

; Three-component load, dmask 0x7 (x, y and z), returned as <3 x half>.
define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v3f16_xyz:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x7 unorm d16
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, 0xffff
; GFX8-UNPACKED-NEXT:    s_and_b32 s1, s0, s0
; GFX8-UNPACKED-NEXT:    s_lshl_b32 s1, s1, 16
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v3, s1
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v4, s0, v1
; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v3f16_xyz:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x7 unorm d16
; GFX8-PACKED-NEXT:    s_mov_b32 s0, 0xffff
; GFX8-PACKED-NEXT:    s_and_b32 s0, s0, s0
; GFX8-PACKED-NEXT:    s_lshl_b32 s0, s0, 16
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v2, s0
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
; GFX8-PACKED-NEXT:    v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-PACKED-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
; GFX8-PACKED-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v3f16_xyz:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x7 unorm d16
; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT:    s_lshl_b32 s0, s0, 16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX9-NEXT:    v_and_or_b32 v1, v1, v2, s0
; GFX9-NEXT:    v_and_or_b32 v0, v0, v2, v3
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v3f16_xyz:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v3, 0xffff
; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_lshl_b32 s0, s0, 16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; GFX10-NEXT:    v_and_or_b32 v1, v1, v3, s0
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    v_and_or_b32 v0, v0, v3, v2
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x half> %v
}

; Four-component load, dmask 0xf (x, y, z and w), returned as <4 x half>.
define amdgpu_ps <4 x half> @load_1d_v4f16_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v4f16_xyzw:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm d16
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, 0xffff
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, s0, v1
; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v3, s0, v3
; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v4f16_xyzw:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0xf unorm d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v4f16_xyzw:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0xf unorm d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v4f16_xyzw:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x half> %v
}

; Scalar half load with the fourth intrinsic operand set to 1 (shown as the tfe
; modifier in the expected output). Only the i32 member of the { half, i32 }
; result is used; it is bitcast to float and returned.
define amdgpu_ps float @load_1d_f16_tfe_dmask_x(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_f16_tfe_dmask_x:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v1, 0
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v2, v1
; GFX8-UNPACKED-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v0, v2
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_f16_tfe_dmask_x:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v1, 0
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v2, v1
; GFX8-PACKED-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v0, v2
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_f16_tfe_dmask_x:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_mov_b32_e32 v2, v1
; GFX9-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, v2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f16_tfe_dmask_x:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v2, v1
; GFX10-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v2
; GFX10-NEXT:    ; return to shader part epilog
  %v = call { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { half, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

; Same shape as the scalar tfe test, with a <2 x half> payload and dmask 0x3.
; Only the i32 member of the result struct is returned (bitcast to float).
define amdgpu_ps float @load_1d_v2f16_tfe_dmask_xy(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v2f16_tfe_dmask_xy:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v1, 0
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v2, v1
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v3, v1
; GFX8-UNPACKED-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0x3 unorm tfe d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v0, v3
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v2f16_tfe_dmask_xy:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v1, 0
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v2, v1
; GFX8-PACKED-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x3 unorm tfe d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v0, v2
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v2f16_tfe_dmask_xy:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_mov_b32_e32 v2, v1
; GFX9-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x3 unorm tfe d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, v2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f16_tfe_dmask_xy:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v2, v1
; GFX10-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v2
; GFX10-NEXT:    ; return to shader part epilog
  %v = call { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { <2 x half>, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

; Same shape as the scalar tfe test, with a <3 x half> payload and dmask 0x7.
; Only the i32 member of the result struct is returned (bitcast to float).
define amdgpu_ps float @load_1d_v3f16_tfe_dmask_xyz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v3f16_tfe_dmask_xyz:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v1, 0
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v2, v1
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v3, v1
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v4, v1
; GFX8-UNPACKED-NEXT:    image_load v[1:4], v0, s[0:7] dmask:0x7 unorm tfe d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v0, v4
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v3f16_tfe_dmask_xyz:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v1, 0
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v2, v1
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v3, v1
; GFX8-PACKED-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0x7 unorm tfe d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v0, v3
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v3f16_tfe_dmask_xyz:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_mov_b32_e32 v2, v1
; GFX9-NEXT:    v_mov_b32_e32 v3, v1
; GFX9-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0x7 unorm tfe d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, v3
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v3f16_tfe_dmask_xyz:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v2, v1
; GFX10-NEXT:    v_mov_b32_e32 v3, v1
; GFX10-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v3
; GFX10-NEXT:    ; return to shader part epilog
  %v = call { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { <3 x half>, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

define amdgpu_ps float @load_1d_v4f16_tfe_dmask_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v1, 0
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v2, v1
; GFX8-UNPACKED-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x10 unorm tfe d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v0, v2
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v1, 0
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v2, v1
; GFX8-PACKED-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x10 unorm tfe d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v0, v2
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_mov_b32_e32 v2, v1
; GFX9-NEXT:    image_load v[1:2], v0,
s[0:7] dmask:0x10 unorm tfe d16 931; GFX9-NEXT: s_waitcnt vmcnt(0) 932; GFX9-NEXT: v_mov_b32_e32 v0, v2 933; GFX9-NEXT: ; return to shader part epilog 934; 935; GFX10-LABEL: load_1d_v4f16_tfe_dmask_xyzw: 936; GFX10: ; %bb.0: 937; GFX10-NEXT: v_mov_b32_e32 v1, 0 938; GFX10-NEXT: s_mov_b32 s0, s2 939; GFX10-NEXT: s_mov_b32 s1, s3 940; GFX10-NEXT: s_mov_b32 s2, s4 941; GFX10-NEXT: s_mov_b32 s3, s5 942; GFX10-NEXT: s_mov_b32 s4, s6 943; GFX10-NEXT: s_mov_b32 s5, s7 944; GFX10-NEXT: s_mov_b32 s6, s8 945; GFX10-NEXT: s_mov_b32 s7, s9 946; GFX10-NEXT: v_mov_b32_e32 v2, v1 947; GFX10-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x10 dim:SQ_RSRC_IMG_1D unorm tfe d16 948; GFX10-NEXT: s_waitcnt vmcnt(0) 949; GFX10-NEXT: v_mov_b32_e32 v0, v2 950; GFX10-NEXT: ; return to shader part epilog 951 %v = call { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 16, i32 %s, <8 x i32> %rsrc, i32 1, i32 0) 952 %v.err = extractvalue { <4 x half>, i32 } %v, 1 953 %vv = bitcast i32 %v.err to float 954 ret float %vv 955} 956 957declare half @llvm.amdgcn.image.load.1d.half.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0 958declare <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0 959declare <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0 960declare <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0 961 962declare { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0 963declare { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0 964declare { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0 965declare { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0 966 
967attributes #0 = { nounwind readonly } 968