; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8-UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8-PACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s

; Exercises the llvm.amdgcn.image.load.1d intrinsic with half-typed results
; through GlobalISel on four subtargets (tonga, gfx810, gfx900, gfx1010).
; Covered shapes: one half per dmask bit, two halves for several two-bit
; dmasks, four halves, and struct-returning (tfe) variants where only the
; trailing i32 member is used. The check lines are generated; regenerate them
; with utils/update_llc_test_checks.py instead of editing them by hand.

; Single half result, dmask = 1.
define amdgpu_ps half @load_1d_f16_x(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_f16_x:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 unorm d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_f16_x:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 unorm d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_f16_x:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 unorm d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f16_x:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret half %v
}

; Single half result, dmask = 2.
define amdgpu_ps half @load_1d_f16_y(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_f16_y:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 unorm d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_f16_y:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 unorm d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_f16_y:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 unorm d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f16_y:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret half %v
}

; Single half result, dmask = 4.
define amdgpu_ps half @load_1d_f16_z(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_f16_z:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 unorm d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_f16_z:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 unorm d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_f16_z:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 unorm d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f16_z:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 4, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret half %v
}

; Single half result, dmask = 8.
define amdgpu_ps half @load_1d_f16_w(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_f16_w:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_f16_w:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_f16_w:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f16_w:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret half %v
}

; <2 x half> result, dmask = 3. On tonga the load writes two registers that are
; then merged with and/shift/or; the other subtargets load into one register.
define amdgpu_ps <2 x half> @load_1d_v2f16_xy(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v2f16_xy:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 unorm d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v2f16_xy:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x3 unorm d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v2f16_xy:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x3 unorm d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f16_xy:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x half> %v
}

; <2 x half> result, dmask = 5.
define amdgpu_ps <2 x half> @load_1d_v2f16_xz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v2f16_xz:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x5 unorm d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v2f16_xz:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x5 unorm d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v2f16_xz:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x5 unorm d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f16_xz:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x5 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 5, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x half> %v
}

; <2 x half> result, dmask = 9.
define amdgpu_ps <2 x half> @load_1d_v2f16_xw(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v2f16_xw:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 unorm d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v2f16_xw:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x9 unorm d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v2f16_xw:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x9 unorm d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f16_xw:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x half> %v
}

; <2 x half> result, dmask = 6.
define amdgpu_ps <2 x half> @load_1d_v2f16_yz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v2f16_yz:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x6 unorm d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v2f16_yz:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x6 unorm d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v2f16_yz:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x6 unorm d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f16_yz:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 6, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x half> %v
}

; FIXME: The <3 x half> case (dmask = 7) is disabled. The reason is not
; recorded here; presumably this result type is not handled yet - confirm
; before re-enabling, then regenerate the checks.
; define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) {
;   %v = call <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
;   ret <3 x half> %v
; }

; <4 x half> result, dmask = 15. On tonga the load writes four registers that
; are merged pairwise; the other subtargets load into two registers.
define amdgpu_ps <4 x half> @load_1d_v4f16_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v4f16_xyzw:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm d16
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, 0xffff
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, s0, v1
; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v3, s0, v3
; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v4f16_xyzw:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0xf unorm d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v4f16_xyzw:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0xf unorm d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v4f16_xyzw:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x half> %v
}

; tfe variant, { half, i32 } result, dmask = 1. Only the i32 struct member is
; returned (bitcast to float); the loaded half is unused.
define amdgpu_ps float @load_1d_f16_tfe_dmask_x(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_f16_tfe_dmask_x:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v0, v1
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_f16_tfe_dmask_x:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v0, v1
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_f16_tfe_dmask_x:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, v1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f16_tfe_dmask_x:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v1
; GFX10-NEXT:    ; return to shader part epilog
  %v = call { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { half, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

; tfe variant, { <2 x half>, i32 } result, dmask = 3. Only the i32 struct
; member is returned; it comes back in v2 on tonga and in v1 elsewhere.
define amdgpu_ps float @load_1d_v2f16_tfe_dmask_xy(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v2f16_tfe_dmask_xy:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x3 unorm tfe d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v0, v2
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v2f16_tfe_dmask_xy:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 unorm tfe d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v0, v1
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v2f16_tfe_dmask_xy:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 unorm tfe d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, v1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f16_tfe_dmask_xy:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v1
; GFX10-NEXT:    ; return to shader part epilog
  %v = call { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { <2 x half>, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

; FIXME: The tfe <3 x half> case (dmask = 7) is disabled. The reason is not
; recorded here; presumably this result type is not handled yet - confirm
; before re-enabling, then regenerate the checks.
; define amdgpu_ps float @load_1d_v3f16_tfe_dmask_xyz(<8 x i32> inreg %rsrc, i32 %s) {
;   %v = call { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
;   %v.err = extractvalue { <3 x half>, i32 } %v, 1
;   %vv = bitcast i32 %v.err to float
;   ret float %vv
; }

; tfe variant, { <4 x half>, i32 } result. Only the i32 struct member is
; returned.
; NOTE(review): the dmask operand is 16 (emitted as dmask:0x10), while the
; function name and the non-tfe load_1d_v4f16_xyzw test suggest 15 (0xf) was
; intended - confirm. Changing it requires regenerating the checks below.
define amdgpu_ps float @load_1d_v4f16_tfe_dmask_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x10 unorm tfe d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v0, v1
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x10 unorm tfe d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v0, v1
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x10 unorm tfe d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, v1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x10 dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v1
; GFX10-NEXT:    ; return to shader part epilog
  %v = call { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 16, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { <4 x half>, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

; Intrinsic declarations for the plain half / vector-of-half result types.
declare half @llvm.amdgcn.image.load.1d.half.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0

; Intrinsic declarations for the struct-returning variants used by the tfe tests.
declare { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }