; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s

; Checks the code emitted for llvm.amdgcn.image.load.1d under GlobalISel on
; tahiti (GFX6), fiji (GFX8) and gfx1010 (GFX10).  Each function passes a
; different dmask immediate (first intrinsic operand) and/or result type; the
; function-name suffix spells the components that the dmask selects
; (x = 1, y = 2, z = 4, w = 8).  The "tfe" functions pass 1 as the fourth
; intrinsic operand, use the { data, i32 } struct-returning form of the
; intrinsic, and return only the trailing i32 element.

define amdgpu_ps float @load_1d_f32_x(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_f32_x:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 unorm
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_f32_x:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 unorm
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f32_x:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %v
}

define amdgpu_ps float @load_1d_f32_y(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_f32_y:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 unorm
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_f32_y:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 unorm
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f32_y:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %v
}

define amdgpu_ps float @load_1d_f32_z(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_f32_z:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 unorm
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_f32_z:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 unorm
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f32_z:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 4, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %v
}

define amdgpu_ps float @load_1d_f32_w(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_f32_w:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_f32_w:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f32_w:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %v
}

define amdgpu_ps <2 x float> @load_1d_v2f32_xy(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_v2f32_xy:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 unorm
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_v2f32_xy:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 unorm
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f32_xy:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %v
}

define amdgpu_ps <2 x float> @load_1d_v2f32_xz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_v2f32_xz:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x5 unorm
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_v2f32_xz:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x5 unorm
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f32_xz:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x5 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 5, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %v
}

define amdgpu_ps <2 x float> @load_1d_v2f32_xw(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_v2f32_xw:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 unorm
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_v2f32_xw:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 unorm
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f32_xw:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %v
}

define amdgpu_ps <2 x float> @load_1d_v2f32_yz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_v2f32_yz:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x6 unorm
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_v2f32_yz:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x6 unorm
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f32_yz:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 6, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %v
}

define amdgpu_ps <3 x float> @load_1d_v3f32_xyz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_v3f32_xyz:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x7 unorm
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_v3f32_xyz:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x7 unorm
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v3f32_xyz:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <3 x float> @llvm.amdgcn.image.load.1d.v3f32.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x float> %v
}

define amdgpu_ps <4 x float> @load_1d_v4f32_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_v4f32_xyzw:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_v4f32_xyzw:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v4f32_xyzw:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps float @load_1d_f32_tfe_dmask_x(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_f32_tfe_dmask_x:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, v1
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_f32_tfe_dmask_x:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, v1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f32_tfe_dmask_x:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v1
; GFX10-NEXT:    ; return to shader part epilog
  %v = call { float, i32 } @llvm.amdgcn.image.load.1d.sl_f32i32s.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { float, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

define amdgpu_ps float @load_1d_v2f32_tfe_dmask_xy(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_v2f32_tfe_dmask_xy:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x3 unorm tfe
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, v2
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_v2f32_tfe_dmask_xy:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x3 unorm tfe
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, v2
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f32_tfe_dmask_xy:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm tfe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v2
; GFX10-NEXT:    ; return to shader part epilog
  %v = call { <2 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f32i32s.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { <2 x float>, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

define amdgpu_ps float @load_1d_v3f32_tfe_dmask_xyz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_v3f32_tfe_dmask_xyz:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0x7 unorm tfe
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, v3
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_v3f32_tfe_dmask_xyz:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0x7 unorm tfe
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, v3
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v3f32_tfe_dmask_xyz:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v3
; GFX10-NEXT:    ; return to shader part epilog
  %v = call { <3 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f32i32s.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { <3 x float>, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

; REVIEW: this test previously passed dmask 16 (0x10), which does not match its
; "xyzw" name or the 1/3/7 progression of the tfe tests above.  The dmask is
; now 15 (0xf).  The assertions below were updated by hand following the
; pattern of load_1d_v3f32_tfe_dmask_xyz (one result register per enabled
; component plus one trailing register for the i32 element); re-run
; utils/update_llc_test_checks.py to confirm them against real llc output.
define amdgpu_ps float @load_1d_v4f32_tfe_dmask_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_v4f32_tfe_dmask_xyzw:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:4], v0, s[0:7] dmask:0xf unorm tfe
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, v4
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_v4f32_tfe_dmask_xyzw:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v[0:4], v0, s[0:7] dmask:0xf unorm tfe
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, v4
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v4f32_tfe_dmask_xyzw:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v[0:4], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm tfe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v4
; GFX10-NEXT:    ; return to shader part epilog
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { <4 x float>, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

; The IR requests dmask 0 here, while the expected instruction carries
; dmask:0x1.
define amdgpu_ps float @load_1d_f32_tfe_dmask_0(<8 x i32> inreg %rsrc, i32 %s) {
; GFX6-LABEL: load_1d_f32_tfe_dmask_0:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, v1
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: load_1d_f32_tfe_dmask_0:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, v1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f32_tfe_dmask_0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v1
; GFX10-NEXT:    ; return to shader part epilog
  %v = call { float, i32 } @llvm.amdgcn.image.load.1d.sl_f32i32s.i32(i32 0, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { float, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

declare float @llvm.amdgcn.image.load.1d.f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <3 x float> @llvm.amdgcn.image.load.1d.v3f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0

declare { float, i32 } @llvm.amdgcn.image.load.1d.sl_f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <2 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <3 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }