; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s

; Image atomic intrinsics called with 16-bit (i16) coordinates, compiled with
; GlobalISel for gfx900 and gfx1010. The expected image instructions below all
; carry the a16 modifier. The assertion lines are generated output: regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.

; 1D image, 32-bit data: the single i16 coordinate is expected in one VGPR (v1)
; and the data / returned value in v0. The same shape is repeated for every
; atomic operation that follows.
define amdgpu_ps float @atomic_swap_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: atomic_swap_i32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_swap_i32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_add_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: atomic_add_i32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_sub_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: atomic_sub_i32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_sub_i32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_sub v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_smin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: atomic_smin_i32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_smin_i32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_smin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_umin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: atomic_umin_i32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_umin_i32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_umin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_smax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: atomic_smax_i32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_smax_i32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_smax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_umax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: atomic_umax_i32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_umax_i32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_umax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_and_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: atomic_and_i32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_and_i32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_or_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: atomic_or_i32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_or_i32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_xor_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: atomic_xor_i32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_xor_i32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_inc_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: atomic_inc_i32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_inc_i32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_inc v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_dec_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: atomic_dec_i32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_dec_i32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_dec v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; cmpswap takes two data operands (%cmp, %swap): the checks expect them in
; v[0:1] with dmask:0x3 and the coordinate in v2.
define amdgpu_ps float @atomic_cmpswap_i32_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s) {
; GFX9-LABEL: atomic_cmpswap_i32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_cmpswap_i32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32 %cmp, i32 %swap, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; Two i16 coordinates: the checks expect %s and %t combined into the single
; address VGPR v1 (shift left by 16, then v_and_or_b32 with 0xffff).
define amdgpu_ps float @atomic_add_i32_2d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t) {
; GFX9-LABEL: atomic_add_i32_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_and_or_b32 v1, v1, v3, v2
; GFX9-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i32_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    v_and_or_b32 v1, v1, 0xffff, v2
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32 %data, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; Three i16 coordinates: the checks expect the address in two VGPRs, v[1:2].
define amdgpu_ps float @atomic_add_i32_3d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %r) {
; GFX9-LABEL: atomic_add_i32_3d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    v_and_or_b32 v1, v1, v4, v2
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_and_or_b32 v2, v3, v4, s8
; GFX9-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i32_3d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, v2
; GFX10-NEXT:    v_and_or_b32 v2, v3, v4, s8
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32 %data, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; Cube: same address layout as the 3d case; the gfx900 checks additionally
; expect the da modifier.
define amdgpu_ps float @atomic_add_i32_cube(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %face) {
; GFX9-LABEL: atomic_add_i32_cube:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    v_and_or_b32 v1, v1, v4, v2
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_and_or_b32 v2, v3, v4, s8
; GFX9-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16 da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i32_cube:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, v2
; GFX10-NEXT:    v_and_or_b32 v2, v3, v4, s8
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32 %data, i16 %s, i16 %t, i16 %face, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; 1D array: %s and %slice share one address VGPR as in the 2d case; the gfx900
; checks expect the da modifier.
define amdgpu_ps float @atomic_add_i32_1darray(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %slice) {
; GFX9-LABEL: atomic_add_i32_1darray:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_and_or_b32 v1, v1, v3, v2
; GFX9-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc a16 da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i32_1darray:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    v_and_or_b32 v1, v1, 0xffff, v2
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32 %data, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; 2D array: three i16 coordinates in v[1:2]; the gfx900 checks expect the da
; modifier.
define amdgpu_ps float @atomic_add_i32_2darray(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %slice) {
; GFX9-LABEL: atomic_add_i32_2darray:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    v_and_or_b32 v1, v1, v4, v2
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_and_or_b32 v2, v3, v4, s8
; GFX9-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16 da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i32_2darray:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, v2
; GFX10-NEXT:    v_and_or_b32 v2, v3, v4, s8
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; 2D MSAA: three i16 coordinates in v[1:2]; no da modifier is expected on
; gfx900.
define amdgpu_ps float @atomic_add_i32_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %fragid) {
; GFX9-LABEL: atomic_add_i32_2dmsaa:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    v_and_or_b32 v1, v1, v4, v2
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_and_or_b32 v2, v3, v4, s8
; GFX9-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i32_2dmsaa:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, v2
; GFX10-NEXT:    v_and_or_b32 v2, v3, v4, s8
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; 2D MSAA array: four i16 coordinates, expected pairwise combined into v[1:2];
; the gfx900 checks expect the da modifier.
define amdgpu_ps float @atomic_add_i32_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
; GFX9-LABEL: atomic_add_i32_2darraymsaa:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v5, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT:    v_and_or_b32 v1, v1, v5, v2
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_and_or_b32 v2, v3, v5, v2
; GFX9-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16 da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i32_2darraymsaa:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v5, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    v_and_or_b32 v1, v1, v5, v2
; GFX10-NEXT:    v_and_or_b32 v2, v3, v5, v4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; Same as atomic_add_i32_1d except that the last intrinsic operand is 2 instead
; of 0; the checks expect the slc modifier on the instruction.
define amdgpu_ps float @atomic_add_i32_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: atomic_add_i32_1d_slc:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i32_1d_slc:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc slc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
  %out = bitcast i32 %v to float
  ret float %out
}

; 64-bit data variant: %data is i64 and the result is returned as <2 x float>.
define amdgpu_ps <2 x float> @atomic_swap_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
; GFX9-LABEL: atomic_swap_i64_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_swap_i64_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; i64 image atomic add on a 1D image with a single i16 coordinate. The checks
; expect a 64-bit data pair v[0:1] with dmask:0x3; the i64 result is bitcast
; to <2 x float> for the return.
define amdgpu_ps <2 x float> @atomic_add_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
; GFX9-LABEL: atomic_add_i64_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i64_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; i64 image atomic sub on a 1D image with a single i16 coordinate; same
; expected shape as the other i64 1D tests (v[0:1] data, dmask:0x3, a16).
define amdgpu_ps <2 x float> @atomic_sub_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
; GFX9-LABEL: atomic_sub_i64_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_sub_i64_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; i64 image atomic smin on a 1D image with a single i16 coordinate; the
; checks expect v[0:1] data with dmask:0x3 and the a16 modifier.
define amdgpu_ps <2 x float> @atomic_smin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
; GFX9-LABEL: atomic_smin_i64_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_smin_i64_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; i64 image atomic umin on a 1D image with a single i16 coordinate; the
; checks expect v[0:1] data with dmask:0x3 and the a16 modifier.
define amdgpu_ps <2 x float> @atomic_umin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
; GFX9-LABEL: atomic_umin_i64_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_umin_i64_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; i64 image atomic smax on a 1D image with a single i16 coordinate; the
; checks expect v[0:1] data with dmask:0x3 and the a16 modifier.
define amdgpu_ps <2 x float> @atomic_smax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
; GFX9-LABEL: atomic_smax_i64_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_smax_i64_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; i64 image atomic umax on a 1D image with a single i16 coordinate; the
; checks expect v[0:1] data with dmask:0x3 and the a16 modifier.
define amdgpu_ps <2 x float> @atomic_umax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
; GFX9-LABEL: atomic_umax_i64_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_umax_i64_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; i64 image atomic and on a 1D image with a single i16 coordinate; the
; checks expect v[0:1] data with dmask:0x3 and the a16 modifier.
define amdgpu_ps <2 x float> @atomic_and_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
; GFX9-LABEL: atomic_and_i64_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_and_i64_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.and.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; i64 image atomic or on a 1D image with a single i16 coordinate; the
; checks expect v[0:1] data with dmask:0x3 and the a16 modifier.
define amdgpu_ps <2 x float> @atomic_or_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
; GFX9-LABEL: atomic_or_i64_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_or_i64_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.or.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; i64 image atomic xor on a 1D image with a single i16 coordinate; takes i64
; data and returns <2 x float>, like the other i64 1D tests.
define amdgpu_ps <2 x float> @atomic_xor_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
; GFX9-LABEL: atomic_xor_i64_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_xor_i64_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; i64 image atomic inc on a 1D image with a single i16 coordinate; the
; checks expect v[0:1] data with dmask:0x3 and the a16 modifier.
define amdgpu_ps <2 x float> @atomic_inc_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
; GFX9-LABEL: atomic_inc_i64_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_inc_i64_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; i64 image atomic dec on a 1D image with a single i16 coordinate; the
; checks expect v[0:1] data with dmask:0x3 and the a16 modifier.
define amdgpu_ps <2 x float> @atomic_dec_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
; GFX9-LABEL: atomic_dec_i64_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_dec_i64_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; i64 image atomic cmpswap on a 1D image with a single i16 coordinate. The
; intrinsic takes separate i64 %cmp and %swap operands; the checks expect
; data registers v[0:3] with dmask:0xf and the coordinate in v4.
define amdgpu_ps <2 x float> @atomic_cmpswap_i64_1d(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i16 %s) {
; GFX9-LABEL: atomic_cmpswap_i64_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_cmpswap_i64_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i16(i64 %cmp, i64 %swap, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; i64 image atomic add on a 2D image; the two i16 coordinates (%s, %t) are
; expected packed into the single VGPR v2, with v[0:1] data and dmask:0x3.
define amdgpu_ps <2 x float> @atomic_add_i64_2d(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t) {
; GFX9-LABEL: atomic_add_i64_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_and_or_b32 v2, v2, v4, v3
; GFX9-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i64_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    v_and_or_b32 v2, v2, 0xffff, v3
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.add.2d.i64.i16(i64 %data, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; i64 image atomic add on a 3D image with three i16 coordinates
; (%s, %t, %r), expected packed into v[2:3] with v[0:1] data and dmask:0x3.
define amdgpu_ps <2 x float> @atomic_add_i64_3d(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %r) {
; GFX9-LABEL: atomic_add_i64_3d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    v_mov_b32_e32 v5, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    v_and_or_b32 v2, v2, v5, v3
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_and_or_b32 v3, v4, v5, s8
; GFX9-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i64_3d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v5, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
; GFX10-NEXT:    v_and_or_b32 v2, v2, v5, v3
; GFX10-NEXT:    v_and_or_b32 v3, v4, v5, s8
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.add.3d.i64.i16(i64 %data, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; i64 image atomic add on a cube image with three i16 coordinates
; (%s, %t, %face), expected packed into v[2:3]. The gfx900 checks also
; expect the da modifier on the image instruction.
define amdgpu_ps <2 x float> @atomic_add_i64_cube(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %face) {
; GFX9-LABEL: atomic_add_i64_cube:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    v_mov_b32_e32 v5, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    v_and_or_b32 v2, v2, v5, v3
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_and_or_b32 v3, v4, v5, s8
; GFX9-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16 da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i64_cube:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v5, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
; GFX10-NEXT:    v_and_or_b32 v2, v2, v5, v3
; GFX10-NEXT:    v_and_or_b32 v3, v4, v5, s8
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_CUBE unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.add.cube.i64.i16(i64 %data, i16 %s, i16 %t, i16 %face , <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; i64 image atomic add on a 1D-array image; the two i16 coordinates
; (%s, %slice) are expected packed into the single VGPR v2. The gfx900
; checks also expect the da modifier.
define amdgpu_ps <2 x float> @atomic_add_i64_1darray(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %slice) {
; GFX9-LABEL: atomic_add_i64_1darray:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_and_or_b32 v2, v2, v4, v3
; GFX9-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i64_1darray:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    v_and_or_b32 v2, v2, 0xffff, v3
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i16(i64 %data, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; i64 image atomic add on a 2D-array image with three i16 coordinates
; (%s, %t, %slice), expected packed into v[2:3]. The gfx900 checks also
; expect the da modifier.
define amdgpu_ps <2 x float> @atomic_add_i64_2darray(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %slice) {
; GFX9-LABEL: atomic_add_i64_2darray:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    v_mov_b32_e32 v5, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    v_and_or_b32 v2, v2, v5, v3
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_and_or_b32 v3, v4, v5, s8
; GFX9-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16 da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i64_2darray:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v5, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
; GFX10-NEXT:    v_and_or_b32 v2, v2, v5, v3
; GFX10-NEXT:    v_and_or_b32 v3, v4, v5, s8
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i16(i64 %data, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

define amdgpu_ps <2 x float> @atomic_add_i64_2dmsaa(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %fragid) {
; GFX9-LABEL: atomic_add_i64_2dmsaa:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    v_mov_b32_e32 v5, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    v_and_or_b32 v2, v2, v5, v3
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_and_or_b32 v3, v4, v5, s8
; GFX9-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i64_2dmsaa:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v5, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
; GFX10-NEXT:    v_and_or_b32 v2, v2, v5, v3
; GFX10-NEXT:    v_and_or_b32 v3, v4, v5, s8
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i16(i64 %data, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc,
i32 0, i32 0) 1511 %out = bitcast i64 %v to <2 x float> 1512 ret <2 x float> %out 1513} 1514 1515define amdgpu_ps <2 x float> @atomic_add_i64_2darraymsaa(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid) { 1516; GFX9-LABEL: atomic_add_i64_2darraymsaa: 1517; GFX9: ; %bb.0: ; %main_body 1518; GFX9-NEXT: v_mov_b32_e32 v6, 0xffff 1519; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3 1520; GFX9-NEXT: v_and_or_b32 v2, v2, v6, v3 1521; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v5 1522; GFX9-NEXT: s_mov_b32 s0, s2 1523; GFX9-NEXT: s_mov_b32 s1, s3 1524; GFX9-NEXT: s_mov_b32 s2, s4 1525; GFX9-NEXT: s_mov_b32 s3, s5 1526; GFX9-NEXT: s_mov_b32 s4, s6 1527; GFX9-NEXT: s_mov_b32 s5, s7 1528; GFX9-NEXT: s_mov_b32 s6, s8 1529; GFX9-NEXT: s_mov_b32 s7, s9 1530; GFX9-NEXT: v_and_or_b32 v3, v4, v6, v3 1531; GFX9-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16 da 1532; GFX9-NEXT: s_waitcnt vmcnt(0) 1533; GFX9-NEXT: ; return to shader part epilog 1534; 1535; GFX10-LABEL: atomic_add_i64_2darraymsaa: 1536; GFX10: ; %bb.0: ; %main_body 1537; GFX10-NEXT: v_mov_b32_e32 v6, 0xffff 1538; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 1539; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5 1540; GFX10-NEXT: s_mov_b32 s0, s2 1541; GFX10-NEXT: s_mov_b32 s1, s3 1542; GFX10-NEXT: s_mov_b32 s2, s4 1543; GFX10-NEXT: v_and_or_b32 v2, v2, v6, v3 1544; GFX10-NEXT: v_and_or_b32 v3, v4, v6, v5 1545; GFX10-NEXT: s_mov_b32 s3, s5 1546; GFX10-NEXT: s_mov_b32 s4, s6 1547; GFX10-NEXT: s_mov_b32 s5, s7 1548; GFX10-NEXT: s_mov_b32 s6, s8 1549; GFX10-NEXT: s_mov_b32 s7, s9 1550; GFX10-NEXT: ; implicit-def: $vcc_hi 1551; GFX10-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc a16 1552; GFX10-NEXT: s_waitcnt vmcnt(0) 1553; GFX10-NEXT: ; return to shader part epilog 1554main_body: 1555 %v = call i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i16(i64 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) 1556 %out = bitcast 
i64 %v to <2 x float> 1557 ret <2 x float> %out 1558} 1559 1560define amdgpu_ps <2 x float> @atomic_add_i64_1d_slc(<8 x i32> inreg %rsrc, i64 %data, i16 %s) { 1561; GFX9-LABEL: atomic_add_i64_1d_slc: 1562; GFX9: ; %bb.0: ; %main_body 1563; GFX9-NEXT: s_mov_b32 s0, s2 1564; GFX9-NEXT: s_mov_b32 s1, s3 1565; GFX9-NEXT: s_mov_b32 s2, s4 1566; GFX9-NEXT: s_mov_b32 s3, s5 1567; GFX9-NEXT: s_mov_b32 s4, s6 1568; GFX9-NEXT: s_mov_b32 s5, s7 1569; GFX9-NEXT: s_mov_b32 s6, s8 1570; GFX9-NEXT: s_mov_b32 s7, s9 1571; GFX9-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc a16 1572; GFX9-NEXT: s_waitcnt vmcnt(0) 1573; GFX9-NEXT: ; return to shader part epilog 1574; 1575; GFX10-LABEL: atomic_add_i64_1d_slc: 1576; GFX10: ; %bb.0: ; %main_body 1577; GFX10-NEXT: s_mov_b32 s0, s2 1578; GFX10-NEXT: s_mov_b32 s1, s3 1579; GFX10-NEXT: s_mov_b32 s2, s4 1580; GFX10-NEXT: s_mov_b32 s3, s5 1581; GFX10-NEXT: s_mov_b32 s4, s6 1582; GFX10-NEXT: s_mov_b32 s5, s7 1583; GFX10-NEXT: s_mov_b32 s6, s8 1584; GFX10-NEXT: s_mov_b32 s7, s9 1585; GFX10-NEXT: ; implicit-def: $vcc_hi 1586; GFX10-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc slc a16 1587; GFX10-NEXT: s_waitcnt vmcnt(0) 1588; GFX10-NEXT: ; return to shader part epilog 1589main_body: 1590 %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 2) 1591 %out = bitcast i64 %v to <2 x float> 1592 ret <2 x float> %out 1593} 1594 1595declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1596declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1597declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1598declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1599declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 
; Declarations of the llvm.amdgcn.image.atomic.* intrinsics in their
; 16-bit-coordinate form (name suffix .i32.i16 or .i64.i16). The test functions
; earlier in this file call them, e.g. atomic_add_i64_1darray calls
; @llvm.amdgcn.image.atomic.add.1darray.i64.i16.
;
; Operand layout, as written in the signatures:
;   - the data operand(s), i32 or i64 to match the return type. The cmpswap
;     variants take two of them. NOTE(review): presumably the new value and the
;     compare value; confirm the order against the intrinsic definition.
;   - the i16 coordinate operands: one for 1d, two for 2d and 1darray, three for
;     3d, cube, 2darray and 2dmsaa, four for 2darraymsaa.
;   - the <8 x i32> operand, which the test functions pass as %rsrc.
;   - two trailing i32 immarg operands. NOTE(review): atomic_add_i64_1d_slc
;     passes 2 as the last one and its expected output carries `slc`, so the
;     last operand looks like a cache-policy bitfield; confirm against the
;     AMDGPU intrinsic documentation.
;
; Every declaration references attribute group #0, which is defined after the
; last declaration as { nounwind }.
1600declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1601declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1602declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1603declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1604declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1605declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1606declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1607declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32, i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1608declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1609declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1610declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1611declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1612declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1613declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1614declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1615 1616declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1617declare i64 @llvm.amdgcn.image.atomic.add.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1618declare i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 
1619declare i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1620declare i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1621declare i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1622declare i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1623declare i64 @llvm.amdgcn.image.atomic.and.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1624declare i64 @llvm.amdgcn.image.atomic.or.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1625declare i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1626declare i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1627declare i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1628declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i16(i64, i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1629declare i64 @llvm.amdgcn.image.atomic.add.2d.i64.i16(i64, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1630declare i64 @llvm.amdgcn.image.atomic.add.3d.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1631declare i64 @llvm.amdgcn.image.atomic.add.cube.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1632declare i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i16(i64, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1633declare i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1634declare i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1635declare i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i16(i64, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1636 1637attributes #0 = { nounwind } 1638