; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=CI %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX10 %s

; These tests store to addrspace(3) through an address of the form
; "constant + negated, scaled workitem id". The checks record, per target,
; whether the constant ends up in the offset field of the ds_write instruction
; or as an operand of the v_sub that forms the base address.

declare i32 @llvm.amdgcn.workitem.id.x() #0

@lds.obj = addrspace(3) global [256 x i32] undef, align 4

; Stores 123 to element (3 - workitem.id.x) of @lds.obj. The checks expect a
; v_sub of the shifted id from 0 and the 3-element displacement as offset:12
; on ds_write_b32.
define amdgpu_kernel void @write_ds_sub0_offset0_global() #0 {
; CI-LABEL: write_ds_sub0_offset0_global:
; CI:       ; %bb.0: ; %entry
; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
; CI-NEXT:    v_mov_b32_e32 v1, 0x7b
; CI-NEXT:    s_mov_b32 m0, -1
; CI-NEXT:    ds_write_b32 v0, v1 offset:12
; CI-NEXT:    s_endpgm
;
; GFX9-LABEL: write_ds_sub0_offset0_global:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    v_sub_u32_e32 v0, 0, v0
; GFX9-NEXT:    v_mov_b32_e32 v1, 0x7b
; GFX9-NEXT:    ds_write_b32 v0, v1 offset:12
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: write_ds_sub0_offset0_global:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0x7b
; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 0, v0
; GFX10-NEXT:    ds_write_b32 v0, v1 offset:12
; GFX10-NEXT:    s_endpgm
entry:
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #1
  %sub1 = sub i32 0, %x.i
  %tmp0 = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds.obj, i32 0, i32 %sub1
  %arrayidx = getelementptr inbounds i32, i32 addrspace(3)* %tmp0, i32 3
  store i32 123, i32 addrspace(3)* %arrayidx
  ret void
}

; Same LDS store as write_ds_sub0_offset0_global, followed by a call to
; llvm.amdgcn.div.fmas.f32 with an "i1 false" condition whose result is stored
; (volatile) through a null addrspace(1) pointer. In the bonaire checks the
; v_sub writes vcc and vcc is then set to 0 ahead of v_div_fmas_f32.
; NOTE(review): the name suggests this guards the clamp operand of the
; generated sub - confirm against the change that introduced the test.
define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.val) #0 {
; CI-LABEL: write_ds_sub0_offset0_global_clamp_bit:
; CI:       ; %bb.0: ; %entry
; CI-NEXT:    s_load_dword s0, s[0:1], 0x9
; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
; CI-NEXT:    s_mov_b64 vcc, 0
; CI-NEXT:    v_mov_b32_e32 v2, 0x7b
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    v_mov_b32_e32 v1, s0
; CI-NEXT:    s_mov_b32 s0, 0
; CI-NEXT:    v_div_fmas_f32 v1, v1, v1, v1
; CI-NEXT:    s_mov_b32 m0, -1
; CI-NEXT:    s_mov_b32 s3, 0xf000
; CI-NEXT:    s_mov_b32 s2, -1
; CI-NEXT:    s_mov_b32 s1, s0
; CI-NEXT:    ds_write_b32 v0, v2 offset:12
; CI-NEXT:    buffer_store_dword v1, off, s[0:3], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_endpgm
;
; GFX9-LABEL: write_ds_sub0_offset0_global_clamp_bit:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_load_dword s0, s[0:1], 0x24
; GFX9-NEXT:    s_mov_b64 vcc, 0
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    v_sub_u32_e32 v3, 0, v0
; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7b
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v1, s0
; GFX9-NEXT:    v_div_fmas_f32 v2, v1, v1, v1
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    ds_write_b32 v3, v4 offset:12
; GFX9-NEXT:    global_store_dword v[0:1], v2, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: write_ds_sub0_offset0_global_clamp_bit:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_load_dword s0, s[0:1], 0x24
; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT:    s_mov_b32 vcc_lo, 0
; GFX10-NEXT:    v_mov_b32_e32 v3, 0x7b
; GFX10-NEXT:    v_sub_nc_u32_e32 v2, 0, v0
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    ds_write_b32 v2, v3 offset:12
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_div_fmas_f32 v4, s0, s0, s0
; GFX10-NEXT:    global_store_dword v[0:1], v4, off
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_endpgm
entry:
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #1
  %sub1 = sub i32 0, %x.i
  %tmp0 = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds.obj, i32 0, i32 %sub1
  %arrayidx = getelementptr inbounds i32, i32 addrspace(3)* %tmp0, i32 3
  store i32 123, i32 addrspace(3)* %arrayidx
  %fmas = call float @llvm.amdgcn.div.fmas.f32(float %dummy.val, float %dummy.val, float %dummy.val, i1 false)
  store volatile float %fmas, float addrspace(1)* null
  ret void
}

; i8 store to address 65535 + ((0 - workitem.id.x) << 2). The checks expect
; 65535 as the ds_write_b8 offset and a v_sub of the shifted id from 0.
define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset() #1 {
; CI-LABEL: add_x_shl_neg_to_sub_max_offset:
; CI:       ; %bb.0:
; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
; CI-NEXT:    v_mov_b32_e32 v1, 13
; CI-NEXT:    s_mov_b32 m0, -1
; CI-NEXT:    ds_write_b8 v0, v1 offset:65535
; CI-NEXT:    s_endpgm
;
; GFX9-LABEL: add_x_shl_neg_to_sub_max_offset:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    v_sub_u32_e32 v0, 0, v0
; GFX9-NEXT:    v_mov_b32_e32 v1, 13
; GFX9-NEXT:    ds_write_b8 v0, v1 offset:65535
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: add_x_shl_neg_to_sub_max_offset:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT:    v_mov_b32_e32 v1, 13
; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 0, v0
; GFX10-NEXT:    ds_write_b8 v0, v1 offset:65535
; GFX10-NEXT:    s_endpgm
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 65535, %shl
  %ptr = inttoptr i32 %add to i8 addrspace(3)*
  store i8 13, i8 addrspace(3)* %ptr
  ret void
}

; Same pattern as add_x_shl_neg_to_sub_max_offset with the constant raised to
; 65536. The checks expect the constant as the v_sub operand (0x10000) and a
; ds_write_b8 without an offset.
define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_p1() #1 {
; CI-LABEL: add_x_shl_neg_to_sub_max_offset_p1:
; CI:       ; %bb.0:
; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0x10000, v0
; CI-NEXT:    v_mov_b32_e32 v1, 13
; CI-NEXT:    s_mov_b32 m0, -1
; CI-NEXT:    ds_write_b8 v0, v1
; CI-NEXT:    s_endpgm
;
; GFX9-LABEL: add_x_shl_neg_to_sub_max_offset_p1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    v_sub_u32_e32 v0, 0x10000, v0
; GFX9-NEXT:    v_mov_b32_e32 v1, 13
; GFX9-NEXT:    ds_write_b8 v0, v1
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: add_x_shl_neg_to_sub_max_offset_p1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT:    v_mov_b32_e32 v1, 13
; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 0x10000, v0
; GFX10-NEXT:    ds_write_b8 v0, v1
; GFX10-NEXT:    s_endpgm
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 65536, %shl
  %ptr = inttoptr i32 %add to i8 addrspace(3)*
  store i8 13, i8 addrspace(3)* %ptr
  ret void
}

; One shifted negation feeds two adds (123 and 456), each the address of a
; volatile i32 store. The checks expect a single v_sub from 0 and both
; constants as ds_write_b32 offsets.
define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use() #1 {
; CI-LABEL: add_x_shl_neg_to_sub_multi_use:
; CI:       ; %bb.0:
; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
; CI-NEXT:    v_mov_b32_e32 v1, 13
; CI-NEXT:    s_mov_b32 m0, -1
; CI-NEXT:    ds_write_b32 v0, v1 offset:123
; CI-NEXT:    ds_write_b32 v0, v1 offset:456
; CI-NEXT:    s_endpgm
;
; GFX9-LABEL: add_x_shl_neg_to_sub_multi_use:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    v_sub_u32_e32 v0, 0, v0
; GFX9-NEXT:    v_mov_b32_e32 v1, 13
; GFX9-NEXT:    ds_write_b32 v0, v1 offset:123
; GFX9-NEXT:    ds_write_b32 v0, v1 offset:456
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: add_x_shl_neg_to_sub_multi_use:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT:    v_mov_b32_e32 v1, 13
; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 0, v0
; GFX10-NEXT:    ds_write_b32 v0, v1 offset:123
; GFX10-NEXT:    ds_write_b32 v0, v1 offset:456
; GFX10-NEXT:    s_endpgm
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add0 = add i32 123, %shl
  %add1 = add i32 456, %shl
  %ptr0 = inttoptr i32 %add0 to i32 addrspace(3)*
  store volatile i32 13, i32 addrspace(3)* %ptr0
  %ptr1 = inttoptr i32 %add1 to i32 addrspace(3)*
  store volatile i32 13, i32 addrspace(3)* %ptr1
  ret void
}

; A single add (123 + shifted negation) is the address of two volatile i32
; stores. The checks expect one v_sub from 0 and offset:123 on both
; ds_write_b32 instructions.
define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use_same_offset() #1 {
; CI-LABEL: add_x_shl_neg_to_sub_multi_use_same_offset:
; CI:       ; %bb.0:
; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
; CI-NEXT:    v_mov_b32_e32 v1, 13
; CI-NEXT:    s_mov_b32 m0, -1
; CI-NEXT:    ds_write_b32 v0, v1 offset:123
; CI-NEXT:    ds_write_b32 v0, v1 offset:123
; CI-NEXT:    s_endpgm
;
; GFX9-LABEL: add_x_shl_neg_to_sub_multi_use_same_offset:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    v_sub_u32_e32 v0, 0, v0
; GFX9-NEXT:    v_mov_b32_e32 v1, 13
; GFX9-NEXT:    ds_write_b32 v0, v1 offset:123
; GFX9-NEXT:    ds_write_b32 v0, v1 offset:123
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: add_x_shl_neg_to_sub_multi_use_same_offset:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT:    v_mov_b32_e32 v1, 13
; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 0, v0
; GFX10-NEXT:    ds_write_b32 v0, v1 offset:123
; GFX10-NEXT:    ds_write_b32 v0, v1 offset:123
; GFX10-NEXT:    s_endpgm
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 123, %shl
  %ptr = inttoptr i32 %add to i32 addrspace(3)*
  store volatile i32 13, i32 addrspace(3)* %ptr
  store volatile i32 13, i32 addrspace(3)* %ptr
  ret void
}

; i64 store with "align 4" to address 1019 + ((0 - workitem.id.x) << 2). The
; checks expect a ds_write2_b32 with offset1:1 and the constant as the v_sub
; operand (0x3fb).
define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 {
; CI-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset:
; CI:       ; %bb.0:
; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0x3fb, v0
; CI-NEXT:    v_mov_b32_e32 v1, 0x7b
; CI-NEXT:    v_mov_b32_e32 v2, 0
; CI-NEXT:    s_mov_b32 m0, -1
; CI-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
; CI-NEXT:    s_endpgm
;
; GFX9-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    v_sub_u32_e32 v0, 0x3fb, v0
; GFX9-NEXT:    v_mov_b32_e32 v1, 0x7b
; GFX9-NEXT:    v_mov_b32_e32 v2, 0
; GFX9-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0x7b
; GFX10-NEXT:    v_mov_b32_e32 v2, 0
; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 0x3fb, v0
; GFX10-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
; GFX10-NEXT:    s_endpgm
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 1019, %shl
  %ptr = inttoptr i32 %add to i64 addrspace(3)*
  store i64 123, i64 addrspace(3)* %ptr, align 4
  ret void
}

; Same LDS store as add_x_shl_neg_to_sub_misaligned_i64_max_offset, followed
; by a call to llvm.amdgcn.div.fmas.f32 with an "i1 false" condition whose
; result is stored (volatile) through a null addrspace(1) pointer. In the
; bonaire checks the v_sub writes vcc and vcc is then set to 0 ahead of
; v_div_fmas_f32.
; NOTE(review): the name suggests this guards the clamp operand of the
; generated sub - confirm against the change that introduced the test.
define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit(float %dummy.val) #1 {
; CI-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit:
; CI:       ; %bb.0:
; CI-NEXT:    s_load_dword s0, s[0:1], 0x9
; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0x3fb, v0
; CI-NEXT:    s_mov_b64 vcc, 0
; CI-NEXT:    v_mov_b32_e32 v2, 0x7b
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    v_mov_b32_e32 v1, s0
; CI-NEXT:    s_mov_b32 s0, 0
; CI-NEXT:    v_div_fmas_f32 v1, v1, v1, v1
; CI-NEXT:    v_mov_b32_e32 v3, 0
; CI-NEXT:    s_mov_b32 m0, -1
; CI-NEXT:    s_mov_b32 s3, 0xf000
; CI-NEXT:    s_mov_b32 s2, -1
; CI-NEXT:    s_mov_b32 s1, s0
; CI-NEXT:    ds_write2_b32 v0, v2, v3 offset1:1
; CI-NEXT:    buffer_store_dword v1, off, s[0:3], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_endpgm
;
; GFX9-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s0, s[0:1], 0x24
; GFX9-NEXT:    s_mov_b64 vcc, 0
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    v_sub_u32_e32 v3, 0x3fb, v0
; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7b
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v1, s0
; GFX9-NEXT:    v_div_fmas_f32 v2, v1, v1, v1
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mov_b32_e32 v5, 0
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    ds_write2_b32 v3, v4, v5 offset1:1
; GFX9-NEXT:    global_store_dword v[0:1], v2, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dword s0, s[0:1], 0x24
; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT:    s_mov_b32 vcc_lo, 0
; GFX10-NEXT:    v_mov_b32_e32 v3, 0x7b
; GFX10-NEXT:    v_mov_b32_e32 v4, 0
; GFX10-NEXT:    v_sub_nc_u32_e32 v2, 0x3fb, v0
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    ds_write2_b32 v2, v3, v4 offset1:1
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_div_fmas_f32 v5, s0, s0, s0
; GFX10-NEXT:    global_store_dword v[0:1], v5, off
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_endpgm
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 1019, %shl
  %ptr = inttoptr i32 %add to i64 addrspace(3)*
  store i64 123, i64 addrspace(3)* %ptr, align 4
  %fmas = call float @llvm.amdgcn.div.fmas.f32(float %dummy.val, float %dummy.val, float %dummy.val, i1 false)
  store volatile float %fmas, float addrspace(1)* null
  ret void
}

; Same pattern as add_x_shl_neg_to_sub_misaligned_i64_max_offset with the
; constant raised to 1020. The checks expect the constant as the v_sub operand
; (0x3fc) and a ds_write2_b32 with offset1:1.
define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1() #1 {
; CI-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1:
; CI:       ; %bb.0:
; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0x3fc, v0
; CI-NEXT:    v_mov_b32_e32 v1, 0x7b
; CI-NEXT:    v_mov_b32_e32 v2, 0
; CI-NEXT:    s_mov_b32 m0, -1
; CI-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
; CI-NEXT:    s_endpgm
;
; GFX9-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    v_sub_u32_e32 v0, 0x3fc, v0
; GFX9-NEXT:    v_mov_b32_e32 v1, 0x7b
; GFX9-NEXT:    v_mov_b32_e32 v2, 0
; GFX9-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0x7b
; GFX10-NEXT:    v_mov_b32_e32 v2, 0
; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 0x3fc, v0
; GFX10-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
; GFX10-NEXT:    s_endpgm
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 1020, %shl
  %ptr = inttoptr i32 %add to i64 addrspace(3)*
  store i64 123, i64 addrspace(3)* %ptr, align 4
  ret void
}

declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1)

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
attributes #2 = { nounwind convergent }