; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX89 %s

; Exercises return values of calls to external functions made from
; amdgpu_kernel entry points. Every kernel below calls one external
; declaration and volatile-stores the returned value (or values derived from
; it) through an undef addrspace(1) pointer, so the result cannot be dropped.
;
; The file is compiled three times (fiji, hawaii, gfx900). fiji and gfx900
; share the GFX89 check prefix, hawaii uses GFX7, and the GCN prefix applies to
; all three. Most kernels are only anchored by a label check, i.e. they verify
; that compilation succeeds and passes -verify-machineinstrs; the 3- and
; 5-element vector tests and the v2i24 test additionally pin the registers
; used after the call.

declare void @external_void_func_void() #0

; i1 returns, with and without an extension attribute.
declare i1 @external_i1_func_void() #0
declare zeroext i1 @external_i1_zeroext_func_void() #0
declare signext i1 @external_i1_signext_func_void() #0

; i8 returns, with and without an extension attribute.
declare i8 @external_i8_func_void() #0
declare zeroext i8 @external_i8_zeroext_func_void() #0
declare signext i8 @external_i8_signext_func_void() #0

; i16 scalar and vector returns.
declare i16 @external_i16_func_void() #0
declare <2 x i16> @external_v2i16_func_void() #0
declare <4 x i16> @external_v4i16_func_void() #0
declare zeroext i16 @external_i16_zeroext_func_void() #0
declare signext i16 @external_i16_signext_func_void() #0

; 32/64-bit integer and floating-point scalar returns.
declare i32 @external_i32_func_void() #0
declare i64 @external_i64_func_void() #0
declare half @external_f16_func_void() #0
declare float @external_f32_func_void() #0
declare double @external_f64_func_void() #0

; Floating-point vector returns.
declare <2 x half> @external_v2f16_func_void() #0
declare <4 x half> @external_v4f16_func_void() #0
declare <3 x float> @external_v3f32_func_void() #0
declare <5 x float> @external_v5f32_func_void() #0
declare <2 x double> @external_v2f64_func_void() #0

; Vector of a non-power-of-two integer width.
declare <2 x i24> @external_v2i24_func_void() #0

; i32 vector returns of increasing element count.
declare <2 x i32> @external_v2i32_func_void() #0
declare <3 x i32> @external_v3i32_func_void() #0
declare <4 x i32> @external_v4i32_func_void() #0
declare <5 x i32> @external_v5i32_func_void() #0
declare <8 x i32> @external_v8i32_func_void() #0
declare <16 x i32> @external_v16i32_func_void() #0
declare <32 x i32> @external_v32i32_func_void() #0
declare { <32 x i32>, i32 } @external_v32i32_i32_func_void() #0

; Aggregate (struct) return.
declare { i32, i64 } @external_i32_i64_func_void() #0

; A single call with no return value.
; GCN-LABEL: {{^}}test_call_external_void_func_void:
define amdgpu_kernel void @test_call_external_void_func_void() #0 {
  call void @external_void_func_void()
  ret void
}

; Two back-to-back calls to the same void function.
; GCN-LABEL: {{^}}test_call_external_void_func_void_x2:
define amdgpu_kernel void @test_call_external_void_func_void_x2() #0 {
  call void @external_void_func_void()
  call void @external_void_func_void()
  ret void
}

; i1 return stored directly as i1.
; GCN-LABEL: {{^}}test_call_external_i1_func_void:
define amdgpu_kernel void @test_call_external_i1_func_void() #0 {
  %val = call i1 @external_i1_func_void()
  store volatile i1 %val, i1 addrspace(1)* undef
  ret void
}

; zeroext i1 return, widened to i32 with zext before the store.
; GCN-LABEL: {{^}}test_call_external_i1_zeroext_func_void:
define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 {
  %val = call i1 @external_i1_zeroext_func_void()
  %val.ext = zext i1 %val to i32
  store volatile i32 %val.ext, i32 addrspace(1)* undef
  ret void
}

; signext i1 return, widened to i32 before the store.
; NOTE(review): the widening uses zext although the callee is declared
; signext; confirm whether sext was intended here.
; GCN-LABEL: {{^}}test_call_external_i1_signext_func_void:
define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 {
  %val = call i1 @external_i1_signext_func_void()
  %val.ext = zext i1 %val to i32
  store volatile i32 %val.ext, i32 addrspace(1)* undef
  ret void
}

; i8 return stored directly as i8.
; GCN-LABEL: {{^}}test_call_external_i8_func_void:
define amdgpu_kernel void @test_call_external_i8_func_void() #0 {
  %val = call i8 @external_i8_func_void()
  store volatile i8 %val, i8 addrspace(1)* undef
  ret void
}

; zeroext i8 return, widened to i32 with zext before the store.
; GCN-LABEL: {{^}}test_call_external_i8_zeroext_func_void:
define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 {
  %val = call i8 @external_i8_zeroext_func_void()
  %val.ext = zext i8 %val to i32
  store volatile i32 %val.ext, i32 addrspace(1)* undef
  ret void
}

; signext i8 return, widened to i32 before the store.
; NOTE(review): the widening uses zext although the callee is declared
; signext; confirm whether sext was intended here.
; GCN-LABEL: {{^}}test_call_external_i8_signext_func_void:
define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 {
  %val = call i8 @external_i8_signext_func_void()
  %val.ext = zext i8 %val to i32
  store volatile i32 %val.ext, i32 addrspace(1)* undef
  ret void
}

; i16 return stored directly as i16.
; GCN-LABEL: {{^}}test_call_external_i16_func_void:
define amdgpu_kernel void @test_call_external_i16_func_void() #0 {
  %val = call i16 @external_i16_func_void()
  store volatile i16 %val, i16 addrspace(1)* undef
  ret void
}

; zeroext i16 return, widened to i32 with zext before the store.
; GCN-LABEL: {{^}}test_call_external_i16_zeroext_func_void:
define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 {
  %val = call i16 @external_i16_zeroext_func_void()
  %val.ext = zext i16 %val to i32
  store volatile i32 %val.ext, i32 addrspace(1)* undef
  ret void
}

; signext i16 return, widened to i32 before the store.
; NOTE(review): the widening uses zext although the callee is declared
; signext; confirm whether sext was intended here.
; GCN-LABEL: {{^}}test_call_external_i16_signext_func_void:
define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 {
  %val = call i16 @external_i16_signext_func_void()
  %val.ext = zext i16 %val to i32
  store volatile i32 %val.ext, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_call_external_i32_func_void:
define amdgpu_kernel void @test_call_external_i32_func_void() #0 {
  %val = call i32 @external_i32_func_void()
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_call_external_i64_func_void:
define amdgpu_kernel void @test_call_external_i64_func_void() #0 {
  %val = call i64 @external_i64_func_void()
  store volatile i64 %val, i64 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_call_external_f16_func_void:
define amdgpu_kernel void @test_call_external_f16_func_void() #0 {
  %val = call half @external_f16_func_void()
  store volatile half %val, half addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_call_external_f32_func_void:
define amdgpu_kernel void @test_call_external_f32_func_void() #0 {
  %val = call float @external_f32_func_void()
  store volatile float %val, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_call_external_f64_func_void:
define amdgpu_kernel void @test_call_external_f64_func_void() #0 {
  %val = call double @external_f64_func_void()
  store volatile double %val, double addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_call_external_v2f64_func_void:
define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 {
  %val = call <2 x double> @external_v2f64_func_void()
  store volatile <2 x double> %val, <2 x double> addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_call_external_v2i32_func_void:
define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 {
  %val = call <2 x i32> @external_v2i32_func_void()
  store volatile <2 x i32> %val, <2 x i32> addrspace(1)* undef
  ret void
}

; The three returned dwords are expected in v[0:2] and stored with a single
; dwordx3 store after the call.
; GCN-LABEL: {{^}}test_call_external_v3i32_func_void:
; GCN: s_swappc
; GFX7-DAG: flat_store_dwordx3 {{.*}}, v[0:2]
; GFX89-DAG: buffer_store_dwordx3 v[0:2]
define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 {
  %val = call <3 x i32> @external_v3i32_func_void()
  store volatile <3 x i32> %val, <3 x i32> addrspace(1)* undef, align 8
  ret void
}

; GCN-LABEL: {{^}}test_call_external_v4i32_func_void:
define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 {
  %val = call <4 x i32> @external_v4i32_func_void()
  store volatile <4 x i32> %val, <4 x i32> addrspace(1)* undef, align 8
  ret void
}

; The five returned dwords are expected in v[0:3] and v4, stored as one
; dwordx4 store plus one dword store (in either order).
; GCN-LABEL: {{^}}test_call_external_v5i32_func_void:
; GCN: s_swappc
; GFX7-DAG: flat_store_dwordx4 {{.*}}, v[0:3]
; GFX7-DAG: flat_store_dword {{.*}}, v4
; GFX89-DAG: buffer_store_dwordx4 v[0:3]
; GFX89-DAG: buffer_store_dword v4
define amdgpu_kernel void @test_call_external_v5i32_func_void() #0 {
  %val = call <5 x i32> @external_v5i32_func_void()
  store volatile <5 x i32> %val, <5 x i32> addrspace(1)* undef, align 8
  ret void
}

; GCN-LABEL: {{^}}test_call_external_v8i32_func_void:
define amdgpu_kernel void @test_call_external_v8i32_func_void() #0 {
  %val = call <8 x i32> @external_v8i32_func_void()
  store volatile <8 x i32> %val, <8 x i32> addrspace(1)* undef, align 8
  ret void
}

; GCN-LABEL: {{^}}test_call_external_v16i32_func_void:
define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 {
  %val = call <16 x i32> @external_v16i32_func_void()
  store volatile <16 x i32> %val, <16 x i32> addrspace(1)* undef, align 8
  ret void
}

; GCN-LABEL: {{^}}test_call_external_v32i32_func_void:
define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 {
  %val = call <32 x i32> @external_v32i32_func_void()
  store volatile <32 x i32> %val, <32 x i32> addrspace(1)* undef, align 8
  ret void
}

; GCN-LABEL: {{^}}test_call_external_v2i16_func_void:
define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 {
  %val = call <2 x i16> @external_v2i16_func_void()
  store volatile <2 x i16> %val, <2 x i16> addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_call_external_v4i16_func_void:
define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 {
  %val = call <4 x i16> @external_v4i16_func_void()
  store volatile <4 x i16> %val, <4 x i16> addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_call_external_v2f16_func_void:
define amdgpu_kernel void @test_call_external_v2f16_func_void() #0 {
  %val = call <2 x half> @external_v2f16_func_void()
  store volatile <2 x half> %val, <2 x half> addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}test_call_external_v4f16_func_void:
define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 {
  %val = call <4 x half> @external_v4f16_func_void()
  store volatile <4 x half> %val, <4 x half> addrspace(1)* undef
  ret void
}

; The two i24 elements are expected in v0 and v1; the kernel adds them, and
; the check accepts either the i32 or u32 spelling of the add, with or
; without an explicit vcc operand.
; GCN-LABEL: {{^}}test_call_external_v2i24_func_void:
; GCN: s_swappc_b64
; GCN: v_add_{{i|u}}32_e32 v0, {{(vcc, )?}}v0, v1
define amdgpu_kernel void @test_call_external_v2i24_func_void() #0 {
  %val = call <2 x i24> @external_v2i24_func_void()
  %elt0 = extractelement <2 x i24> %val, i32 0
  %elt1 = extractelement <2 x i24> %val, i32 1
  %add = add i24 %elt0, %elt1
  store volatile i24 %add, i24 addrspace(1)* undef
  ret void
}

; Float counterpart of the 3-element i32 vector test: result expected in
; v[0:2] and stored with a single dwordx3 store.
; GCN-LABEL: {{^}}test_call_external_v3f32_func_void:
; GCN: s_swappc
; GFX7-DAG: flat_store_dwordx3 {{.*}}, v[0:2]
; GFX89-DAG: buffer_store_dwordx3 v[0:2]
define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 {
  %val = call <3 x float> @external_v3f32_func_void()
  store volatile <3 x float> %val, <3 x float> addrspace(1)* undef
  ret void
}

; Float counterpart of the 5-element i32 vector test: result expected in
; v[0:3] and v4, stored as one dwordx4 store plus one dword store.
; GCN-LABEL: {{^}}test_call_external_v5f32_func_void:
; GCN: s_swappc
; GFX7-DAG: flat_store_dwordx4 {{.*}}, v[0:3]
; GFX7-DAG: flat_store_dword {{.*}}, v4
; GFX89-DAG: buffer_store_dwordx4 v[0:3]
; GFX89-DAG: buffer_store_dword v4
define amdgpu_kernel void @test_call_external_v5f32_func_void() #0 {
  %val = call <5 x float> @external_v5f32_func_void()
  store volatile <5 x float> %val, <5 x float> addrspace(1)* undef
  ret void
}

; Struct return: both members are extracted and stored separately.
; GCN-LABEL: {{^}}test_call_external_i32_i64_func_void:
define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 {
  %val = call { i32, i64 } @external_i32_i64_func_void()
  %val.0 = extractvalue { i32, i64 } %val, 0
  %val.1 = extractvalue { i32, i64 } %val, 1
  store volatile i32 %val.0, i32 addrspace(1)* undef
  store volatile i64 %val.1, i64 addrspace(1)* undef
  ret void
}

; Requires writing results to stack
; GCN-LABEL: {{^}}test_call_external_v32i32_i32_func_void:
define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 {
  %val = call { <32 x i32>, i32 } @external_v32i32_i32_func_void()
  %val0 = extractvalue { <32 x i32>, i32 } %val, 0
  %val1 = extractvalue { <32 x i32>, i32 } %val, 1
  store volatile <32 x i32> %val0, <32 x i32> addrspace(1)* undef, align 8
  store volatile i32 %val1, i32 addrspace(1)* undef
  ret void
}

; Only #0 is referenced by the declarations and definitions above.
; NOTE(review): #1 and #2 appear unused in this file; kept as-is.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind noinline }