; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,MESA %s
; RUN: llc -march=amdgcn -mcpu=hawaii -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,MESA %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,VI,MESA %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,HSA %s

; Checks how amdgpu_kernel functions set up arguments for calls to external
; functions. Each kernel below makes a single call with one argument type
; (immediate or loaded), and the directives pin where the value is expected
; immediately before s_swappc_b64: v0 and upward for register arguments, and
; scratch stores for values that are passed in memory.
;
; The first three invocations use the default triple and share the MESA
; prefix (fiji, hawaii, gfx900); the last one uses the amdhsa triple on fiji
; and selects the HSA prefix instead.

declare void @external_void_func_i1(i1) #0
declare void @external_void_func_i1_signext(i1 signext) #0
declare void @external_void_func_i1_zeroext(i1 zeroext) #0

declare void @external_void_func_i8(i8) #0
declare void @external_void_func_i8_signext(i8 signext) #0
declare void @external_void_func_i8_zeroext(i8 zeroext) #0

declare void @external_void_func_i16(i16) #0
declare void @external_void_func_i16_signext(i16 signext) #0
declare void @external_void_func_i16_zeroext(i16 zeroext) #0

declare void @external_void_func_i32(i32) #0
declare void @external_void_func_i64(i64) #0
declare void @external_void_func_v2i64(<2 x i64>) #0
declare void @external_void_func_v3i64(<3 x i64>) #0
declare void @external_void_func_v4i64(<4 x i64>) #0

declare void @external_void_func_f16(half) #0
declare void @external_void_func_f32(float) #0
declare void @external_void_func_f64(double) #0
declare void @external_void_func_v2f32(<2 x float>) #0
declare void @external_void_func_v2f64(<2 x double>) #0
declare void @external_void_func_v3f64(<3 x double>) #0

declare void @external_void_func_v2i16(<2 x i16>) #0
declare void @external_void_func_v2f16(<2 x half>) #0
declare void @external_void_func_v3i16(<3 x i16>) #0
declare void @external_void_func_v3f16(<3 x half>) #0
declare void @external_void_func_v4i16(<4 x i16>) #0
declare void @external_void_func_v4f16(<4 x half>) #0

declare void @external_void_func_v2i32(<2 x i32>) #0
declare void @external_void_func_v3i32(<3 x i32>) #0
declare void @external_void_func_v3i32_i32(<3 x i32>, i32) #0
declare void @external_void_func_v4i32(<4 x i32>) #0
declare void @external_void_func_v8i32(<8 x i32>) #0
declare void @external_void_func_v16i32(<16 x i32>) #0
declare void @external_void_func_v32i32(<32 x i32>) #0
declare void @external_void_func_v32i32_i32(<32 x i32>, i32) #0

; return value and argument
declare i32 @external_i32_func_i32(i32) #0

; Structs
declare void @external_void_func_struct_i8_i32({ i8, i32 }) #0
declare void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval) #0
declare void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* sret, { i8, i32 } addrspace(5)* byval) #0

declare void @external_void_func_v16i8(<16 x i8>) #0


; FIXME: Should be passing -1
; GCN-LABEL: {{^}}test_call_external_void_func_i1_imm:
; MESA: s_mov_b32 s36, SCRATCH_RSRC_DWORD

; MESA-DAG: s_mov_b64 s[0:1], s[36:37]

; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1@rel32@hi+4
; GCN-DAG: v_mov_b32_e32 v0, 1{{$}}
; MESA-DAG: s_mov_b64 s[2:3], s[38:39]

; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
  call void @external_void_func_i1(i1 true)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_i1_signext:
; MESA: s_mov_b32 s33, s3{{$}}
; HSA: s_mov_b32 s33, s9{{$}}

; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1_signext@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1_signext@rel32@hi+4
; GCN-NEXT: buffer_load_ubyte [[VAR:v[0-9]+]]
; HSA-NEXT: s_mov_b32 s4, s33
; HSA-NEXT: s_mov_b32 s32, s33

; MESA-DAG: s_mov_b32 s4, s33{{$}}
; MESA-DAG: s_mov_b32 s32, s33{{$}}

; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1
; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
  %var = load volatile i1, i1 addrspace(1)* undef
  call void @external_void_func_i1_signext(i1 %var)
  ret void
}

; FIXME: load should be scheduled before getpc
; GCN-LABEL: {{^}}test_call_external_void_func_i1_zeroext:
; MESA: s_mov_b32 s33, s3{{$}}

; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1_zeroext@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1_zeroext@rel32@hi+4
; GCN-NEXT: buffer_load_ubyte v0

; GCN-DAG: s_mov_b32 s4, s33{{$}}
; GCN-DAG: s_mov_b32 s32, s33{{$}}

; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
  %var = load volatile i1, i1 addrspace(1)* undef
  call void @external_void_func_i1_zeroext(i1 %var)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_i8_imm:
; MESA-DAG: s_mov_b32 s33, s3{{$}}

; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8@rel32@hi+4
; GCN-NEXT: v_mov_b32_e32 v0, 0x7b

; HSA-DAG: s_mov_b32 s4, s33{{$}}
; GCN-DAG: s_mov_b32 s32, s33{{$}}

; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
  call void @external_void_func_i8(i8 123)
  ret void
}

; FIXME: don't wait before call
; GCN-LABEL: {{^}}test_call_external_void_func_i8_signext:
; HSA-DAG: s_mov_b32 s33, s9{{$}}
; MESA-DAG: s_mov_b32 s33, s3{{$}}

; GCN-DAG: buffer_load_sbyte v0
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8_signext@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8_signext@rel32@hi+4

; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s32, s33

; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
  %var = load volatile i8, i8 addrspace(1)* undef
  call void @external_void_func_i8_signext(i8 %var)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_i8_zeroext:
; MESA-DAG: s_mov_b32 s33, s3{{$}}
; HSA-DAG: s_mov_b32 s33, s9{{$}}

; GCN-DAG: buffer_load_ubyte v0
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8_zeroext@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8_zeroext@rel32@hi+4

; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s32, s33

; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
  %var = load volatile i8, i8 addrspace(1)* undef
  call void @external_void_func_i8_zeroext(i8 %var)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_i16_imm:
; GCN-DAG: v_mov_b32_e32 v0, 0x7b{{$}}

; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s32, s33

; GCN: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
  call void @external_void_func_i16(i16 123)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_i16_signext:
; MESA-DAG: s_mov_b32 s33, s3{{$}}

; GCN-DAG: buffer_load_sshort v0
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i16_signext@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i16_signext@rel32@hi+4

; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s32, s33

; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
  %var = load volatile i16, i16 addrspace(1)* undef
  call void @external_void_func_i16_signext(i16 %var)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_i16_zeroext:
; MESA-DAG: s_mov_b32 s33, s3{{$}}


; GCN-DAG: buffer_load_ushort v0
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i16_zeroext@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i16_zeroext@rel32@hi+4

; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s32, s33

; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
  %var = load volatile i16, i16 addrspace(1)* undef
  call void @external_void_func_i16_zeroext(i16 %var)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_i32_imm:
; MESA-DAG: s_mov_b32 s33, s3{{$}}

; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i32@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i32@rel32@hi+4
; GCN: v_mov_b32_e32 v0, 42
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s32, s33

; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
  call void @external_void_func_i32(i32 42)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_i64_imm:
; GCN-DAG: s_movk_i32 [[K0:s[0-9]+]], 0x7b{{$}}
; GCN-DAG: s_mov_b32 [[K1:s[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v0, [[K0]]
; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i64@rel32@lo+4
; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i64@rel32@hi+4
; GCN-DAG: v_mov_b32_e32 v1, [[K1]]
; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
  call void @external_void_func_i64(i64 123)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_v2i64:
; GCN: buffer_load_dwordx4 v[0:3]
; GCN: s_waitcnt
; GCN-NEXT: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
  %val = load <2 x i64>, <2 x i64> addrspace(1)* null
  call void @external_void_func_v2i64(<2 x i64> %val)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_v2i64_imm:
; GCN-DAG: v_mov_b32_e32 v0, 1
; GCN-DAG: v_mov_b32_e32 v1, 2
; GCN-DAG: v_mov_b32_e32 v2, 3
; GCN-DAG: v_mov_b32_e32 v3, 4
; GCN: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
  call void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_v3i64:
; GCN: buffer_load_dwordx4 v[0:3]
; GCN: v_mov_b32_e32 v4, 1
; GCN: v_mov_b32_e32 v5, 2
; GCN: s_waitcnt
; GCN-NEXT: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
  %load = load <2 x i64>, <2 x i64> addrspace(1)* null
  %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>

  call void @external_void_func_v3i64(<3 x i64> %val)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_v4i64:
; GCN: buffer_load_dwordx4 v[0:3]
; GCN-DAG: v_mov_b32_e32 v4, 1
; GCN-DAG: v_mov_b32_e32 v5, 2
; GCN-DAG: v_mov_b32_e32 v6, 3
; GCN-DAG: v_mov_b32_e32 v7, 4

; GCN: s_waitcnt
; GCN-NEXT: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
  %load = load <2 x i64>, <2 x i64> addrspace(1)* null
  %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  call void @external_void_func_v4i64(<4 x i64> %val)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_f16_imm:
; VI: v_mov_b32_e32 v0, 0x4400
; CI: v_mov_b32_e32 v0, 4.0
; GCN-NOT: v0
; GCN: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
  call void @external_void_func_f16(half 4.0)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_f32_imm:
; GCN: v_mov_b32_e32 v0, 4.0
; GCN-NOT: v0
; GCN: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
  call void @external_void_func_f32(float 4.0)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_v2f32_imm:
; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
  call void @external_void_func_v2f32(<2 x float> <float 1.0, float 2.0>)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_f64_imm:
; GCN: v_mov_b32_e32 v0, 0{{$}}
; GCN: v_mov_b32_e32 v1, 0x40100000
; GCN: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
  call void @external_void_func_f64(double 4.0)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_v2f64_imm:
; GCN: v_mov_b32_e32 v0, 0{{$}}
; GCN: v_mov_b32_e32 v1, 2.0
; GCN: v_mov_b32_e32 v2, 0{{$}}
; GCN: v_mov_b32_e32 v3, 0x40100000
; GCN: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
  call void @external_void_func_v2f64(<2 x double> <double 2.0, double 4.0>)
  ret void
}

; The call is checked with an ordered directive so that it must follow all
; six argument moves, which may themselves appear in any order.
; GCN-LABEL: {{^}}test_call_external_void_func_v3f64_imm:
; GCN-DAG: v_mov_b32_e32 v0, 0{{$}}
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 0{{$}}
; GCN-DAG: v_mov_b32_e32 v3, 0x40100000
; GCN-DAG: v_mov_b32_e32 v4, 0{{$}}
; GCN-DAG: v_mov_b32_e32 v5, 0x40200000
; GCN: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
  call void @external_void_func_v3f64(<3 x double> <double 2.0, double 4.0, double 8.0>)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_v2i16:
; GFX9: buffer_load_dword v0
; GFX9-NOT: v0
; GFX9: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
  %val = load <2 x i16>, <2 x i16> addrspace(1)* undef
  call void @external_void_func_v2i16(<2 x i16> %val)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_v3i16:
; GFX9: buffer_load_dwordx2 v[0:1]
; GFX9-NOT: v0
; GFX9-NOT: v1
; GFX9: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
  %val = load <3 x i16>, <3 x i16> addrspace(1)* undef
  call void @external_void_func_v3i16(<3 x i16> %val)
  ret void
}

; FIXME: materialize constant directly in VGPR
; GCN-LABEL: {{^}}test_call_external_void_func_v3i16_imm:
; GFX9-DAG: s_mov_b32 [[K01:s[0-9]+]], 0x20001
; GFX9-DAG: s_pack_ll_b32_b16 [[K23:s[0-9]+]], 3, s{{[0-9]+}}
; GFX9: v_mov_b32_e32 v0, [[K01]]
; GFX9: v_mov_b32_e32 v1, [[K23]]
; GFX9: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 {
  call void @external_void_func_v3i16(<3 x i16> <i16 1, i16 2, i16 3>)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_v4i16:
; GFX9: buffer_load_dwordx2 v[0:1]
; GFX9-NOT: v0
; GFX9-NOT: v1
; GFX9: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
  %val = load <4 x i16>, <4 x i16> addrspace(1)* undef
  call void @external_void_func_v4i16(<4 x i16> %val)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_v4i16_imm:
; GFX9-DAG: v_mov_b32_e32 v0, 0x20001
; GFX9-DAG: v_mov_b32_e32 v1, 0x40003
; GFX9: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
  call void @external_void_func_v4i16(<4 x i16> <i16 1, i16 2, i16 3, i16 4>)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_v2f16:
; GFX9: buffer_load_dword v0
; GFX9-NOT: v0
; GFX9: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* undef
  call void @external_void_func_v2f16(<2 x half> %val)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_v2i32:
; GCN: buffer_load_dwordx2 v[0:1]
; GCN: s_waitcnt
; GCN-NEXT: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
  %val = load <2 x i32>, <2 x i32> addrspace(1)* undef
  call void @external_void_func_v2i32(<2 x i32> %val)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_v2i32_imm:
; GCN-DAG: v_mov_b32_e32 v0, 1
; GCN-DAG: v_mov_b32_e32 v1, 2
; GCN: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
  call void @external_void_func_v2i32(<2 x i32> <i32 1, i32 2>)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_v3i32_imm:
; HSA-DAG: s_mov_b32 s33, s9
; MESA-DAG: s_mov_b32 s33, s3{{$}}

; GCN-DAG: v_mov_b32_e32 v0, 3
; GCN-DAG: v_mov_b32_e32 v1, 4
; GCN-DAG: v_mov_b32_e32 v2, 5
; GCN-NOT: v3

; GCN: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
  call void @external_void_func_v3i32(<3 x i32> <i32 3, i32 4, i32 5>)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_v3i32_i32:
; GCN-DAG: v_mov_b32_e32 v0, 3
; GCN-DAG: v_mov_b32_e32 v1, 4
; GCN-DAG: v_mov_b32_e32 v2, 5
; GCN-DAG: v_mov_b32_e32 v3, 6
define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
  call void @external_void_func_v3i32_i32(<3 x i32> <i32 3, i32 4, i32 5>, i32 6)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_v4i32:
; GCN: buffer_load_dwordx4 v[0:3]
; GCN: s_waitcnt
; GCN-NEXT: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
  %val = load <4 x i32>, <4 x i32> addrspace(1)* undef
  call void @external_void_func_v4i32(<4 x i32> %val)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_v4i32_imm:
; GCN-DAG: v_mov_b32_e32 v0, 1
; GCN-DAG: v_mov_b32_e32 v1, 2
; GCN-DAG: v_mov_b32_e32 v2, 3
; GCN-DAG: v_mov_b32_e32 v3, 4
; GCN: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
  call void @external_void_func_v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_v8i32:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN: s_waitcnt
; GCN-NEXT: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
  %ptr = load <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
  call void @external_void_func_v8i32(<8 x i32> %val)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_v8i32_imm:
; GCN-DAG: v_mov_b32_e32 v0, 1
; GCN-DAG: v_mov_b32_e32 v1, 2
; GCN-DAG: v_mov_b32_e32 v2, 3
; GCN-DAG: v_mov_b32_e32 v3, 4
; GCN-DAG: v_mov_b32_e32 v4, 5
; GCN-DAG: v_mov_b32_e32 v5, 6
; GCN-DAG: v_mov_b32_e32 v6, 7
; GCN-DAG: v_mov_b32_e32 v7, 8
; GCN: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
  call void @external_void_func_v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_v16i32:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN: s_waitcnt
; GCN-NEXT: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
  %ptr = load <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
  call void @external_void_func_v16i32(<16 x i32> %val)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_v32i32:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN-DAG: buffer_load_dwordx4 v[16:19], off
; GCN-DAG: buffer_load_dwordx4 v[20:23], off
; GCN-DAG: buffer_load_dwordx4 v[24:27], off
; GCN-DAG: buffer_load_dwordx4 v[28:31], off
; GCN: s_waitcnt
; GCN-NEXT: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
  %ptr = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr
  call void @external_void_func_v32i32(<32 x i32> %val)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_v32i32_i32:
; HSA-DAG: s_mov_b32 s33, s9
; HSA-NOT: s_add_u32 s32

; MESA-DAG: s_mov_b32 s33, s3{{$}}
; MESA-NOT: s_add_u32 s32

; GCN-DAG: buffer_load_dword [[VAL1:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN-DAG: buffer_load_dwordx4 v[16:19], off
; GCN-DAG: buffer_load_dwordx4 v[20:23], off
; GCN-DAG: buffer_load_dwordx4 v[24:27], off
; GCN-DAG: buffer_load_dwordx4 v[28:31], off

; GCN: s_waitcnt
; GCN: buffer_store_dword [[VAL1]], off, s[{{[0-9]+}}:{{[0-9]+}}], s32 offset:4{{$}}
; GCN: s_swappc_b64
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
  %ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
  %val0 = load <32 x i32>, <32 x i32> addrspace(1)* %ptr0
  %val1 = load i32, i32 addrspace(1)* undef
  call void @external_void_func_v32i32_i32(<32 x i32> %val0, i32 %val1)
  ret void
}

; FIXME: No wait after call
; GCN-LABEL: {{^}}test_call_external_i32_func_i32_imm:
; GCN: v_mov_b32_e32 v0, 42
; GCN: s_swappc_b64 s[30:31],
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_store_dword v0, off, s[36:39], 0
define amdgpu_kernel void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %out) #0 {
  %val = call i32 @external_i32_func_i32(i32 42)
  store volatile i32 %val, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_struct_i8_i32:
; GCN: buffer_load_ubyte v0, off
; GCN: buffer_load_dword v1, off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
  %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef
  %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0
  call void @external_void_func_struct_i8_i32({ i8, i32 } %val)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_byval_struct_i8_i32:
; GCN-DAG: s_add_u32 [[SP:s[0-9]+]], s33, 0x400{{$}}

; GCN-DAG: v_mov_b32_e32 [[VAL0:v[0-9]+]], 3
; GCN-DAG: v_mov_b32_e32 [[VAL1:v[0-9]+]], 8
; MESA-DAG: buffer_store_byte [[VAL0]], off, s[36:39], s33 offset:8
; MESA-DAG: buffer_store_dword [[VAL1]], off, s[36:39], s33 offset:12

; HSA-DAG: buffer_store_byte [[VAL0]], off, s[0:3], s33 offset:8
; HSA-DAG: buffer_store_dword [[VAL1]], off, s[0:3], s33 offset:12

; GCN-NOT: s_add_u32 [[SP]],

; HSA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[0:3], s33 offset:8
; HSA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[0:3], s33 offset:12

; HSA-DAG: buffer_store_dword [[RELOAD_VAL0]], off, s[0:3], [[SP]] offset:4
; HSA-DAG: buffer_store_dword [[RELOAD_VAL1]], off, s[0:3], [[SP]] offset:8


; MESA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[36:39], s33 offset:8
; MESA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[36:39], s33 offset:12

; MESA-DAG: buffer_store_dword [[RELOAD_VAL0]], off, s[36:39], [[SP]] offset:4
; MESA-DAG: buffer_store_dword [[RELOAD_VAL1]], off, s[36:39], [[SP]] offset:8

; GCN-NEXT: s_swappc_b64
; GCN-NOT: [[SP]]
define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 {
  %val = alloca { i8, i32 }, align 4, addrspace(5)
  %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 0
  %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 1
  store i8 3, i8 addrspace(5)* %gep0
  store i32 8, i32 addrspace(5)* %gep1
  call void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* %val)
  ret void
}

; GCN-LABEL: {{^}}test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
; MESA-DAG: s_add_u32 [[SP:s[0-9]+]], [[FP_REG:s[0-9]+]], 0x800{{$}}
; HSA-DAG: s_add_u32 [[SP:s[0-9]+]], [[FP_REG:s[0-9]+]], 0x800{{$}}

; GCN-DAG: v_mov_b32_e32 [[VAL0:v[0-9]+]], 3
; GCN-DAG: v_mov_b32_e32 [[VAL1:v[0-9]+]], 8
; GCN-DAG: buffer_store_byte [[VAL0]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:8
; GCN-DAG: buffer_store_dword [[VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:12

; GCN-DAG: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:8
; GCN-DAG: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:12

; GCN-NOT: s_add_u32 [[SP]]
; GCN-DAG: buffer_store_dword [[RELOAD_VAL0]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:4
; GCN-DAG: buffer_store_dword [[RELOAD_VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:8
; GCN-NEXT: s_swappc_b64
; GCN-DAG: buffer_load_ubyte [[LOAD_OUT_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:16
; GCN-DAG: buffer_load_dword [[LOAD_OUT_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:20
; GCN-NOT: s_sub_u32 [[SP]]

; GCN: buffer_store_byte [[LOAD_OUT_VAL0]], off
; GCN: buffer_store_dword [[LOAD_OUT_VAL1]], off
define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
  %in.val = alloca { i8, i32 }, align 4, addrspace(5)
  %out.val = alloca { i8, i32 }, align 4, addrspace(5)
  %in.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 0
  %in.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 1
  store i8 3, i8 addrspace(5)* %in.gep0
  store i32 8, i32 addrspace(5)* %in.gep1
  call void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* %out.val, { i8, i32 } addrspace(5)* %in.val)
  %out.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, i32 0, i32 0
  %out.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, i32 0, i32 1
  %out.val0 = load i8, i8 addrspace(5)* %out.gep0
  %out.val1 = load i32, i32 addrspace(5)* %out.gep1

  store volatile i8 %out.val0, i8 addrspace(1)* undef
  store volatile i32 %out.val1, i32 addrspace(1)* undef
  ret void
}

; Only the label directive is present for this kernel, so no instruction
; sequence is pinned here; the function still goes through llc with
; -verify-machineinstrs like every other kernel in the file.
; GCN-LABEL: {{^}}test_call_external_void_func_v16i8:
define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
  %ptr = load <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef
  %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
  call void @external_void_func_v16i8(<16 x i8> %val)
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind noinline }