; Kernel argument lowering test.
;
; Each kernel below takes an output pointer plus one argument of a given
; scalar or vector type and stores that argument to the output. The checks
; verify which instructions are used to fetch the argument on the r600
; targets (checks carrying the EG prefix) and on the amdgcn target (checks
; carrying the SI prefix).

; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG
; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI

; EG-LABEL: {{^}}i8_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-LABEL: {{^}}i8_arg:
; SI: buffer_load_ubyte

define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
entry:
  %0 = zext i8 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; EG-LABEL: {{^}}i8_zext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-LABEL: {{^}}i8_zext_arg:
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb

define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
entry:
  %0 = zext i8 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; EG-LABEL: {{^}}i8_sext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-LABEL: {{^}}i8_sext_arg:
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb

define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
entry:
  %0 = sext i8 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; EG-LABEL: {{^}}i16_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-LABEL: {{^}}i16_arg:
; SI: buffer_load_ushort

define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
entry:
  %0 = zext i16 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; EG-LABEL: {{^}}i16_zext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-LABEL: {{^}}i16_zext_arg:
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb

define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
entry:
  %0 = zext i16 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; EG-LABEL: {{^}}i16_sext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-LABEL: {{^}}i16_sext_arg:
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb

define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
entry:
  %0 = sext i16 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; The scalar-load check below previously had no check prefix, so FileCheck
; silently skipped it; it now carries the amdgcn prefix like its siblings.
; EG-LABEL: {{^}}i32_arg:
; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI-LABEL: {{^}}i32_arg:
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
entry:
  store i32 %in, i32 addrspace(1)* %out, align 4
  ret void
}

; As with the i32 case, the scalar-load check was missing its prefix and is
; now active.
; EG-LABEL: {{^}}f32_arg:
; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI-LABEL: {{^}}f32_arg:
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
entry:
  store float %in, float addrspace(1)* %out, align 4
  ret void
}

; EG-LABEL: {{^}}v2i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; SI-LABEL: {{^}}v2i8_arg:
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
entry:
  store <2 x i8> %in, <2 x i8> addrspace(1)* %out
  ret void
}

; EG-LABEL: {{^}}v2i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; SI-LABEL: {{^}}v2i16_arg:
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
entry:
  store <2 x i16> %in, <2 x i16> addrspace(1)* %out
  ret void
}

; EG-LABEL: {{^}}v2i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI-LABEL: {{^}}v2i32_arg:
; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
entry:
  store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

; EG-LABEL: {{^}}v2f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI-LABEL: {{^}}v2f32_arg:
; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
entry:
  store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
  ret void
}

; FIXME: The three VTX_READ_8 lines below carry no check prefix, so FileCheck
; ignores them and only the labels are verified for this kernel. Confirm the
; expected output before enabling them.
; EG-LABEL: {{^}}v3i8_arg:
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
; SI-LABEL: {{^}}v3i8_arg:
define void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
entry:
  store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
  ret void
}

; FIXME: The three VTX_READ_16 lines below carry no check prefix, so FileCheck
; ignores them and only the labels are verified for this kernel. Confirm the
; expected output before enabling them.
; EG-LABEL: {{^}}v3i16_arg:
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
; SI-LABEL: {{^}}v3i16_arg:
define void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
entry:
  store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
  ret void
}

; EG-LABEL: {{^}}v3i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI-LABEL: {{^}}v3i32_arg:
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
entry:
  store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
  ret void
}

; EG-LABEL: {{^}}v3f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI-LABEL: {{^}}v3f32_arg:
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
entry:
  store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
  ret void
}

; EG-LABEL: {{^}}v4i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; SI-LABEL: {{^}}v4i8_arg:
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
entry:
  store <4 x i8> %in, <4 x i8> addrspace(1)* %out
  ret void
}

; EG-LABEL: {{^}}v4i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; SI-LABEL: {{^}}v4i16_arg:
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
entry:
  store <4 x i16> %in, <4 x i16> addrspace(1)* %out
  ret void
}

; EG-LABEL: {{^}}v4i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI-LABEL: {{^}}v4i32_arg:
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
entry:
  store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
  ret void
}

; EG-LABEL: {{^}}v4f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI-LABEL: {{^}}v4f32_arg:
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
entry:
  store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
  ret void
}

; NOTE(review): only seven byte loads are checked on the amdgcn side for this
; eight-element vector, while the r600 side checks eight reads. Confirm
; whether an eighth check is missing.
; EG-LABEL: {{^}}v8i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; SI-LABEL: {{^}}v8i8_arg:
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
define void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
entry:
  store <8 x i8> %in, <8 x i8> addrspace(1)* %out
  ret void
}

; EG-LABEL: {{^}}v8i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; SI-LABEL: {{^}}v8i16_arg:
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
define void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
entry:
  store <8 x i16> %in, <8 x i16> addrspace(1)* %out
  ret void
}

; EG-LABEL: {{^}}v8i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI-LABEL: {{^}}v8i32_arg:
; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
entry:
  store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
  ret void
}

; EG-LABEL: {{^}}v8f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI-LABEL: {{^}}v8f32_arg:
; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
entry:
  store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
  ret void
}

; EG-LABEL: {{^}}v16i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; SI-LABEL: {{^}}v16i8_arg:
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
define void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
entry:
  store <16 x i8> %in, <16 x i8> addrspace(1)* %out
  ret void
}

; EG-LABEL: {{^}}v16i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; SI-LABEL: {{^}}v16i16_arg:
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
define void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
entry:
  store <16 x i16> %in, <16 x i16> addrspace(1)* %out
  ret void
}

; EG-LABEL: {{^}}v16i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI-LABEL: {{^}}v16i32_arg:
; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
entry:
  store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
  ret void
}

; EG-LABEL: {{^}}v16f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI-LABEL: {{^}}v16f32_arg:
; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
entry:
  store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
  ret void
}

; The label below previously used a check prefix that no invocation of
; FileCheck in this file enables, so it was never matched and the checks that
; follow were not anchored to this kernel. It now uses the amdgcn prefix.
; SI-LABEL: {{^}}kernel_arg_i64:
; SI: s_load_dwordx2
; SI: s_load_dwordx2
; SI: buffer_store_dwordx2
define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
  store i64 %a, i64 addrspace(1)* %out, align 8
  ret void
}

; XFUNC-LABEL: {{^}}kernel_arg_v1i64:
; XSI: s_load_dwordx2
; XSI: s_load_dwordx2
; XSI: buffer_store_dwordx2
; define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
;   store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
;   ret void
; }