; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=R600 -check-prefix=FUNC %s

; Instruction-selection test for integer and floating-point comparisons
; (setcc). Almost every kernel below compares its operands, sign-extends the
; i1 result (so "true" is stored as all-ones) and writes it to %out. The file
; is compiled twice, once per run line above, and each kernel lists the
; compare instructions expected from the respective target.

declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone

; <2 x i32> equality on kernel arguments: one scalar compare per element.
; FUNC-LABEL: {{^}}setcc_v2i32:
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[3].X, KC0[3].Z
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[2].W, KC0[3].Y

; GCN: v_cmp_eq_u32_e32
; GCN: v_cmp_eq_u32_e32
define amdgpu_kernel void @setcc_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 {
  %result = icmp eq <2 x i32> %a, %b
  %sext = sext <2 x i1> %result to <2 x i32>
  store <2 x i32> %sext, <2 x i32> addrspace(1)* %out
  ret void
}

; <4 x i32> equality on two vectors loaded from consecutive slots of %in:
; four scalar compares are expected.
; FUNC-LABEL: {{^}}setcc_v4i32:
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

; GCN: v_cmp_eq_u32_e32
; GCN: v_cmp_eq_u32_e32
; GCN: v_cmp_eq_u32_e32
; GCN: v_cmp_eq_u32_e32
define amdgpu_kernel void @setcc_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
  %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
  %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
  %result = icmp eq <4 x i32> %a, %b
  %sext = sext <4 x i1> %result to <4 x i32>
  store <4 x i32> %sext, <4 x i32> addrspace(1)* %out
  ret void
}

;;;==========================================================================;;;
;; Float comparisons
;;;==========================================================================;;;

; One kernel per fcmp predicate. The ordered predicates come first, then the
; unordered ones.

; FUNC-LABEL: {{^}}f32_oeq:
; R600: SETE_DX10
; GCN: v_cmp_eq_f32
define amdgpu_kernel void @f32_oeq(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp oeq float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}f32_ogt:
; R600: SETGT_DX10
; GCN: v_cmp_gt_f32
define amdgpu_kernel void @f32_ogt(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp ogt float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}f32_oge:
; R600: SETGE_DX10
; GCN: v_cmp_ge_f32
define amdgpu_kernel void @f32_oge(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp oge float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; The r600 run expects the same SETGT opcode as f32_ogt here (presumably with
; the operands swapped; the check does not pin the operand order).
; FUNC-LABEL: {{^}}f32_olt:
; R600: SETGT_DX10
; GCN: v_cmp_lt_f32
define amdgpu_kernel void @f32_olt(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp olt float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; The r600 run expects the same SETGE opcode as f32_oge here (presumably with
; the operands swapped; the check does not pin the operand order).
; FUNC-LABEL: {{^}}f32_ole:
; R600: SETGE_DX10
; GCN: v_cmp_le_f32
define amdgpu_kernel void @f32_ole(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp ole float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Ordered not-equal: the r600 run expects a multi-instruction expansion, the
; amdgcn run a single v_cmp_lg followed directly by the select of 0 / -1.
; FUNC-LABEL: {{^}}f32_one:
; R600-DAG: SETGT_DX10
; R600-DAG: SETGT_DX10
; R600-DAG: OR_INT
; R600-DAG: SETNE_INT

; GCN: v_cmp_lg_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define amdgpu_kernel void @f32_one(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp one float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}f32_ord:
; R600-DAG: SETE_DX10
; R600-DAG: SETE_DX10
; R600-DAG: AND_INT
; R600-DAG: SETNE_INT
; GCN: v_cmp_o_f32
define amdgpu_kernel void @f32_ord(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp ord float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Unordered-or-equal: expected on amdgcn as the negated form of the
; f32_one compare (v_cmp_nlg).
; FUNC-LABEL: {{^}}f32_ueq:
; R600-DAG: SETGT_DX10
; R600-DAG: SETGT_DX10
; R600-DAG: OR_INT
; R600-DAG: SETE_INT

; GCN: v_cmp_nlg_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define amdgpu_kernel void @f32_ueq(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp ueq float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; The remaining unordered relational predicates are expected on amdgcn as
; negated ordered compares (ugt -> nle, uge -> nlt, ult -> nge, ule -> ngt).

; FUNC-LABEL: {{^}}f32_ugt:
; R600: SETGE
; R600: SETE_DX10
; GCN: v_cmp_nle_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define amdgpu_kernel void @f32_ugt(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp ugt float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}f32_uge:
; R600: SETGT
; R600: SETE_DX10

; GCN: v_cmp_nlt_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define amdgpu_kernel void @f32_uge(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp uge float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}f32_ult:
; R600: SETGE
; R600: SETE_DX10

; GCN: v_cmp_nge_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define amdgpu_kernel void @f32_ult(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp ult float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}f32_ule:
; R600: SETGT
; R600: SETE_DX10

; GCN: v_cmp_ngt_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define amdgpu_kernel void @f32_ule(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp ule float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}f32_une:
; R600: SETNE_DX10
; GCN: v_cmp_neq_f32
define amdgpu_kernel void @f32_une(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp une float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}f32_uno:
; R600: SETNE_DX10
; R600: SETNE_DX10
; R600: OR_INT
; R600: SETNE_INT
; GCN: v_cmp_u_f32
define amdgpu_kernel void @f32_uno(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp uno float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

;;;==========================================================================;;;
;; 32-bit integer comparisons
;;;==========================================================================;;;

; One kernel per icmp predicate: equality, then unsigned, then signed. For the
; "less" predicates the r600 run expects the same opcode as the matching
; "greater" predicate; the checks do not pin the operand order.

; FUNC-LABEL: {{^}}i32_eq:
; R600: SETE_INT
; GCN: v_cmp_eq_u32
define amdgpu_kernel void @i32_eq(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %0 = icmp eq i32 %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}i32_ne:
; R600: SETNE_INT
; GCN: v_cmp_ne_u32
define amdgpu_kernel void @i32_ne(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %0 = icmp ne i32 %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}i32_ugt:
; R600: SETGT_UINT
; GCN: v_cmp_gt_u32
define amdgpu_kernel void @i32_ugt(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %0 = icmp ugt i32 %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}i32_uge:
; R600: SETGE_UINT
; GCN: v_cmp_ge_u32
define amdgpu_kernel void @i32_uge(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %0 = icmp uge i32 %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}i32_ult:
; R600: SETGT_UINT
; GCN: v_cmp_lt_u32
define amdgpu_kernel void @i32_ult(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %0 = icmp ult i32 %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}i32_ule:
; R600: SETGE_UINT
; GCN: v_cmp_le_u32
define amdgpu_kernel void @i32_ule(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %0 = icmp ule i32 %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}i32_sgt:
; R600: SETGT_INT
; GCN: v_cmp_gt_i32
define amdgpu_kernel void @i32_sgt(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %0 = icmp sgt i32 %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}i32_sge:
; R600: SETGE_INT
; GCN: v_cmp_ge_i32
define amdgpu_kernel void @i32_sge(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %0 = icmp sge i32 %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}i32_slt:
; R600: SETGT_INT
; GCN: v_cmp_lt_i32
define amdgpu_kernel void @i32_slt(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %0 = icmp slt i32 %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}i32_sle:
; R600: SETGE_INT
; GCN: v_cmp_le_i32
define amdgpu_kernel void @i32_sle(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %0 = icmp sle i32 %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Three-element vector compares, indexed per work-item. Only the amdgcn run
; has instruction checks for the two kernels below.

; FIXME: This does 4 compares
; FUNC-LABEL: {{^}}v3i32_eq:
; GCN-DAG: v_cmp_eq_u32
; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; GCN-DAG: v_cmp_eq_u32
; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; GCN-DAG: v_cmp_eq_u32
; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; GCN: s_endpgm
define amdgpu_kernel void @v3i32_eq(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %ptra, <3 x i32> addrspace(1)* %ptrb) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.a = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptra, i32 %tid
  %gep.b = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptrb, i32 %tid
  %gep.out = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid
  %a = load <3 x i32>, <3 x i32> addrspace(1)* %gep.a
  %b = load <3 x i32>, <3 x i32> addrspace(1)* %gep.b
  %cmp = icmp eq <3 x i32> %a, %b
  %ext = sext <3 x i1> %cmp to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %gep.out
  ret void
}

; Same shape as v3i32_eq but with i8 elements; the checks still expect 32-bit
; compares.
; FUNC-LABEL: {{^}}v3i8_eq:
; GCN-DAG: v_cmp_eq_u32
; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; GCN-DAG: v_cmp_eq_u32
; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; GCN-DAG: v_cmp_eq_u32
; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; GCN: s_endpgm
define amdgpu_kernel void @v3i8_eq(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %ptra, <3 x i8> addrspace(1)* %ptrb) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.a = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptra, i32 %tid
  %gep.b = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptrb, i32 %tid
  %gep.out = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %out, i32 %tid
  %a = load <3 x i8>, <3 x i8> addrspace(1)* %gep.a
  %b = load <3 x i8>, <3 x i8> addrspace(1)* %gep.b
  %cmp = icmp eq <3 x i8> %a, %b
  %ext = sext <3 x i1> %cmp to <3 x i8>
  store <3 x i8> %ext, <3 x i8> addrspace(1)* %gep.out
  ret void
}

; NOTE(review): the label checks of the two hyphenated kernels below are left
; unanchored on purpose. Their symbol names contain '-', so the printed label
; is presumably quoted and would not match an anchored name pattern; confirm
; against the actual llc output before tightening them.

; Make sure we don't try to emit i1 setcc ops
; FUNC-LABEL: setcc-i1
; GCN: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, 1
; GCN: s_cmp_eq_u32 [[AND]], 0
define amdgpu_kernel void @setcc-i1(i32 %in) #0 {
  %and = and i32 %in, 1
  %cmp = icmp eq i32 %and, 0
  br i1 %cmp, label %endif, label %if
if:
  unreachable
endif:
  ret void
}

; not(and(oge, ole)) feeding a branch: expected as the two negated compares
; (nge, nle) OR'd together rather than an and followed by an xor.
; FUNC-LABEL: setcc-i1-and-xor
; GCN-DAG: v_cmp_nge_f32_e64 [[A:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
; GCN-DAG: v_cmp_nle_f32_e64 [[B:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 1.0
; GCN: s_or_b64 s[2:3], [[A]], [[B]]
define amdgpu_kernel void @setcc-i1-and-xor(i32 addrspace(1)* %out, float %cond) #0 {
bb0:
  %tmp5 = fcmp oge float %cond, 0.000000e+00
  %tmp7 = fcmp ole float %cond, 1.000000e+00
  %tmp9 = and i1 %tmp5, %tmp7
  %tmp11 = xor i1 %tmp9, 1
  br i1 %tmp11, label %bb2, label %bb1

bb1:
  store i32 0, i32 addrspace(1)* %out
  br label %bb2

bb2:
  ret void
}

; Vector signed compares whose results feed a zext/shl/xor chain and a vector
; select. One scalar v_cmp_gt_i32 per checked element is expected on amdgcn.
; The label checks are anchored like the other plain-identifier kernels in
; this file.

; FUNC-LABEL: {{^}}setcc_v2i32_expand:
; GCN: v_cmp_gt_i32
; GCN: v_cmp_gt_i32
define amdgpu_kernel void @setcc_v2i32_expand(
    <2 x i32> addrspace(1)* %a,
    <2 x i32> addrspace(1)* %b,
    <2 x i32> addrspace(1)* %c,
    <2 x float> addrspace(1)* %r) {
entry:
  %a.val = load <2 x i32>, <2 x i32> addrspace(1)* %a
  %b.val = load <2 x i32>, <2 x i32> addrspace(1)* %b
  %c.val = load <2 x i32>, <2 x i32> addrspace(1)* %c

  %icmp.val.1 = icmp sgt <2 x i32> %a.val, <i32 1, i32 1>
  %zext.val.1 = zext <2 x i1> %icmp.val.1 to <2 x i32>
  %shl.val.1 = shl nuw <2 x i32> %zext.val.1, <i32 31, i32 31>
  %xor.val.1 = xor <2 x i32> %shl.val.1, %b.val
  %bitcast.val.1 = bitcast <2 x i32> %xor.val.1 to <2 x float>
  %icmp.val.2 = icmp sgt <2 x i32> %c.val, <i32 1199570944, i32 1199570944>
  %select.val.1 = select <2 x i1> %icmp.val.2, <2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x float> %bitcast.val.1

  store <2 x float> %select.val.1, <2 x float> addrspace(1)* %r
  ret void
}

; FUNC-LABEL: {{^}}setcc_v4i32_expand:
; GCN: v_cmp_gt_i32
; GCN: v_cmp_gt_i32
; GCN: v_cmp_gt_i32
; GCN: v_cmp_gt_i32
define amdgpu_kernel void @setcc_v4i32_expand(
    <4 x i32> addrspace(1)* %a,
    <4 x i32> addrspace(1)* %b,
    <4 x i32> addrspace(1)* %c,
    <4 x float> addrspace(1)* %r) {
entry:
  %a.val = load <4 x i32>, <4 x i32> addrspace(1)* %a
  %b.val = load <4 x i32>, <4 x i32> addrspace(1)* %b
  %c.val = load <4 x i32>, <4 x i32> addrspace(1)* %c

  %icmp.val.1 = icmp sgt <4 x i32> %a.val, <i32 1, i32 1, i32 1, i32 1>
  %zext.val.1 = zext <4 x i1> %icmp.val.1 to <4 x i32>
  %shl.val.1 = shl nuw <4 x i32> %zext.val.1, <i32 31, i32 31, i32 31, i32 31>
  %xor.val.1 = xor <4 x i32> %shl.val.1, %b.val
  %bitcast.val.1 = bitcast <4 x i32> %xor.val.1 to <4 x float>
  %icmp.val.2 = icmp sgt <4 x i32> %c.val, <i32 1199570944, i32 1199570944, i32 1199570944, i32 1199570944>
  %select.val.1 = select <4 x i1> %icmp.val.2, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %bitcast.val.1

  store <4 x float> %select.val.1, <4 x float> addrspace(1)* %r
  ret void
}

attributes #0 = { nounwind }