; RUN: llc < %s -mtriple=x86_64-- -mcpu=corei7 -verify-machineinstrs | FileCheck %s

; @test - input function for the llc/FileCheck RUN line of this file.
;
; Parameters:
;   %a - pointer to two consecutive i8* slots. a[0] is the i32 cell that every
;        atomicrmw min below targets; a[1] is the base of the data buffer that
;        the inner loops read and then overwrite.
;   %b - pointer to an i64. Read once in entry (initial outer induction value),
;        rewritten on every outer iteration, and set back to the initially
;        loaded value in the return block.
;   %c - outer loop bound: the outer loop exits when %iv == %c.
;   %d - outer loop step: %iv advances by %d and the data pointer by %d * 4
;        (applied as a GEP over i1) on each outer iteration.
;
; Structure: an outer loop (loop.cond .. test.exit) wrapping an 8-lane vector
; loop followed by a scalar remainder loop. Both inner trip counts are derived
; from %5, a value loaded through a pointer read from address 264 in address
; space 256 (the GS segment in LLVM's X86 backend).
define void @test(i8** %a, i64* %b, i64 %c, i64 %d) nounwind {
entry:
  ; Fetch the two pointers held in %a and the initial counter held in %b.
  %ptrtoarg4 = load i8*, i8** %a, align 8
  %brglist1 = getelementptr i8*, i8** %a, i64 1
  %ptrtoarg25 = load i8*, i8** %brglist1, align 8
  %0 = load i64, i64* %b, align 8
  ; Initial data pointer = a[1] + (%0 * 4) bytes; %2 is the per-iteration step.
  %1 = mul i64 %0, 4
  %scevgep = getelementptr i8, i8* %ptrtoarg25, i64 %1
  %2 = mul i64 %d, 4
  br label %loop.cond

; Outer loop header: %asr.iv6 is the current data pointer, %iv the counter.
loop.cond:                                        ; preds = %test.exit, %entry
  %asr.iv6 = phi i8* [ %29, %test.exit ], [ %scevgep, %entry ]
  %iv = phi i64 [ %0, %entry ], [ %28, %test.exit ]
  %3 = icmp eq i64 %iv, %c
  br i1 %3, label %return, label %loop

; Load the inner element count %5 and split it into groups of eight:
; %vector.size.i = %5 >> 3 (vector iterations), %num.vector.wi.i = that << 3.
loop:                                             ; preds = %loop.cond
  %4 = load i64*, i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8
  %5 = load i64, i64* %4, align 8
  %vector.size.i = ashr i64 %5, 3
  %num.vector.wi.i = shl i64 %vector.size.i, 3
  %6 = icmp eq i64 %vector.size.i, 0
  br i1 %6, label %scalarIf.i, label %dim_0_vector_pre_head.i

; Build an <8 x i32> splat of trunc(%5); the vector loop stores it back.
dim_0_vector_pre_head.i:                          ; preds = %loop
  %7 = trunc i64 %5 to i32
  %tempvector_func.i = insertelement <8 x i32> undef, i32 %7, i32 0
  %vectorvector_func.i = shufflevector <8 x i32> %tempvector_func.i, <8 x i32> undef, <8 x i32> zeroinitializer
  br label %vector_kernel_entry.i

; Vector loop: per iteration load eight i32 lanes from the data pointer, issue
; one seq_cst atomicrmw min per lane against the cell at %ptrtoarg4, overwrite
; the eight lanes with the splat, then step the pointer by 32 bytes. %asr.iv
; counts down from %vector.size.i to 0.
vector_kernel_entry.i:                            ; preds = %vector_kernel_entry.i, %dim_0_vector_pre_head.i
  %asr.iv9 = phi i8* [ %scevgep10, %vector_kernel_entry.i ], [ %asr.iv6, %dim_0_vector_pre_head.i ]
  %asr.iv = phi i64 [ %asr.iv.next, %vector_kernel_entry.i ], [ %vector.size.i, %dim_0_vector_pre_head.i ]
  %8 = addrspacecast i8* %ptrtoarg4 to i32 addrspace(1)*
  %asr.iv911 = addrspacecast i8* %asr.iv9 to <8 x i32> addrspace(1)*
  %9 = load <8 x i32>, <8 x i32> addrspace(1)* %asr.iv911, align 4
  %extract8vector_func.i = extractelement <8 x i32> %9, i32 0
  %extract9vector_func.i = extractelement <8 x i32> %9, i32 1
  %extract10vector_func.i = extractelement <8 x i32> %9, i32 2
  %extract11vector_func.i = extractelement <8 x i32> %9, i32 3
  %extract12vector_func.i = extractelement <8 x i32> %9, i32 4
  %extract13vector_func.i = extractelement <8 x i32> %9, i32 5
  %extract14vector_func.i = extractelement <8 x i32> %9, i32 6
  %extract15vector_func.i = extractelement <8 x i32> %9, i32 7
  %10 = atomicrmw min i32 addrspace(1)* %8, i32 %extract8vector_func.i seq_cst
  %11 = atomicrmw min i32 addrspace(1)* %8, i32 %extract9vector_func.i seq_cst
  %12 = atomicrmw min i32 addrspace(1)* %8, i32 %extract10vector_func.i seq_cst
  %13 = atomicrmw min i32 addrspace(1)* %8, i32 %extract11vector_func.i seq_cst
  %14 = atomicrmw min i32 addrspace(1)* %8, i32 %extract12vector_func.i seq_cst
  %15 = atomicrmw min i32 addrspace(1)* %8, i32 %extract13vector_func.i seq_cst
  %16 = atomicrmw min i32 addrspace(1)* %8, i32 %extract14vector_func.i seq_cst
  %17 = atomicrmw min i32 addrspace(1)* %8, i32 %extract15vector_func.i seq_cst
  store <8 x i32> %vectorvector_func.i, <8 x i32> addrspace(1)* %asr.iv911, align 4
  %asr.iv.next = add i64 %asr.iv, -1
  %scevgep10 = getelementptr i8, i8* %asr.iv9, i64 32
  %dim_0_vector_cmp.to.max.i = icmp eq i64 %asr.iv.next, 0
  br i1 %dim_0_vector_cmp.to.max.i, label %scalarIf.i, label %vector_kernel_entry.i

; %exec_wi.i = number of elements already covered by the vector loop (0 when it
; was skipped). Skip the scalar remainder when that already equals %5.
scalarIf.i:                                       ; preds = %vector_kernel_entry.i, %loop
  %exec_wi.i = phi i64 [ 0, %loop ], [ %num.vector.wi.i, %vector_kernel_entry.i ]
  %18 = icmp eq i64 %exec_wi.i, %5
  br i1 %18, label %test.exit, label %dim_0_pre_head.i

; Scalar preheader: re-load the count through address space 256 to obtain the
; i32 value stored by the scalar loop; %22 is the first remainder index.
dim_0_pre_head.i:                                 ; preds = %scalarIf.i
  %19 = load i64*, i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8
  %20 = load i64, i64* %19, align 8
  %21 = trunc i64 %20 to i32
  %22 = mul i64 %vector.size.i, 8
  br label %scalar_kernel_entry.i

; Scalar remainder loop: for each index from %22 up to (excluding) %5, load one
; i32, atomicrmw min it into the cell at %ptrtoarg4, and overwrite the element
; with %21.
scalar_kernel_entry.i:                            ; preds = %scalar_kernel_entry.i, %dim_0_pre_head.i
  %asr.iv12 = phi i64 [ %asr.iv.next13, %scalar_kernel_entry.i ], [ %22, %dim_0_pre_head.i ]
  %23 = addrspacecast i8* %asr.iv6 to i32 addrspace(1)*
  %24 = addrspacecast i8* %ptrtoarg4 to i32 addrspace(1)*
  %scevgep16 = getelementptr i32, i32 addrspace(1)* %23, i64 %asr.iv12
  %25 = load i32, i32 addrspace(1)* %scevgep16, align 4
  %26 = atomicrmw min i32 addrspace(1)* %24, i32 %25 seq_cst
  %scevgep15 = getelementptr i32, i32 addrspace(1)* %23, i64 %asr.iv12
  store i32 %21, i32 addrspace(1)* %scevgep15, align 4
  %asr.iv.next13 = add i64 %asr.iv12, 1
  %dim_0_cmp.to.max.i = icmp eq i64 %5, %asr.iv.next13
  br i1 %dim_0_cmp.to.max.i, label %test.exit, label %scalar_kernel_entry.i

; Outer loop latch: publish the advanced counter to %b and step the data
; pointer by %2 (GEP over i1) before re-testing in loop.cond.
test.exit:                     ; preds = %scalar_kernel_entry.i, %scalarIf.i
  %27 = bitcast i8* %asr.iv6 to i1*
  %28 = add i64 %iv, %d
  store i64 %28, i64* %b, align 8
  %scevgep8 = getelementptr i1, i1* %27, i64 %2
  %29 = bitcast i1* %scevgep8 to i8*
  br label %loop.cond

; Exit: write the value originally loaded from %b back to %b.
return:                                           ; preds = %loop.cond
  store i64 %0, i64* %b, align 8
  ret void
}

; CHECK: test
; CHECK: decq
; CHECK-NOT: cmpxchgl
; CHECK: jne
; CHECK: ret
