1; RUN: llc < %s -mtriple=powerpc-apple-darwin -mcpu=g4 -disable-ppc-ilp-pref | FileCheck %s
2; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g4 -disable-ppc-ilp-pref | FileCheck %s
3
; Module-level setup for the @s122 codegen test: target description, the
; global arrays the kernel works on, two string constants, and the external
; helpers the kernel calls.
; NOTE(review): every line in this file begins with a decimal number fused onto
; the content (e.g. "5target") -- this looks like a line-number extraction
; artifact; confirm and strip it before handing the file to llc.
4; ModuleID = 'tsc.c'
; Big-endian layout ("E") with 64-bit pointers ("p:64:64:64"). The llc
; invocations at the top of the file pass -mtriple explicitly, once with
; powerpc-apple-darwin and once with powerpc64-unknown-linux-gnu.
5target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
6target triple = "powerpc64-unknown-linux-gnu"
7
; Five 32000-element float arrays. @s122 loads/stores @a and loads @b
; directly; all five are also passed by address to @dummy.
8@a = common global [32000 x float] zeroinitializer, align 16
9@b = common global [32000 x float] zeroinitializer, align 16
10@c = common global [32000 x float] zeroinitializer, align 16
11@d = common global [32000 x float] zeroinitializer, align 16
12@e = common global [32000 x float] zeroinitializer, align 16
; Three 256x256 float matrices; in @s122 they are only passed to @dummy.
13@aa = common global [256 x [256 x float]] zeroinitializer, align 16
14@bb = common global [256 x [256 x float]] zeroinitializer, align 16
15@cc = common global [256 x [256 x float]] zeroinitializer, align 16
16
; @.str11 is the argument handed to @init; @.str152 is the printf format used
; to report the elapsed time at the end of @s122.
17@.str11 = private unnamed_addr constant [6 x i8] c"s122 \00", align 1
18@.str152 = private unnamed_addr constant [14 x i8] c"S122\09 %.2f \09\09\00", align 1
19
; External helpers called from @s122. Their bodies are not part of this file,
; so nothing is stated here about what they do beyond their signatures.
20declare i32 @printf(i8* nocapture, ...) nounwind
21declare i32 @init(i8* %name) nounwind
22declare i64 @clock() nounwind
; @dummy takes the addresses of a, b, c, d, e, aa, bb, cc plus one float.
23declare i32 @dummy(float*, float*, float*, float*, float*, [256 x float]*, [256 x float]*, [256 x float]*, float)
24declare void @check(i32 %name) nounwind
25
26; CHECK: mfcr
27; CHECK: mtcr
28
; @s122(n1, n3): timing-harness kernel used as input for a PowerPC codegen test.
;
; Shape of the function as written:
;   * entry calls init("s122 ") and samples clock().
;   * If (n1 - 1) < 32000 (signed), control enters an outer loop whose body
;     holds five copies of the same inner loop (suffixes none, .1, .2, .3, .4),
;     each followed by a call to @dummy; the outer counter advances by 5 per
;     trip and the loop exits when it equals 200000.
;   * Otherwise control enters %for.end.7, which only calls @dummy eight times
;     per trip and advances its counter by 8 until it equals 200000.
;   * Both paths join in %for.end12, which prints the elapsed clock() delta
;     divided by 1.0e6, calls check(1) and returns 0.
;
; Each inner loop does  a[i] = a[i] + b[31999 - k]  with i starting at n1 - 1
; and stepping by n3, and k counting up from 0 by 1.
;
; The FileCheck directives placed just above this function expect an mfcr
; followed by an mtcr in the llc output.
; NOTE(review): presumably the test is about saving/restoring the condition
; register around the calls -- confirm against the commit that added it.
; Do not restructure the IR; the expected output depends on this exact shape.
29define i32 @s122(i32 %n1, i32 %n3) nounwind {
30entry:
; Harness prologue: results of @init is unused; %call1 is the start timestamp.
31  %call = tail call i32 @init(i8* getelementptr inbounds ([6 x i8]* @.str11, i64 0, i64 0))
32  %call1 = tail call i64 @clock() nounwind
; %sub = n1 - 1 is the first index written in @a. If it is already >= 32000
; the inner loops would have nothing to do, so the dummy-only path is taken.
33  %sub = add nsw i32 %n1, -1
34  %cmp316 = icmp slt i32 %sub, 32000
35  br i1 %cmp316, label %entry.split.us, label %for.end.7
36
37entry.split.us:                                   ; preds = %entry
; Widen the start index (%0) and the stride (%1) once; every inner-loop copy
; below reuses these two values.
38  %0 = sext i32 %sub to i64
39  %1 = sext i32 %n3 to i64
40  br label %for.body4.lr.ph.us
41
; Inner loop, copy 0.
42for.body4.us:                                     ; preds = %for.body4.lr.ph.us, %for.body4.us
; %indvars.iv20 = k (0, 1, 2, ...); %indvars.iv = i (starts at %0, += %1).
43  %indvars.iv20 = phi i64 [ 0, %for.body4.lr.ph.us ], [ %indvars.iv.next21, %for.body4.us ]
44  %indvars.iv = phi i64 [ %0, %for.body4.lr.ph.us ], [ %indvars.iv.next, %for.body4.us ]
45  %indvars.iv.next21 = add i64 %indvars.iv20, 1
; Index into @b: 31999 - k, then shl/ashr by 32 sign-extends the low 32 bits.
46  %sub5.us = sub i64 31999, %indvars.iv20
47  %sext = shl i64 %sub5.us, 32
48  %idxprom.us = ashr exact i64 %sext, 32
49  %arrayidx.us = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us
50  %2 = load float* %arrayidx.us, align 4
; a[i] += b[31999 - k]
51  %arrayidx7.us = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv
52  %3 = load float* %arrayidx7.us, align 4
53  %add8.us = fadd float %3, %2
54  store float %add8.us, float* %arrayidx7.us, align 4
; Advance i by the stride; keep looping while the 32-bit truncation of the
; next i is still < 32000 (signed compare).
55  %indvars.iv.next = add i64 %indvars.iv, %1
56  %4 = trunc i64 %indvars.iv.next to i32
57  %cmp3.us = icmp slt i32 %4, 32000
58  br i1 %cmp3.us, label %for.body4.us, label %for.body4.lr.ph.us.1
59
; Outer-loop header: %nl.019.us is the outer trip counter (0, 5, 10, ...).
60for.body4.lr.ph.us:                               ; preds = %entry.split.us, %for.end.us.4
61  %nl.019.us = phi i32 [ 0, %entry.split.us ], [ %inc.us.4, %for.end.us.4 ]
62  br label %for.body4.us
63
; Common exit for both paths: report elapsed time and finish.
64for.end12:                                        ; preds = %for.end.7, %for.end.us.4
; elapsed = clock() - start, converted to double and divided by 1.0e6.
65  %call13 = tail call i64 @clock() nounwind
66  %sub14 = sub nsw i64 %call13, %call1
67  %conv = sitofp i64 %sub14 to double
68  %div = fdiv double %conv, 1.000000e+06
69  %call15 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8]* @.str152, i64 0, i64 0), double %div) nounwind
70  tail call void @check(i32 1)
71  ret i32 0
72
; Between copy 0 and copy 1: call @dummy with the addresses of all eight
; globals and 0.0; the i32 result is unused.
73for.body4.lr.ph.us.1:                             ; preds = %for.body4.us
74  %call10.us = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
75  br label %for.body4.us.1
76
; Inner loop, copy 1: same computation as copy 0, restarting i at %0 and k at 0.
77for.body4.us.1:                                   ; preds = %for.body4.us.1, %for.body4.lr.ph.us.1
78  %indvars.iv20.1 = phi i64 [ 0, %for.body4.lr.ph.us.1 ], [ %indvars.iv.next21.1, %for.body4.us.1 ]
79  %indvars.iv.1 = phi i64 [ %0, %for.body4.lr.ph.us.1 ], [ %indvars.iv.next.1, %for.body4.us.1 ]
80  %indvars.iv.next21.1 = add i64 %indvars.iv20.1, 1
81  %sub5.us.1 = sub i64 31999, %indvars.iv20.1
82  %sext23 = shl i64 %sub5.us.1, 32
83  %idxprom.us.1 = ashr exact i64 %sext23, 32
84  %arrayidx.us.1 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.1
85  %5 = load float* %arrayidx.us.1, align 4
86  %arrayidx7.us.1 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.1
87  %6 = load float* %arrayidx7.us.1, align 4
88  %add8.us.1 = fadd float %6, %5
89  store float %add8.us.1, float* %arrayidx7.us.1, align 4
90  %indvars.iv.next.1 = add i64 %indvars.iv.1, %1
91  %7 = trunc i64 %indvars.iv.next.1 to i32
92  %cmp3.us.1 = icmp slt i32 %7, 32000
93  br i1 %cmp3.us.1, label %for.body4.us.1, label %for.body4.lr.ph.us.2
94
; Between copy 1 and copy 2: another @dummy call, result unused.
95for.body4.lr.ph.us.2:                             ; preds = %for.body4.us.1
96  %call10.us.1 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
97  br label %for.body4.us.2
98
; Inner loop, copy 2.
99for.body4.us.2:                                   ; preds = %for.body4.us.2, %for.body4.lr.ph.us.2
100  %indvars.iv20.2 = phi i64 [ 0, %for.body4.lr.ph.us.2 ], [ %indvars.iv.next21.2, %for.body4.us.2 ]
101  %indvars.iv.2 = phi i64 [ %0, %for.body4.lr.ph.us.2 ], [ %indvars.iv.next.2, %for.body4.us.2 ]
102  %indvars.iv.next21.2 = add i64 %indvars.iv20.2, 1
103  %sub5.us.2 = sub i64 31999, %indvars.iv20.2
104  %sext24 = shl i64 %sub5.us.2, 32
105  %idxprom.us.2 = ashr exact i64 %sext24, 32
106  %arrayidx.us.2 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.2
107  %8 = load float* %arrayidx.us.2, align 4
108  %arrayidx7.us.2 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.2
109  %9 = load float* %arrayidx7.us.2, align 4
110  %add8.us.2 = fadd float %9, %8
111  store float %add8.us.2, float* %arrayidx7.us.2, align 4
112  %indvars.iv.next.2 = add i64 %indvars.iv.2, %1
113  %10 = trunc i64 %indvars.iv.next.2 to i32
114  %cmp3.us.2 = icmp slt i32 %10, 32000
115  br i1 %cmp3.us.2, label %for.body4.us.2, label %for.body4.lr.ph.us.3
116
; Between copy 2 and copy 3: another @dummy call, result unused.
117for.body4.lr.ph.us.3:                             ; preds = %for.body4.us.2
118  %call10.us.2 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
119  br label %for.body4.us.3
120
; Inner loop, copy 3.
121for.body4.us.3:                                   ; preds = %for.body4.us.3, %for.body4.lr.ph.us.3
122  %indvars.iv20.3 = phi i64 [ 0, %for.body4.lr.ph.us.3 ], [ %indvars.iv.next21.3, %for.body4.us.3 ]
123  %indvars.iv.3 = phi i64 [ %0, %for.body4.lr.ph.us.3 ], [ %indvars.iv.next.3, %for.body4.us.3 ]
124  %indvars.iv.next21.3 = add i64 %indvars.iv20.3, 1
125  %sub5.us.3 = sub i64 31999, %indvars.iv20.3
126  %sext25 = shl i64 %sub5.us.3, 32
127  %idxprom.us.3 = ashr exact i64 %sext25, 32
128  %arrayidx.us.3 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.3
129  %11 = load float* %arrayidx.us.3, align 4
130  %arrayidx7.us.3 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.3
131  %12 = load float* %arrayidx7.us.3, align 4
132  %add8.us.3 = fadd float %12, %11
133  store float %add8.us.3, float* %arrayidx7.us.3, align 4
134  %indvars.iv.next.3 = add i64 %indvars.iv.3, %1
135  %13 = trunc i64 %indvars.iv.next.3 to i32
136  %cmp3.us.3 = icmp slt i32 %13, 32000
137  br i1 %cmp3.us.3, label %for.body4.us.3, label %for.body4.lr.ph.us.4
138
; Between copy 3 and copy 4: another @dummy call, result unused.
139for.body4.lr.ph.us.4:                             ; preds = %for.body4.us.3
140  %call10.us.3 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
141  br label %for.body4.us.4
142
; Inner loop, copy 4 (last copy in the outer-loop body).
143for.body4.us.4:                                   ; preds = %for.body4.us.4, %for.body4.lr.ph.us.4
144  %indvars.iv20.4 = phi i64 [ 0, %for.body4.lr.ph.us.4 ], [ %indvars.iv.next21.4, %for.body4.us.4 ]
145  %indvars.iv.4 = phi i64 [ %0, %for.body4.lr.ph.us.4 ], [ %indvars.iv.next.4, %for.body4.us.4 ]
146  %indvars.iv.next21.4 = add i64 %indvars.iv20.4, 1
147  %sub5.us.4 = sub i64 31999, %indvars.iv20.4
148  %sext26 = shl i64 %sub5.us.4, 32
149  %idxprom.us.4 = ashr exact i64 %sext26, 32
150  %arrayidx.us.4 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.4
151  %14 = load float* %arrayidx.us.4, align 4
152  %arrayidx7.us.4 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.4
153  %15 = load float* %arrayidx7.us.4, align 4
154  %add8.us.4 = fadd float %15, %14
155  store float %add8.us.4, float* %arrayidx7.us.4, align 4
156  %indvars.iv.next.4 = add i64 %indvars.iv.4, %1
157  %16 = trunc i64 %indvars.iv.next.4 to i32
158  %cmp3.us.4 = icmp slt i32 %16, 32000
159  br i1 %cmp3.us.4, label %for.body4.us.4, label %for.end.us.4
160
; Outer-loop latch: fifth @dummy call, then counter += 5 and exit at 200000.
161for.end.us.4:                                     ; preds = %for.body4.us.4
162  %call10.us.4 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
163  %inc.us.4 = add nsw i32 %nl.019.us, 5
164  %exitcond.4 = icmp eq i32 %inc.us.4, 200000
165  br i1 %exitcond.4, label %for.end12, label %for.body4.lr.ph.us
166
; Path taken when n1 - 1 >= 32000: no array work, just eight @dummy calls per
; trip with the counter stepping by 8 until it equals 200000.
167for.end.7:                                        ; preds = %entry, %for.end.7
168  %nl.019 = phi i32 [ %inc.7, %for.end.7 ], [ 0, %entry ]
169  %call10 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
170  %call10.1 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
171  %call10.2 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
172  %call10.3 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
173  %call10.4 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
174  %call10.5 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
175  %call10.6 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
176  %call10.7 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
177  %inc.7 = add nsw i32 %nl.019, 8
178  %exitcond.7 = icmp eq i32 %inc.7, 200000
179  br i1 %exitcond.7, label %for.end12, label %for.end.7
180}
181
; Trailing declarations and metadata. Neither @llvm.memcpy.p0i8.p0i8.i64 nor
; @puts is called from @s122 in the part of the file shown here.
; NOTE(review): they look like leftovers from reducing the original module --
; confirm before removing.
182declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
183
184declare i32 @puts(i8* nocapture) nounwind
185
; A "branch_weights" metadata node with weights 64 and 4. No instruction in
; the visible file attaches it (there is no !prof reference to !3).
186!3 = !{!"branch_weights", i32 64, i32 4}
187