1
2 // loop3 generated by makeloops.py Thu Jun 30 16:44:56 2011
3
4 #include <blitz/vector2.h>
5 #include <blitz/array.h>
6 #include <random/uniform.h>
7 #include <blitz/benchext.h>
8
9 #ifdef BZ_HAVE_VALARRAY
10 #define BENCHMARK_VALARRAY
11 #endif
12
13 #ifdef BENCHMARK_VALARRAY
14 #include <valarray>
15 #endif
16
17 namespace blitz {
18 extern void sink();
19 }
20
21 using namespace blitz;
22 using namespace std;
23
24 #if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES)
25 #define loop3_f77 loop3_f77_
26 #define loop3_f77overhead loop3_f77overhead_
27 #define loop3_f90 loop3_f90_
28 #define loop3_f90overhead loop3_f90overhead_
29 #elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES)
30 #define loop3_f77 loop3_f77__
31 #define loop3_f77overhead loop3_f77overhead__
32 #define loop3_f90 loop3_f90__
33 #define loop3_f90overhead loop3_f90overhead__
34 #elif defined(BZ_FORTRAN_SYMBOLS_CAPS)
35 #define loop3_f77 LOOP3_F77
36 #define loop3_f77overhead LOOP3_F77OVERHEAD
37 #define loop3_f90 LOOP3_F90
38 #define loop3_f90overhead LOOP3_F90OVERHEAD
39 #endif
40
41 extern "C" {
42 void loop3_f77(const int& N, double* x, double* y, const double& a);
43 void loop3_f77overhead(const int& N, double* x, double* y, const double& a);
44 void loop3_f90(const int& N, double* x, double* y, const double& a);
45 void loop3_f90overhead(const int& N, double* x, double* y, const double& a);
46
47 }
48
49 void VectorVersion(BenchmarkExt<int>& bench, double a);
50 void ArrayVersion(BenchmarkExt<int>& bench, double a);
51 void ArrayVersion_unaligned(BenchmarkExt<int>& bench, double a);
52 void ArrayVersion_misaligned(BenchmarkExt<int>& bench, double a);
53 void ArrayVersion_index(BenchmarkExt<int>& bench, double a);
54 void doTinyVectorVersion(BenchmarkExt<int>& bench, double a);
55 void F77Version(BenchmarkExt<int>& bench, double a);
56 #ifdef FORTRAN_90
57 void F90Version(BenchmarkExt<int>& bench, double a);
58 #endif
59 #ifdef BENCHMARK_VALARRAY
60 void ValarrayVersion(BenchmarkExt<int>& bench, double a);
61 #endif
62
// Number of problem sizes every implementation is timed at.
const int numSizes = 80;

// Whether VectorVersion is benchmarked as well.
// Disabled: no point as long as Vector is Array<1>.
const bool runvector = false;
65
main()66 int main()
67 {
68 int numBenchmarks = 5;
69 if (runvector) numBenchmarks++;
70 #ifdef BENCHMARK_VALARRAY
71 numBenchmarks++;
72 #endif
73 #ifdef FORTRAN_90
74 numBenchmarks++;
75 #endif
76
77 BenchmarkExt<int> bench("loop3: $y = $y + a*$x", numBenchmarks);
78
79 bench.setNumParameters(numSizes);
80
81 Array<int,1> parameters(numSizes);
82 Array<long,1> iters(numSizes);
83 Array<double,1> flops(numSizes);
84
85 parameters=pow(pow(2.,0.25),tensor::i)+tensor::i;
86 flops = 2 * parameters;
87 iters = 100000000L / flops;
88 iters = where(iters<2, 2, iters);
89 cout << iters << endl;
90
91 bench.setParameterVector(parameters);
92 bench.setIterations(iters);
93 bench.setOpsPerIteration(flops);
94 bench.setDependentVariable("flops");
95 bench.beginBenchmarking();
96
97 double a = 0.39123982498157938742;
98
99
100 ArrayVersion(bench, a);
101 ArrayVersion_unaligned(bench, a);
102 ArrayVersion_misaligned(bench, a);
103 ArrayVersion_index(bench, a);
104 //doTinyVectorVersion(bench, a);
105 F77Version(bench, a);
106 #ifdef FORTRAN_90
107 F90Version(bench, a);
108 #endif
109 #ifdef BENCHMARK_VALARRAY
110 ValarrayVersion(bench, a);
111 #endif
112
113 if(runvector)
114 VectorVersion(bench, a);
115
116 bench.endBenchmarking();
117
118 bench.saveMatlabGraph("loop3.m");
119 return 0;
120 }
121
122 template<class T>
initializeRandomDouble(T * data,int numElements,int stride=1)123 void initializeRandomDouble(T* data, int numElements, int stride = 1)
124 {
125 ranlib::Uniform<T> rnd;
126
127 for (int i=0; i < numElements; ++i)
128 data[size_t(i*stride)] = rnd.random();
129 }
130
// <valarray> is only included when BENCHMARK_VALARRAY is defined, and the
// only user of this overload (ValarrayVersion) sits under the same guard,
// so the overload is compiled under that guard as well.
#ifdef BENCHMARK_VALARRAY
/**
 * Fills a strided sequence of valarray elements with values drawn from a
 * ranlib::Uniform<T> generator.
 *
 * @param data         valarray to write into
 * @param numElements  number of elements to write; nothing is written
 *                     when this is zero or negative
 * @param stride       distance, in elements, between consecutive writes
 */
template<class T>
void initializeRandomDouble(valarray<T>& data, int numElements, int stride = 1)
{
    ranlib::Uniform<T> rnd;

    // Widen both factors before multiplying: the product i*stride must not
    // be formed in int, where it could overflow before the conversion.
    const size_t step = size_t(stride);

    for (int i = 0; i < numElements; ++i)
        data[size_t(i) * step] = rnd.random();
}
#endif
139
VectorVersion(BenchmarkExt<int> & bench,double a)140 void VectorVersion(BenchmarkExt<int>& bench, double a)
141 {
142 bench.beginImplementation("Vector<T>");
143
144 while (!bench.doneImplementationBenchmark())
145 {
146 int N = bench.getParameter();
147 long iters = bench.getIterations();
148
149 cout << bench.currentImplementation() << ": N = " << N << endl;
150
151 Vector<double> x(N);
152 initializeRandomDouble(x.data(), N);
153 Vector<double> y(N);
154 initializeRandomDouble(y.data(), N);
155
156
157 bench.start();
158 for (long i=0; i < iters; ++i)
159 {
160 y = y + a*x;
161 sink();
162 }
163 bench.stop();
164
165 bench.startOverhead();
166 for (long i=0; i < iters; ++i) {
167 sink();
168 }
169
170 bench.stopOverhead();
171 }
172
173 bench.endImplementation();
174 }
175
176
ArrayVersion(BenchmarkExt<int> & bench,double a)177 void ArrayVersion(BenchmarkExt<int>& bench, double a)
178 {
179 bench.beginImplementation("Array<T,1>");
180
181 while (!bench.doneImplementationBenchmark())
182 {
183 int N = bench.getParameter();
184 long iters = bench.getIterations();
185
186 cout << bench.currentImplementation() << ": N = " << N << endl;
187
188 Array<double,1> x(N);
189 initializeRandomDouble(x.dataFirst(), N);
190 Array<double,1> y(N);
191 initializeRandomDouble(y.dataFirst(), N);
192
193
194 bench.start();
195 for (long i=0; i < iters; ++i)
196 {
197 y = y + a*x;
198 sink();
199 }
200 bench.stop();
201
202 bench.startOverhead();
203 for (long i=0; i < iters; ++i) {
204 sink();
205 }
206
207 bench.stopOverhead();
208 }
209
210 bench.endImplementation();
211 }
212
213
ArrayVersion_index(BenchmarkExt<int> & bench,double a)214 void ArrayVersion_index(BenchmarkExt<int>& bench, double a)
215 {
216 bench.beginImplementation("Array<T,1> (indexexpr.)");
217
218 while (!bench.doneImplementationBenchmark())
219 {
220 int N = bench.getParameter();
221 long iters = bench.getIterations();
222
223 cout << bench.currentImplementation() << ": N = " << N << endl;
224
225 Array<double,1> x(N);
226 initializeRandomDouble(x.dataFirst(), N);
227 Array<double,1> y(N);
228 initializeRandomDouble(y.dataFirst(), N);
229
230
231 bench.start();
232 for (long i=0; i < iters; ++i)
233 {
234 y = y(tensor::i) + a*x(tensor::i);;
235 sink();
236 }
237 bench.stop();
238
239 bench.startOverhead();
240 for (long i=0; i < iters; ++i) {
241 sink();
242 }
243
244 bench.stopOverhead();
245 }
246
247 bench.endImplementation();
248 }
249
ArrayVersion_unaligned(BenchmarkExt<int> & bench,double a)250 void ArrayVersion_unaligned(BenchmarkExt<int>& bench, double a)
251 {
252 bench.beginImplementation("Array<T,1> (unal.)");
253
254 while (!bench.doneImplementationBenchmark())
255 {
256 int N = bench.getParameter();
257 long iters = bench.getIterations();
258
259 cout << bench.currentImplementation() << ": N = " << N << endl;
260
261
262 Array<double,1> xfill(N+1);
263 Array<double,1> x(xfill(Range(1,N)));
264 initializeRandomDouble(x.dataFirst(), N);
265
266 Array<double,1> yfill(N+1);
267 Array<double,1> y(yfill(Range(1,N)));
268 initializeRandomDouble(y.dataFirst(), N);
269
270
271 bench.start();
272 for (long i=0; i < iters; ++i)
273 {
274 y = y + a*x;
275 sink();
276 }
277 bench.stop();
278
279 bench.startOverhead();
280 for (long i=0; i < iters; ++i) {
281 sink();
282 }
283
284 bench.stopOverhead();
285 }
286
287 bench.endImplementation();
288 }
289
ArrayVersion_misaligned(BenchmarkExt<int> & bench,double a)290 void ArrayVersion_misaligned(BenchmarkExt<int>& bench, double a)
291 {
292 bench.beginImplementation("Array<T,1> (misal.)");
293
294 while (!bench.doneImplementationBenchmark())
295 {
296 int N = bench.getParameter();
297 long iters = bench.getIterations();
298
299 cout << bench.currentImplementation() << ": N = " << N << endl;
300
301
302 Array<double,1> xfill(N+2);
303 Array<double,1> x(xfill(Range(0,N+0-1)));
304 initializeRandomDouble(x.dataFirst(), N);
305
306 Array<double,1> yfill(N+2);
307 Array<double,1> y(yfill(Range(1,N+1-1)));
308 initializeRandomDouble(y.dataFirst(), N);
309
310
311 bench.start();
312 for (long i=0; i < iters; ++i)
313 {
314 y = y + a*x;
315 sink();
316 }
317 bench.stop();
318
319 bench.startOverhead();
320 for (long i=0; i < iters; ++i) {
321 sink();
322 }
323
324 bench.stopOverhead();
325 }
326
327 bench.endImplementation();
328 }
329
330 #ifdef BENCHMARK_VALARRAY
ValarrayVersion(BenchmarkExt<int> & bench,double a)331 void ValarrayVersion(BenchmarkExt<int>& bench, double a)
332 {
333 bench.beginImplementation("valarray<T>");
334
335 while (!bench.doneImplementationBenchmark())
336 {
337 int N = bench.getParameter();
338 cout << bench.currentImplementation() << ": N = " << N << endl;
339
340 long iters = bench.getIterations();
341
342 valarray<double> x(N);
343 initializeRandomDouble(x, N);
344 valarray<double> y(N);
345 initializeRandomDouble(y, N);
346
347
348 bench.start();
349 for (long i=0; i < iters; ++i)
350 {
351 y = y + a*x;
352 sink();
353 }
354 bench.stop();
355
356 bench.startOverhead();
357 for (long i=0; i < iters; ++i) {
358 sink();
359 }
360 bench.stopOverhead();
361 }
362
363 bench.endImplementation();
364 }
365 #endif
366
F77Version(BenchmarkExt<int> & bench,double a)367 void F77Version(BenchmarkExt<int>& bench, double a)
368 {
369 bench.beginImplementation("Fortran 77");
370
371 while (!bench.doneImplementationBenchmark())
372 {
373 int N = bench.getParameter();
374 cout << bench.currentImplementation() << ": N = " << N << endl;
375
376 int iters = bench.getIterations();
377
378 double* x = new double[N];
379 initializeRandomDouble(x, N);
380 double* y = new double[N];
381 initializeRandomDouble(y, N);
382
383
384 bench.start();
385 for (int iter=0; iter < iters; ++iter)
386 loop3_f77(N, x, y, a);
387 bench.stop();
388
389 bench.startOverhead();
390 for (int iter=0; iter < iters; ++iter)
391 loop3_f77overhead(N, x, y, a);
392
393 bench.stopOverhead();
394
395 delete [] x;
396 delete [] y;
397
398 }
399
400 bench.endImplementation();
401 }
402
403 #ifdef FORTRAN_90
F90Version(BenchmarkExt<int> & bench,double a)404 void F90Version(BenchmarkExt<int>& bench, double a)
405 {
406 bench.beginImplementation("Fortran 90");
407
408 while (!bench.doneImplementationBenchmark())
409 {
410 int N = bench.getParameter();
411 cout << bench.currentImplementation() << ": N = " << N << endl;
412
413 int iters = bench.getIterations();
414
415 double* x = new double[N];
416 initializeRandomDouble(x, N);
417 double* y = new double[N];
418 initializeRandomDouble(y, N);
419
420
421 bench.start();
422 for (int iter=0; iter < iters; ++iter)
423 loop3_f90(N, x, y, a);
424 bench.stop();
425
426 bench.startOverhead();
427 for (int iter=0; iter < iters; ++iter)
428 loop3_f90overhead(N, x, y, a);
429
430 bench.stopOverhead();
431 delete [] x;
432 delete [] y;
433
434 }
435
436 bench.endImplementation();
437 }
438 #endif
439
440