1 
2 // loop10 generated by makeloops.py Thu Jun 30 16:44:56 2011
3 
4 #include <blitz/vector2.h>
5 #include <blitz/array.h>
6 #include <random/uniform.h>
7 #include <blitz/benchext.h>
8 
9 #ifdef BZ_HAVE_VALARRAY
10  #define BENCHMARK_VALARRAY
11 #endif
12 
13 #ifdef BENCHMARK_VALARRAY
14 #include <valarray>
15 #endif
16 
// sink() is declared here and defined elsewhere. Every timed loop in this
// file calls it once per iteration, and the matching overhead loop calls it
// the same number of times so that its cost can be measured separately.
// NOTE(review): presumably an opaque call that keeps the optimizer from
// collapsing the repeated assignments -- confirm against its definition.
namespace blitz {
extern void sink();
}
20 
21 using namespace blitz;
22 using namespace std;
23 
// Map the lower-case routine names used in this file onto the external
// symbol spelling selected by the BZ_FORTRAN_SYMBOLS_* configuration macro:
// one trailing underscore, two trailing underscores, or all upper case.
// When none of the macros is defined the names are used unchanged.
#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES)
 #define loop10_f77 loop10_f77_
 #define loop10_f77overhead loop10_f77overhead_
 #define loop10_f90 loop10_f90_
 #define loop10_f90overhead loop10_f90overhead_
#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES)
 #define loop10_f77 loop10_f77__
 #define loop10_f77overhead loop10_f77overhead__
 #define loop10_f90 loop10_f90__
 #define loop10_f90overhead loop10_f90overhead__
#elif defined(BZ_FORTRAN_SYMBOLS_CAPS)
 #define loop10_f77 LOOP10_F77
 #define loop10_f77overhead LOOP10_F77OVERHEAD
 #define loop10_f90 LOOP10_F90
 #define loop10_f90overhead LOOP10_F90OVERHEAD
#endif
40 
// C-linkage prototypes of the external loop kernels and their "overhead"
// counterparts, called from F77Version() and F90Version(). N and u are passed
// by const reference; x, a, b and c are raw double arrays.
// NOTE(review): presumably implemented in separate Fortran sources, with x as
// the output array of length N -- confirm against those sources.
extern "C" {
  void loop10_f77(const int& N, double* x, double* a, double* b, double* c, const double& u);
  void loop10_f77overhead(const int& N, double* x, double* a, double* b, double* c, const double& u);
  void loop10_f90(const int& N, double* x, double* a, double* b, double* c, const double& u);
  void loop10_f90overhead(const int& N, double* x, double* a, double* b, double* c, const double& u);

}
48 
// Forward declarations of the per-implementation benchmark routines. Each
// takes the shared BenchmarkExt<int> driver and the scalar u used in the
// timed expression. doTinyVectorVersion is declared but has no definition in
// this file; its only call in main() is commented out.
void VectorVersion(BenchmarkExt<int>& bench, double u);
void ArrayVersion(BenchmarkExt<int>& bench, double u);
void ArrayVersion_unaligned(BenchmarkExt<int>& bench, double u);
void ArrayVersion_misaligned(BenchmarkExt<int>& bench, double u);
void ArrayVersion_index(BenchmarkExt<int>& bench, double u);
void doTinyVectorVersion(BenchmarkExt<int>& bench, double u);
void F77Version(BenchmarkExt<int>& bench, double u);
// Only built when the corresponding feature macro is defined.
#ifdef FORTRAN_90
void F90Version(BenchmarkExt<int>& bench, double u);
#endif
#ifdef BENCHMARK_VALARRAY
void ValarrayVersion(BenchmarkExt<int>& bench, double u);
#endif
62 
// Number of problem sizes benchmarked; main() sizes its parameter, iteration
// and flop arrays with it and passes it to bench.setNumParameters().
const int numSizes = 80;
// Compile-time switch: when true, main() also runs VectorVersion() and counts
// it as an extra benchmark.
const bool runvector=false; // no point as long as Vector is Array<1>
65 
main()66 int main()
67 {
68     int numBenchmarks = 5;
69     if (runvector) numBenchmarks++;
70 #ifdef BENCHMARK_VALARRAY
71     numBenchmarks++;
72 #endif
73 #ifdef FORTRAN_90
74     numBenchmarks++;
75 #endif
76 
77     BenchmarkExt<int> bench("loop10: $x = u+$a+$b+$c", numBenchmarks);
78 
79     bench.setNumParameters(numSizes);
80 
81     Array<int,1> parameters(numSizes);
82     Array<long,1> iters(numSizes);
83     Array<double,1> flops(numSizes);
84 
85     parameters=pow(pow(2.,0.25),tensor::i)+tensor::i;
86     flops = 3 * parameters;
87     iters = 100000000L / flops;
88     iters = where(iters<2, 2, iters);
89     cout << iters << endl;
90 
91     bench.setParameterVector(parameters);
92     bench.setIterations(iters);
93     bench.setOpsPerIteration(flops);
94     bench.setDependentVariable("flops");
95     bench.beginBenchmarking();
96 
97     double u = 0.39123982498157938742;
98 
99 
100     ArrayVersion(bench, u);
101     ArrayVersion_unaligned(bench, u);
102     ArrayVersion_misaligned(bench, u);
103     ArrayVersion_index(bench, u);
104     //doTinyVectorVersion(bench, u);
105     F77Version(bench, u);
106 #ifdef FORTRAN_90
107     F90Version(bench, u);
108 #endif
109 #ifdef BENCHMARK_VALARRAY
110     ValarrayVersion(bench, u);
111 #endif
112 
113     if(runvector)
114       VectorVersion(bench, u);
115 
116     bench.endBenchmarking();
117 
118     bench.saveMatlabGraph("loop10.m");
119     return 0;
120 }
121 
122 template<class T>
initializeRandomDouble(T * data,int numElements,int stride=1)123 void initializeRandomDouble(T* data, int numElements, int stride = 1)
124 {
125     ranlib::Uniform<T> rnd;
126 
127     for (int i=0; i < numElements; ++i)
128         data[size_t(i*stride)] = rnd.random();
129 }
130 
// The valarray overload is only needed by ValarrayVersion(), and <valarray>
// is only included when BENCHMARK_VALARRAY is defined, so it is compiled
// under the same guard. Without the guard the declaration names an undeclared
// template whenever valarray benchmarking is switched off.
#ifdef BENCHMARK_VALARRAY
// Fills numElements strided slots of a valarray with values drawn from a
// freshly constructed ranlib::Uniform<T> generator.
//
//   data         valarray to write into; it is not resized here
//   numElements  number of elements written
//   stride       distance, in elements, between consecutive writes (default 1)
template<class T>
void initializeRandomDouble(valarray<T>& data, int numElements, int stride = 1)
{
    ranlib::Uniform<T> rnd;

    for (int i=0; i < numElements; ++i)
    {
        // Widen both factors before multiplying so the index is not computed
        // in int, where a large product would overflow (undefined behaviour).
        const size_t offset = size_t(i) * size_t(stride);
        data[offset] = rnd.random();
    }
}
#endif
139 
VectorVersion(BenchmarkExt<int> & bench,double u)140 void VectorVersion(BenchmarkExt<int>& bench, double u)
141 {
142     bench.beginImplementation("Vector<T>");
143 
144     while (!bench.doneImplementationBenchmark())
145     {
146         int N = bench.getParameter();
147         long iters = bench.getIterations();
148 
149         cout << bench.currentImplementation() << ": N = " << N << endl;
150 
151         Vector<double> x(N);
152         initializeRandomDouble(x.data(), N);
153         Vector<double> a(N);
154         initializeRandomDouble(a.data(), N);
155         Vector<double> b(N);
156         initializeRandomDouble(b.data(), N);
157         Vector<double> c(N);
158         initializeRandomDouble(c.data(), N);
159 
160 
161         bench.start();
162         for (long i=0; i < iters; ++i)
163         {
164             x = u+a+b+c;
165             sink();
166         }
167         bench.stop();
168 
169         bench.startOverhead();
170         for (long i=0; i < iters; ++i) {
171             sink();
172 	}
173 
174         bench.stopOverhead();
175     }
176 
177     bench.endImplementation();
178 }
179 
180 
ArrayVersion(BenchmarkExt<int> & bench,double u)181   void ArrayVersion(BenchmarkExt<int>& bench, double u)
182 {
183     bench.beginImplementation("Array<T,1>");
184 
185     while (!bench.doneImplementationBenchmark())
186     {
187         int N = bench.getParameter();
188         long iters = bench.getIterations();
189 
190         cout << bench.currentImplementation() << ": N = " << N << endl;
191 
192         Array<double,1> x(N);
193         initializeRandomDouble(x.dataFirst(), N);
194         Array<double,1> a(N);
195         initializeRandomDouble(a.dataFirst(), N);
196         Array<double,1> b(N);
197         initializeRandomDouble(b.dataFirst(), N);
198         Array<double,1> c(N);
199         initializeRandomDouble(c.dataFirst(), N);
200 
201 
202         bench.start();
203         for (long i=0; i < iters; ++i)
204         {
205             x = u+a+b+c;
206             sink();
207         }
208         bench.stop();
209 
210         bench.startOverhead();
211         for (long i=0; i < iters; ++i) {
212             sink();
213 	}
214 
215         bench.stopOverhead();
216     }
217 
218     bench.endImplementation();
219 }
220 
221 
ArrayVersion_index(BenchmarkExt<int> & bench,double u)222   void ArrayVersion_index(BenchmarkExt<int>& bench, double u)
223 {
224     bench.beginImplementation("Array<T,1> (indexexpr.)");
225 
226     while (!bench.doneImplementationBenchmark())
227     {
228         int N = bench.getParameter();
229         long iters = bench.getIterations();
230 
231         cout << bench.currentImplementation() << ": N = " << N << endl;
232 
233         Array<double,1> x(N);
234         initializeRandomDouble(x.dataFirst(), N);
235         Array<double,1> a(N);
236         initializeRandomDouble(a.dataFirst(), N);
237         Array<double,1> b(N);
238         initializeRandomDouble(b.dataFirst(), N);
239         Array<double,1> c(N);
240         initializeRandomDouble(c.dataFirst(), N);
241 
242 
243         bench.start();
244         for (long i=0; i < iters; ++i)
245         {
246             x = u+a(tensor::i)+b(tensor::i)+c(tensor::i);;
247             sink();
248         }
249         bench.stop();
250 
251         bench.startOverhead();
252         for (long i=0; i < iters; ++i) {
253             sink();
254 	}
255 
256         bench.stopOverhead();
257     }
258 
259     bench.endImplementation();
260 }
261 
ArrayVersion_unaligned(BenchmarkExt<int> & bench,double u)262   void ArrayVersion_unaligned(BenchmarkExt<int>& bench, double u)
263 {
264     bench.beginImplementation("Array<T,1> (unal.)");
265 
266     while (!bench.doneImplementationBenchmark())
267     {
268         int N = bench.getParameter();
269         long iters = bench.getIterations();
270 
271         cout << bench.currentImplementation() << ": N = " << N << endl;
272 
273 
274     Array<double,1> xfill(N+1);
275     Array<double,1> x(xfill(Range(1,N)));
276     initializeRandomDouble(x.dataFirst(), N);
277 
278     Array<double,1> afill(N+1);
279     Array<double,1> a(afill(Range(1,N)));
280     initializeRandomDouble(a.dataFirst(), N);
281 
282     Array<double,1> bfill(N+1);
283     Array<double,1> b(bfill(Range(1,N)));
284     initializeRandomDouble(b.dataFirst(), N);
285 
286     Array<double,1> cfill(N+1);
287     Array<double,1> c(cfill(Range(1,N)));
288     initializeRandomDouble(c.dataFirst(), N);
289 
290 
291         bench.start();
292         for (long i=0; i < iters; ++i)
293         {
294             x = u+a+b+c;
295             sink();
296         }
297         bench.stop();
298 
299         bench.startOverhead();
300         for (long i=0; i < iters; ++i) {
301             sink();
302 	}
303 
304         bench.stopOverhead();
305     }
306 
307     bench.endImplementation();
308 }
309 
ArrayVersion_misaligned(BenchmarkExt<int> & bench,double u)310   void ArrayVersion_misaligned(BenchmarkExt<int>& bench, double u)
311 {
312     bench.beginImplementation("Array<T,1> (misal.)");
313 
314     while (!bench.doneImplementationBenchmark())
315     {
316         int N = bench.getParameter();
317         long iters = bench.getIterations();
318 
319         cout << bench.currentImplementation() << ": N = " << N << endl;
320 
321 
322     Array<double,1> xfill(N+4);
323     Array<double,1> x(xfill(Range(0,N+0-1)));
324     initializeRandomDouble(x.dataFirst(), N);
325 
326     Array<double,1> afill(N+4);
327     Array<double,1> a(afill(Range(1,N+1-1)));
328     initializeRandomDouble(a.dataFirst(), N);
329 
330     Array<double,1> bfill(N+4);
331     Array<double,1> b(bfill(Range(2,N+2-1)));
332     initializeRandomDouble(b.dataFirst(), N);
333 
334     Array<double,1> cfill(N+4);
335     Array<double,1> c(cfill(Range(3,N+3-1)));
336     initializeRandomDouble(c.dataFirst(), N);
337 
338 
339         bench.start();
340         for (long i=0; i < iters; ++i)
341         {
342             x = u+a+b+c;
343             sink();
344         }
345         bench.stop();
346 
347         bench.startOverhead();
348         for (long i=0; i < iters; ++i) {
349             sink();
350 	}
351 
352         bench.stopOverhead();
353     }
354 
355     bench.endImplementation();
356 }
357 
358 #ifdef BENCHMARK_VALARRAY
ValarrayVersion(BenchmarkExt<int> & bench,double u)359 void ValarrayVersion(BenchmarkExt<int>& bench, double u)
360 {
361     bench.beginImplementation("valarray<T>");
362 
363     while (!bench.doneImplementationBenchmark())
364     {
365         int N = bench.getParameter();
366         cout << bench.currentImplementation() << ": N = " << N << endl;
367 
368         long iters = bench.getIterations();
369 
370         valarray<double> x(N);
371         initializeRandomDouble(x, N);
372         valarray<double> a(N);
373         initializeRandomDouble(a, N);
374         valarray<double> b(N);
375         initializeRandomDouble(b, N);
376         valarray<double> c(N);
377         initializeRandomDouble(c, N);
378 
379 
380         bench.start();
381         for (long i=0; i < iters; ++i)
382         {
383             x = u+a+b+c;
384             sink();
385         }
386         bench.stop();
387 
388         bench.startOverhead();
389         for (long i=0; i < iters; ++i) {
390 	  sink();
391 	}
392         bench.stopOverhead();
393     }
394 
395     bench.endImplementation();
396 }
397 #endif
398 
F77Version(BenchmarkExt<int> & bench,double u)399 void F77Version(BenchmarkExt<int>& bench, double u)
400 {
401     bench.beginImplementation("Fortran 77");
402 
403     while (!bench.doneImplementationBenchmark())
404     {
405         int N = bench.getParameter();
406         cout << bench.currentImplementation() << ": N = " << N << endl;
407 
408         int iters = bench.getIterations();
409 
410         double* x = new double[N];
411         initializeRandomDouble(x, N);
412         double* a = new double[N];
413         initializeRandomDouble(a, N);
414         double* b = new double[N];
415         initializeRandomDouble(b, N);
416         double* c = new double[N];
417         initializeRandomDouble(c, N);
418 
419 
420         bench.start();
421         for (int iter=0; iter < iters; ++iter)
422             loop10_f77(N, x, a, b, c, u);
423         bench.stop();
424 
425         bench.startOverhead();
426         for (int iter=0; iter < iters; ++iter)
427             loop10_f77overhead(N, x, a, b, c, u);
428 
429         bench.stopOverhead();
430 
431         delete [] x;
432         delete [] a;
433         delete [] b;
434         delete [] c;
435 
436     }
437 
438     bench.endImplementation();
439 }
440 
441 #ifdef FORTRAN_90
F90Version(BenchmarkExt<int> & bench,double u)442 void F90Version(BenchmarkExt<int>& bench, double u)
443 {
444     bench.beginImplementation("Fortran 90");
445 
446     while (!bench.doneImplementationBenchmark())
447     {
448         int N = bench.getParameter();
449         cout << bench.currentImplementation() << ": N = " << N << endl;
450 
451         int iters = bench.getIterations();
452 
453         double* x = new double[N];
454         initializeRandomDouble(x, N);
455         double* a = new double[N];
456         initializeRandomDouble(a, N);
457         double* b = new double[N];
458         initializeRandomDouble(b, N);
459         double* c = new double[N];
460         initializeRandomDouble(c, N);
461 
462 
463         bench.start();
464         for (int iter=0; iter < iters; ++iter)
465             loop10_f90(N, x, a, b, c, u);
466         bench.stop();
467 
468         bench.startOverhead();
469         for (int iter=0; iter < iters; ++iter)
470             loop10_f90overhead(N, x, a, b, c, u);
471 
472         bench.stopOverhead();
473         delete [] x;
474         delete [] a;
475         delete [] b;
476         delete [] c;
477 
478     }
479 
480     bench.endImplementation();
481 }
482 #endif
483 
484