1 
2 // loop3 generated by makeloops.py Thu Jun 30 16:44:56 2011
3 
4 #include <blitz/vector2.h>
5 #include <blitz/array.h>
6 #include <random/uniform.h>
7 #include <blitz/benchext.h>
8 
9 #ifdef BZ_HAVE_VALARRAY
10  #define BENCHMARK_VALARRAY
11 #endif
12 
13 #ifdef BENCHMARK_VALARRAY
14 #include <valarray>
15 #endif
16 
namespace blitz {
// Declared here, defined outside this file. Every timed loop and its
// matching overhead loop below call it once per iteration.
void sink();
}
20 
21 using namespace blitz;
22 using namespace std;
23 
// Map the names used in this file for the Fortran kernels onto the symbol
// spelling selected by the BZ_FORTRAN_SYMBOLS_* configuration macros. When
// none of those macros is defined the names are used unchanged.
#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES)
 #define loop3_f77 loop3_f77_
 #define loop3_f77overhead loop3_f77overhead_
 #define loop3_f90 loop3_f90_
 #define loop3_f90overhead loop3_f90overhead_
#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES)
 #define loop3_f77 loop3_f77__
 #define loop3_f77overhead loop3_f77overhead__
 #define loop3_f90 loop3_f90__
 #define loop3_f90overhead loop3_f90overhead__
#elif defined(BZ_FORTRAN_SYMBOLS_CAPS)
 #define loop3_f77 LOOP3_F77
 #define loop3_f77overhead LOOP3_F77OVERHEAD
 #define loop3_f90 LOOP3_F90
 #define loop3_f90overhead LOOP3_F90OVERHEAD
#endif

// Kernels and their matching overhead routines. They are declared with C
// linkage and defined outside this file; N and a are passed by reference,
// x and y as raw pointers.
extern "C" void loop3_f77(const int& N, double* x, double* y, const double& a);
extern "C" void loop3_f77overhead(const int& N, double* x, double* y, const double& a);
extern "C" void loop3_f90(const int& N, double* x, double* y, const double& a);
extern "C" void loop3_f90overhead(const int& N, double* x, double* y, const double& a);
48 
49 void VectorVersion(BenchmarkExt<int>& bench, double a);
50 void ArrayVersion(BenchmarkExt<int>& bench, double a);
51 void ArrayVersion_unaligned(BenchmarkExt<int>& bench, double a);
52 void ArrayVersion_misaligned(BenchmarkExt<int>& bench, double a);
53 void ArrayVersion_index(BenchmarkExt<int>& bench, double a);
54 void doTinyVectorVersion(BenchmarkExt<int>& bench, double a);
55 void F77Version(BenchmarkExt<int>& bench, double a);
56 #ifdef FORTRAN_90
57 void F90Version(BenchmarkExt<int>& bench, double a);
58 #endif
59 #ifdef BENCHMARK_VALARRAY
60 void ValarrayVersion(BenchmarkExt<int>& bench, double a);
61 #endif
62 
63 const int numSizes = 80;
64 const bool runvector=false; // no point as long as Vector is Array<1>
65 
main()66 int main()
67 {
68     int numBenchmarks = 5;
69     if (runvector) numBenchmarks++;
70 #ifdef BENCHMARK_VALARRAY
71     numBenchmarks++;
72 #endif
73 #ifdef FORTRAN_90
74     numBenchmarks++;
75 #endif
76 
77     BenchmarkExt<int> bench("loop3: $y = $y + a*$x", numBenchmarks);
78 
79     bench.setNumParameters(numSizes);
80 
81     Array<int,1> parameters(numSizes);
82     Array<long,1> iters(numSizes);
83     Array<double,1> flops(numSizes);
84 
85     parameters=pow(pow(2.,0.25),tensor::i)+tensor::i;
86     flops = 2 * parameters;
87     iters = 100000000L / flops;
88     iters = where(iters<2, 2, iters);
89     cout << iters << endl;
90 
91     bench.setParameterVector(parameters);
92     bench.setIterations(iters);
93     bench.setOpsPerIteration(flops);
94     bench.setDependentVariable("flops");
95     bench.beginBenchmarking();
96 
97     double a = 0.39123982498157938742;
98 
99 
100     ArrayVersion(bench, a);
101     ArrayVersion_unaligned(bench, a);
102     ArrayVersion_misaligned(bench, a);
103     ArrayVersion_index(bench, a);
104     //doTinyVectorVersion(bench, a);
105     F77Version(bench, a);
106 #ifdef FORTRAN_90
107     F90Version(bench, a);
108 #endif
109 #ifdef BENCHMARK_VALARRAY
110     ValarrayVersion(bench, a);
111 #endif
112 
113     if(runvector)
114       VectorVersion(bench, a);
115 
116     bench.endBenchmarking();
117 
118     bench.saveMatlabGraph("loop3.m");
119     return 0;
120 }
121 
122 template<class T>
initializeRandomDouble(T * data,int numElements,int stride=1)123 void initializeRandomDouble(T* data, int numElements, int stride = 1)
124 {
125     ranlib::Uniform<T> rnd;
126 
127     for (int i=0; i < numElements; ++i)
128         data[size_t(i*stride)] = rnd.random();
129 }
130 
// <valarray> is only included when BENCHMARK_VALARRAY is defined, and the
// only caller of this overload (ValarrayVersion) sits behind the same
// guard, so the overload is guarded as well.
#ifdef BENCHMARK_VALARRAY
/**
 * Fills numElements entries of a valarray with values drawn from a
 * ranlib::Uniform<T> generator.
 *
 * @param data         Valarray to fill; it must already be large enough,
 *                     since it is not resized here.
 * @param numElements  Number of entries written; nothing is written when
 *                     this is zero or negative.
 * @param stride       Distance, in elements, between consecutive writes.
 */
template<class T>
void initializeRandomDouble(valarray<T>& data, int numElements, int stride = 1)
{
    ranlib::Uniform<T> rnd;

    // Widen both factors before multiplying: the original computed
    // i*stride in int and only then converted it, so the product could
    // overflow for large arrays or strides.
    const size_t step = static_cast<size_t>(stride);
    for (int i = 0; i < numElements; ++i)
        data[static_cast<size_t>(i) * step] = rnd.random();
}
#endif
139 
VectorVersion(BenchmarkExt<int> & bench,double a)140 void VectorVersion(BenchmarkExt<int>& bench, double a)
141 {
142     bench.beginImplementation("Vector<T>");
143 
144     while (!bench.doneImplementationBenchmark())
145     {
146         int N = bench.getParameter();
147         long iters = bench.getIterations();
148 
149         cout << bench.currentImplementation() << ": N = " << N << endl;
150 
151         Vector<double> x(N);
152         initializeRandomDouble(x.data(), N);
153         Vector<double> y(N);
154         initializeRandomDouble(y.data(), N);
155 
156 
157         bench.start();
158         for (long i=0; i < iters; ++i)
159         {
160             y = y + a*x;
161             sink();
162         }
163         bench.stop();
164 
165         bench.startOverhead();
166         for (long i=0; i < iters; ++i) {
167             sink();
168 	}
169 
170         bench.stopOverhead();
171     }
172 
173     bench.endImplementation();
174 }
175 
176 
ArrayVersion(BenchmarkExt<int> & bench,double a)177   void ArrayVersion(BenchmarkExt<int>& bench, double a)
178 {
179     bench.beginImplementation("Array<T,1>");
180 
181     while (!bench.doneImplementationBenchmark())
182     {
183         int N = bench.getParameter();
184         long iters = bench.getIterations();
185 
186         cout << bench.currentImplementation() << ": N = " << N << endl;
187 
188         Array<double,1> x(N);
189         initializeRandomDouble(x.dataFirst(), N);
190         Array<double,1> y(N);
191         initializeRandomDouble(y.dataFirst(), N);
192 
193 
194         bench.start();
195         for (long i=0; i < iters; ++i)
196         {
197             y = y + a*x;
198             sink();
199         }
200         bench.stop();
201 
202         bench.startOverhead();
203         for (long i=0; i < iters; ++i) {
204             sink();
205 	}
206 
207         bench.stopOverhead();
208     }
209 
210     bench.endImplementation();
211 }
212 
213 
ArrayVersion_index(BenchmarkExt<int> & bench,double a)214   void ArrayVersion_index(BenchmarkExt<int>& bench, double a)
215 {
216     bench.beginImplementation("Array<T,1> (indexexpr.)");
217 
218     while (!bench.doneImplementationBenchmark())
219     {
220         int N = bench.getParameter();
221         long iters = bench.getIterations();
222 
223         cout << bench.currentImplementation() << ": N = " << N << endl;
224 
225         Array<double,1> x(N);
226         initializeRandomDouble(x.dataFirst(), N);
227         Array<double,1> y(N);
228         initializeRandomDouble(y.dataFirst(), N);
229 
230 
231         bench.start();
232         for (long i=0; i < iters; ++i)
233         {
234             y = y(tensor::i) + a*x(tensor::i);;
235             sink();
236         }
237         bench.stop();
238 
239         bench.startOverhead();
240         for (long i=0; i < iters; ++i) {
241             sink();
242 	}
243 
244         bench.stopOverhead();
245     }
246 
247     bench.endImplementation();
248 }
249 
ArrayVersion_unaligned(BenchmarkExt<int> & bench,double a)250   void ArrayVersion_unaligned(BenchmarkExt<int>& bench, double a)
251 {
252     bench.beginImplementation("Array<T,1> (unal.)");
253 
254     while (!bench.doneImplementationBenchmark())
255     {
256         int N = bench.getParameter();
257         long iters = bench.getIterations();
258 
259         cout << bench.currentImplementation() << ": N = " << N << endl;
260 
261 
262     Array<double,1> xfill(N+1);
263     Array<double,1> x(xfill(Range(1,N)));
264     initializeRandomDouble(x.dataFirst(), N);
265 
266     Array<double,1> yfill(N+1);
267     Array<double,1> y(yfill(Range(1,N)));
268     initializeRandomDouble(y.dataFirst(), N);
269 
270 
271         bench.start();
272         for (long i=0; i < iters; ++i)
273         {
274             y = y + a*x;
275             sink();
276         }
277         bench.stop();
278 
279         bench.startOverhead();
280         for (long i=0; i < iters; ++i) {
281             sink();
282 	}
283 
284         bench.stopOverhead();
285     }
286 
287     bench.endImplementation();
288 }
289 
ArrayVersion_misaligned(BenchmarkExt<int> & bench,double a)290   void ArrayVersion_misaligned(BenchmarkExt<int>& bench, double a)
291 {
292     bench.beginImplementation("Array<T,1> (misal.)");
293 
294     while (!bench.doneImplementationBenchmark())
295     {
296         int N = bench.getParameter();
297         long iters = bench.getIterations();
298 
299         cout << bench.currentImplementation() << ": N = " << N << endl;
300 
301 
302     Array<double,1> xfill(N+2);
303     Array<double,1> x(xfill(Range(0,N+0-1)));
304     initializeRandomDouble(x.dataFirst(), N);
305 
306     Array<double,1> yfill(N+2);
307     Array<double,1> y(yfill(Range(1,N+1-1)));
308     initializeRandomDouble(y.dataFirst(), N);
309 
310 
311         bench.start();
312         for (long i=0; i < iters; ++i)
313         {
314             y = y + a*x;
315             sink();
316         }
317         bench.stop();
318 
319         bench.startOverhead();
320         for (long i=0; i < iters; ++i) {
321             sink();
322 	}
323 
324         bench.stopOverhead();
325     }
326 
327     bench.endImplementation();
328 }
329 
330 #ifdef BENCHMARK_VALARRAY
ValarrayVersion(BenchmarkExt<int> & bench,double a)331 void ValarrayVersion(BenchmarkExt<int>& bench, double a)
332 {
333     bench.beginImplementation("valarray<T>");
334 
335     while (!bench.doneImplementationBenchmark())
336     {
337         int N = bench.getParameter();
338         cout << bench.currentImplementation() << ": N = " << N << endl;
339 
340         long iters = bench.getIterations();
341 
342         valarray<double> x(N);
343         initializeRandomDouble(x, N);
344         valarray<double> y(N);
345         initializeRandomDouble(y, N);
346 
347 
348         bench.start();
349         for (long i=0; i < iters; ++i)
350         {
351             y = y + a*x;
352             sink();
353         }
354         bench.stop();
355 
356         bench.startOverhead();
357         for (long i=0; i < iters; ++i) {
358 	  sink();
359 	}
360         bench.stopOverhead();
361     }
362 
363     bench.endImplementation();
364 }
365 #endif
366 
F77Version(BenchmarkExt<int> & bench,double a)367 void F77Version(BenchmarkExt<int>& bench, double a)
368 {
369     bench.beginImplementation("Fortran 77");
370 
371     while (!bench.doneImplementationBenchmark())
372     {
373         int N = bench.getParameter();
374         cout << bench.currentImplementation() << ": N = " << N << endl;
375 
376         int iters = bench.getIterations();
377 
378         double* x = new double[N];
379         initializeRandomDouble(x, N);
380         double* y = new double[N];
381         initializeRandomDouble(y, N);
382 
383 
384         bench.start();
385         for (int iter=0; iter < iters; ++iter)
386             loop3_f77(N, x, y, a);
387         bench.stop();
388 
389         bench.startOverhead();
390         for (int iter=0; iter < iters; ++iter)
391             loop3_f77overhead(N, x, y, a);
392 
393         bench.stopOverhead();
394 
395         delete [] x;
396         delete [] y;
397 
398     }
399 
400     bench.endImplementation();
401 }
402 
403 #ifdef FORTRAN_90
F90Version(BenchmarkExt<int> & bench,double a)404 void F90Version(BenchmarkExt<int>& bench, double a)
405 {
406     bench.beginImplementation("Fortran 90");
407 
408     while (!bench.doneImplementationBenchmark())
409     {
410         int N = bench.getParameter();
411         cout << bench.currentImplementation() << ": N = " << N << endl;
412 
413         int iters = bench.getIterations();
414 
415         double* x = new double[N];
416         initializeRandomDouble(x, N);
417         double* y = new double[N];
418         initializeRandomDouble(y, N);
419 
420 
421         bench.start();
422         for (int iter=0; iter < iters; ++iter)
423             loop3_f90(N, x, y, a);
424         bench.stop();
425 
426         bench.startOverhead();
427         for (int iter=0; iter < iters; ++iter)
428             loop3_f90overhead(N, x, y, a);
429 
430         bench.stopOverhead();
431         delete [] x;
432         delete [] y;
433 
434     }
435 
436     bench.endImplementation();
437 }
438 #endif
439 
440