1 /*
2  * Copyright (c) 2002, 2017 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <unistd.h>
22 
23 #include "config.h"
24 
25 #include "nfft3.h"
26 #include "infft.h"
27 
/* Number of measured repetitions per benchmark command (used by run_testset). */
#define NREPEAT 5

/*
 * Names of the helper programs started through system().  On Windows the
 * executables carry an ".exe" suffix, elsewhere they are started from the
 * current directory.
 */
#if defined(_WIN32) || defined(_WIN64)
#define BENCHOMP_CMD(name) name ".exe"
#else
#define BENCHOMP_CMD(name) "./" name
#endif

const char *CMD_CREATEDATASET = BENCHOMP_CMD("nfft_benchomp_createdataset");
const char *CMD_DETAIL_SINGLE = BENCHOMP_CMD("nfft_benchomp_detail_single");
const char *CMD_DETAIL_THREADS = BENCHOMP_CMD("nfft_benchomp_detail_threads");

/* Stream receiving the generated TikZ plots; opened and closed in main(). */
static FILE* file_out_tex = NULL;
41 
get_nthreads_array(int ** arr)42 int get_nthreads_array(int **arr)
43 {
44   int max_threads = NFFT(get_num_threads)();
45   int alloc_num = 2;
46   int k;
47   int ret_number = 0;
48   int max_threads_pw2 = (max_threads / 2) * 2 == max_threads ? 1 : 0;
49 
50   if (max_threads <= 5)
51   {
52     *arr = (int*) malloc(max_threads*sizeof(int));
53     for (k = 0; k < max_threads; k++)
54       *(*arr + k) = k+1;
55     return max_threads;
56   }
57 
58   for (k = 1; k <= max_threads; k*=2, alloc_num++);
59 
60   *arr = (int*) malloc(alloc_num*sizeof(int));
61 
62   for (k = 1; k <= max_threads; k*=2)
63   {
64     if (k != max_threads && 2*k > max_threads && max_threads_pw2)
65     {
66       *(*arr + ret_number) = max_threads/2;
67       ret_number++;
68     }
69 
70     *(*arr + ret_number) = k;
71     ret_number++;
72 
73     if (k != max_threads && 2*k > max_threads)
74     {
75       *(*arr + ret_number) = max_threads;
76       ret_number++;
77       break;
78     }
79   }
80 
81   return ret_number;
82 }
83 
84 
/*
 * Terminates the program when a value differs from the expected one.
 *
 * val  value that was obtained
 * ok   value that was expected
 * msg  text identifying the failed step; printed in the error message
 *
 * Returns normally only if val equals ok; otherwise an error line is written
 * to stderr and the process exits with status 1.
 */
void check_result_value(const int val, const int ok, const char *msg)
{
  if (val == ok)
    return;

  fprintf(stderr, "ERROR %s: %d not %d\n", msg, val, ok);
  exit(1);
}
94 
run_test_create(int d,int trafo_adjoint,int N,int M,double sigma)95 void run_test_create(int d, int trafo_adjoint, int N, int M, double sigma)
96 {
97   char cmd[1025];
98 
99   if (d==1)
100     snprintf(cmd, 1024, "%s %d %d %d %d %lg > nfft_benchomp_test.data", CMD_CREATEDATASET, d, trafo_adjoint, N, M, sigma);
101   else if (d==2)
102     snprintf(cmd, 1024, "%s %d %d %d %d %d %lg > nfft_benchomp_test.data", CMD_CREATEDATASET, d, trafo_adjoint, N, N, M, sigma);
103   else if (d==3)
104     snprintf(cmd, 1024, "%s %d %d %d %d %d %d %lg > nfft_benchomp_test.data", CMD_CREATEDATASET, d, trafo_adjoint, N, N, N, M, sigma);
105   else if (d==4)
106     snprintf(cmd, 1024, "%s %d %d %d %d %d %d %d %lg > nfft_benchomp_test.data", CMD_CREATEDATASET, d, trafo_adjoint, N, N, N, N, M, sigma);
107   else
108     exit(1);
109   fprintf(stderr, "%s\n", cmd);
110   check_result_value(system(cmd), 0, "createdataset");
111 }
112 
/*
 * Creates the file nfft_benchomp_test.result, or truncates it to zero length
 * if it already exists.  A failure to open the file is silently ignored.
 */
void run_test_init_output()
{
  FILE *result_file = fopen("nfft_benchomp_test.result", "w");

  if (result_file == NULL)
    return;

  fclose(result_file);
}
119 
/* Parameters describing one benchmark configuration. */
typedef struct
{
  int d;             /* number of dimensions; plot titles show it as "<d>d" */
  int trafo_adjoint; /* 0 is titled NFFT, any other value NFFT^\top */
  int N;             /* size passed to the data set generator for every dimension */
  int M;             /* M value passed to the data set generator */
  double sigma;      /* sigma value passed to the data set generator */
  int m;             /* m value passed to the timing programs */
  int flags;         /* flag bits passed to the timing programs */
} s_param;
130 
/* Statistics of one measured quantity over the repetitions of a test. */
typedef struct
{
  double avg; /* arithmetic mean of the measured values */
  double min; /* smallest measured value */
  double max; /* largest measured value */
} s_resval;
137 
138 typedef struct
139 {
140   int nthreads;
141   s_resval resval[6];
142 } s_result;
143 
144 typedef struct
145 {
146   s_param param;
147   s_result *results;
148   int nresults;
149 } s_testset;
150 
/*
 * Runs one timing program repeatedly and collects statistics of its output.
 *
 * res       array of 6 entries receiving average, minimum and maximum of the
 *           six values the program prints
 * nrepeat   number of measured runs
 * m, flags  forwarded on the command line of the timing program
 * nthreads  thread count; values below 2 select the single-threaded program,
 *           otherwise the threaded program is started with this count
 *
 * The program reads nfft_benchomp_test.data and writes to
 * nfft_benchomp_test.out, which is parsed after every measured run.  The
 * command line and a summary of the statistics are written to stderr.  The
 * process exits with status 1 if a run fails or its output cannot be read.
 */
void run_test(s_resval *res, int nrepeat, int m, int flags, int nthreads)
{
  char cmd[1025];
  int r,t;

  for (t = 0; t < 6; t++)
  {
    res[t].avg = 0.0; res[t].min = 1.0/0.0; res[t].max = 0.0;
  }

  if (nthreads < 2)
    snprintf(cmd, 1024, "%s %d %d < nfft_benchomp_test.data > nfft_benchomp_test.out", CMD_DETAIL_SINGLE, m, flags);
  else
    snprintf(cmd, 1024, "%s %d %d %d < nfft_benchomp_test.data > nfft_benchomp_test.out", CMD_DETAIL_THREADS, m, flags, nthreads);
  fprintf(stderr, "%s\n", cmd);

  /* Extra run whose output is not evaluated (presumably a warm-up). */
  check_result_value(system(cmd), 0, cmd);

  for (r = 0; r < nrepeat; r++)
  {
    int retval;
    double v[6];
    FILE *f;

    check_result_value(system(cmd), 0, cmd);

    f = fopen("nfft_benchomp_test.out", "r");
    if (f == NULL)
    {
      /* fscanf() on a NULL stream would be undefined behavior. */
      fprintf(stderr, "ERROR open nfft_benchomp_test.out: file cannot be read\n");
      exit(1);
    }
    retval = fscanf(f, "%lg %lg %lg %lg %lg %lg", v, v+1, v+2, v+3, v+4, v+5);
    fclose(f);
    check_result_value(retval, 6, "read nfft_benchomp_test.out");

    for (t = 0; t < 6; t++)
    {
      res[t].avg += v[t];
      if (res[t].min > v[t])
        res[t].min = v[t];
      if (res[t].max < v[t])
        res[t].max = v[t];
    }
  }

  /* Without any measured run there is nothing to average. */
  if (nrepeat > 0)
    for (t = 0; t < 6; t++)
      res[t].avg /= nrepeat;

  fprintf(stderr, "%d %d: ", nthreads, nrepeat);
  for (t = 0; t < 6; t++)
    fprintf(stderr, "%.3e %.3e %.3e | ", res[t].avg, res[t].min, res[t].max);
  fprintf(stderr, "\n");
}
197 
get_psi_string(int flags)198 const char *get_psi_string(int flags)
199 {
200   if (flags & PRE_ONE_PSI)
201     return "unknownPSI";
202 
203   return "nopsi";
204 }
get_sort_string(int flags)205 const char *get_sort_string(int flags)
206 {
207   if (flags & NFFT_SORT_NODES)
208     return "sorted";
209 
210     return "unsorted";
211 }
212 
get_adjoint_omp_string(int flags)213 const char *get_adjoint_omp_string(int flags)
214 {
215   if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
216     return "blockwise";
217 
218     return "";
219 }
220 
/*
 * Bit masks naming the individual test parameters.  They are combined into
 * the value returned by determine_different_parameters() and select the
 * parts that get_plot_title() prints.
 */
#define MASK_BIT(pos) (1U<<(pos))

#define MASK_D MASK_BIT(0)          /* s_param.d */
#define MASK_TA MASK_BIT(1)         /* s_param.trafo_adjoint */
#define MASK_N MASK_BIT(2)          /* s_param.N */
#define MASK_SIGMA MASK_BIT(3)      /* s_param.sigma */
#define MASK_M MASK_BIT(4)          /* s_param.M */
#define MASK_WINM MASK_BIT(5)       /* s_param.m */
#define MASK_FLAGS_PSI MASK_BIT(6)  /* PRE_ONE_PSI bit of s_param.flags */
#define MASK_FLAGS_SORT MASK_BIT(7) /* NFFT_SORT_NODES bit of s_param.flags */
#define MASK_FLAGS_BW MASK_BIT(8)   /* NFFT_OMP_BLOCKWISE_ADJOINT bit of s_param.flags */
230 
/*
 * Determines which parameters vary within a sequence of test sets.
 *
 * Every test set is compared with its predecessor; for each parameter that
 * differs in at least one such pair the corresponding MASK_* bit is set.
 *
 * testsets   array of ntestsets test sets
 * ntestsets  number of test sets; fewer than two yields 0
 *
 * Returns the bitwise OR of the MASK_* values of all varying parameters.
 */
unsigned int determine_different_parameters(s_testset *testsets, int ntestsets)
{
  unsigned int differing = 0;
  int idx;

  /* The loop body is never entered for fewer than two test sets. */
  for (idx = 1; idx < ntestsets; idx++)
  {
    const s_param *prev = &testsets[idx-1].param;
    const s_param *cur = &testsets[idx].param;

    if (prev->d != cur->d)
      differing |= MASK_D;
    if (prev->trafo_adjoint != cur->trafo_adjoint)
      differing |= MASK_TA;
    if (prev->N != cur->N)
      differing |= MASK_N;
    if (prev->sigma != cur->sigma)
      differing |= MASK_SIGMA;
    if (prev->M != cur->M)
      differing |= MASK_M;
    if (prev->m != cur->m)
      differing |= MASK_WINM;

    /* A flag differs exactly when it survives the XOR of both flag words. */
    if ((prev->flags ^ cur->flags) & PRE_ONE_PSI)
      differing |= MASK_FLAGS_PSI;
    if ((prev->flags ^ cur->flags) & NFFT_SORT_NODES)
      differing |= MASK_FLAGS_SORT;
    if ((prev->flags ^ cur->flags) & NFFT_OMP_BLOCKWISE_ADJOINT)
      differing |= MASK_FLAGS_BW;
  }

  return differing;
}
263 
/*
 * Composes a plot title from a host name and selected test parameters.
 *
 * outstr     buffer receiving the NUL-terminated title
 * maxlen     number of bytes that may be written to outstr
 * hostname   text placed at the start of the title (may be empty)
 * param      parameters to describe
 * diff_mask  MASK_* bits of the parameters to leave out; every parameter
 *            whose bit is NOT set is appended as " <label>"
 *
 * The blockwise-adjoint part is appended only when its label is non-empty.
 * Composition stops silently, keeping what has been written so far, as soon
 * as snprintf() fails or the title reaches the end of the buffer.
 */
void get_plot_title(char *outstr, int maxlen, char *hostname, s_param param, unsigned int diff_mask)
{
  unsigned int mask = ~diff_mask;
  int offset = 0;
  int len;

  len = snprintf(outstr, maxlen, "%s", hostname);
  if (len < 0 || len+offset >= maxlen-1) return;
  offset += len;

  if (mask & MASK_D)
  {
    len = snprintf(outstr+offset, maxlen-offset, " %dd", param.d);
    if (len < 0 || len+offset >= maxlen-1) return;
    offset += len;
  }

  if (mask & MASK_TA)
  {
    len = snprintf(outstr+offset, maxlen-offset, " $\\mathrm{NFFT}%s$", param.trafo_adjoint==0?"":"^\\top");
    if (len < 0 || len+offset >= maxlen-1) return;
    offset += len;
  }

  if (mask & MASK_N)
  {
    len = snprintf(outstr+offset, maxlen-offset, " N=%d", param.N);
    if (len < 0 || len+offset >= maxlen-1) return;
    offset += len;
  }

  if (mask & MASK_SIGMA)
  {
    /* Labelled like the other titles in this file; this used to read " N=". */
    len = snprintf(outstr+offset, maxlen-offset, " $\\sigma$=%g", param.sigma);
    if (len < 0 || len+offset >= maxlen-1) return;
    offset += len;
  }

  if (mask & MASK_M)
  {
    len = snprintf(outstr+offset, maxlen-offset, " M=%d", param.M);
    if (len < 0 || len+offset >= maxlen-1) return;
    offset += len;
  }

  if (mask & MASK_WINM)
  {
    len = snprintf(outstr+offset, maxlen-offset, " m=%d", param.m);
    if (len < 0 || len+offset >= maxlen-1) return;
    offset += len;
  }

  if (mask & MASK_FLAGS_PSI)
  {
    len = snprintf(outstr+offset, maxlen-offset, " %s", get_psi_string(param.flags));
    if (len < 0 || len+offset >= maxlen-1) return;
    offset += len;
  }

  if (mask & MASK_FLAGS_SORT)
  {
    len = snprintf(outstr+offset, maxlen-offset, " %s", get_sort_string(param.flags));
    if (len < 0 || len+offset >= maxlen-1) return;
    offset += len;
  }

  if ((mask & MASK_FLAGS_BW) && strlen(get_adjoint_omp_string(param.flags)) > 0)
  {
    len = snprintf(outstr+offset, maxlen-offset, " %s", get_adjoint_omp_string(param.flags));
    if (len < 0 || len+offset >= maxlen-1) return;
    offset += len;
  }
}
337 
/*
 * Writes a TikZ speedup plot for a group of test sets.
 *
 * out        stream receiving the TikZ picture
 * testsets   array of ntestsets test sets
 * ntestsets  number of test sets; nothing is written when it is below 1
 * tref       reference time; every plotted value is tref divided by the
 *            average stored in resval[5] of a result
 *
 * The plot title lists the host name and the parameters shared by all test
 * sets; each legend entry lists the parameters that differ, always including
 * the sort flag.  A textual summary of every test set is written to stderr.
 */
void print_output_speedup_total_tref(FILE *out, s_testset *testsets, int ntestsets, double tref)
{
  int i, t;
  char hostname[1025];
  char plottitle[1025];
  unsigned int diff_mask;

  /* The title is derived from testsets[0], so at least one set is required. */
  if (ntestsets < 1)
    return;

  diff_mask = determine_different_parameters(testsets, ntestsets);

#ifdef HAVE_GETHOSTNAME
  if (gethostname(hostname, 1024) != 0)
#endif
    strncpy(hostname, "unnamed", 1024);
  /* gethostname() need not NUL-terminate a truncated name. */
  hostname[1024] = '\0';

  get_plot_title(plottitle, 1024, hostname, testsets[0].param, diff_mask | MASK_FLAGS_SORT);

  fprintf(out, "\\begin{tikzpicture}\n");
  fprintf(out, "\\begin{axis}[");
  fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Speedup, xtick=data, legend style={ legend pos = north west, legend columns=1}, ymajorgrids=true, yminorgrids=true, minor y tick num=4, ");
  fprintf(out, " title={%s}", plottitle);
  fprintf(out, " ]\n");

  for (t = 0; t < ntestsets; t++)
  {
    s_testset testset = testsets[t];
    fprintf(stderr, "%s %dd $\\mathrm{NFFT}%s$ N=%d $\\sigma$=%g M=%d m=%d %s %s %s}", hostname, testset.param.d, testset.param.trafo_adjoint==0?"":"^\\top", testset.param.N, testset.param.sigma, testset.param.M, testset.param.m, get_psi_string(testset.param.flags), get_sort_string(testset.param.flags), get_adjoint_omp_string(testset.param.flags));
    fprintf(stderr, "\n");

    fprintf(out, "\\addplot coordinates {");
    for (i = 0; i < testset.nresults; i++)
      fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg);
    fprintf(out, "};\n");

    for (i = 0; i < testset.nresults; i++)
    {
      fprintf(stderr, "%d:%.3f  ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg);
    }
    fprintf(stderr, "\n\n");
  }

  fprintf(out, "\\legend{{");
  for (t = 0; t < ntestsets; t++)
  {
    char title[256];
    if (t > 0)
      fprintf(out, "},{");
    /* Inverting the mask selects exactly the parameters left out of the plot title. */
    get_plot_title(title, 255, "", testsets[t].param, ~(diff_mask | MASK_FLAGS_SORT));
    fprintf(out, "%s", title);
  }
  fprintf(out, "}}\n");
  fprintf(out, "\\end{axis}\n");
  fprintf(out, "\\end{tikzpicture}\n");
  fprintf(out, "\n\n");

  fflush(out);
}
392 
/*
 * Writes the speedup plot for a group of test sets, using as reference time
 * the smallest resval[5] average among all results measured with exactly one
 * thread.  If no such result exists the reference stays at infinity.
 *
 * out        stream receiving the TikZ picture
 * testsets   array of ntestsets test sets
 * ntestsets  number of test sets
 */
void print_output_speedup_total(FILE *out, s_testset *testsets, int ntestsets)
{
  double fastest_single = 1.0/0.0;
  int set_idx, res_idx;

  for (set_idx = 0; set_idx < ntestsets; set_idx++)
  {
    const s_testset *set = &testsets[set_idx];

    for (res_idx = 0; res_idx < set->nresults; res_idx++)
    {
      const s_result *result = &set->results[res_idx];

      if (result->nthreads != 1)
        continue;
      if (result->resval[5].avg < fastest_single)
        fastest_single = result->resval[5].avg;
    }
  }

  print_output_speedup_total_tref(out, testsets, ntestsets, fastest_single);
}
405 
print_output_histo_DFBRT(FILE * out,s_testset testset)406 void print_output_histo_DFBRT(FILE *out, s_testset testset)
407 {
408   int i, size = testset.nresults;
409   char hostname[1025];
410 
411 #ifdef HAVE_GETHOSTNAME
412   if (gethostname(hostname, 1024) != 0)
413 #endif
414     strncpy(hostname, "unnamed", 1024);
415 
416   fprintf(out, "\\begin{tikzpicture}\n");
417   fprintf(out, "\\begin{axis}[");
418   fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, ");
419   fprintf(out, "symbolic x coords={");
420   for (i = 0; i < size; i++)
421     if (i > 0)
422       fprintf(out, ",%d", testset.results[i].nthreads);
423     else
424       fprintf(out, "%d", testset.results[i].nthreads);
425 fprintf(stderr, "FLAGS: %d\n", testset.param.flags);
426 
427   fprintf(out, "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
428   fprintf(out, " title={%s %dd $\\mathrm{NFFT}%s$ N=%d $\\sigma$=%g M=%d m=%d %s %s %s}", hostname, testset.param.d, testset.param.trafo_adjoint==0?"":"^\\top", testset.param.N, testset.param.sigma, testset.param.M, testset.param.m, get_psi_string(testset.param.flags), get_sort_string(testset.param.flags), get_adjoint_omp_string(testset.param.flags));
429   fprintf(out, " ]\n");
430   fprintf(out, "\\addplot coordinates {");
431   for (i = 0; i < size; i++)
432     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[1].avg);
433   fprintf(out, "};\n");
434 
435   fprintf(out, "\\addplot coordinates {");
436   for (i = 0; i < size; i++)
437     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[2].avg);
438   fprintf(out, "};\n");
439 
440   fprintf(out, "\\addplot coordinates {");
441   for (i = 0; i < size; i++)
442     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[3].avg);
443   fprintf(out, "};\n");
444 
445   fprintf(out, "\\addplot coordinates {");
446   for (i = 0; i < size; i++)
447     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[0].avg + testset.results[i].resval[4].avg);
448   fprintf(out, "};\n");
449 
450   fprintf(out, "\\addplot coordinates {");
451   for (i = 0; i < size; i++)
452     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[5].avg);
453   fprintf(out, "};\n");
454   fprintf(out, "\\legend{D,F,B,rest,total}\n");
455   fprintf(out, "\\end{axis}\n");
456   fprintf(out, "\\end{tikzpicture}\n");
457   fprintf(out, "\n\n");
458 
459   fflush(out);
460 }
461 
/*
 * Runs the benchmark for one parameter combination with every requested
 * thread count.
 *
 * testset               receives the parameters and the measured results
 * d, trafo_adjoint, N,
 * M, sigma, m, flags    parameter values stored in testset->param
 * nthreads_array        thread counts to measure
 * n_threads_array_size  number of entries in nthreads_array
 *
 * The input data set is generated once and each thread count is then timed
 * with NREPEAT repetitions.  testset->results is allocated with malloc() and
 * has to be released by the caller with free().  The process exits with
 * status 1 if that allocation fails.
 */
void run_testset(s_testset *testset, int d, int trafo_adjoint, int N, int M, double sigma, int m, int flags, int *nthreads_array, int n_threads_array_size)
{
  int i;
  testset->param.d = d;
  testset->param.trafo_adjoint = trafo_adjoint;
  testset->param.N = N;
  testset->param.M = M;
  testset->param.sigma = sigma;
  testset->param.m = m;
  testset->param.flags = flags;

  testset->results = (s_result*) malloc(n_threads_array_size*sizeof(s_result));
  /* malloc() may legitimately return NULL for a zero-sized request. */
  if (testset->results == NULL && n_threads_array_size > 0)
  {
    fprintf(stderr, "ERROR run_testset: out of memory\n");
    exit(1);
  }
  testset->nresults = n_threads_array_size;

  run_test_create(testset->param.d, testset->param.trafo_adjoint, testset->param.N, testset->param.M, testset->param.sigma);
  for (i = 0; i < n_threads_array_size; i++)
  {
    testset->results[i].nthreads = nthreads_array[i];
    run_test(testset->results[i].resval, NREPEAT, testset->param.m, testset->param.flags, testset->results[i].nthreads);
  }

}
484 
test1(int * nthreads_array,int n_threads_array_size,int m)485 void test1(int *nthreads_array, int n_threads_array_size, int m)
486 {
487   s_testset testsets[15];
488 
489   run_testset(&testsets[0], 1, 0, 2097152, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size);
490 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
491   print_output_histo_DFBRT(file_out_tex, testsets[0]);
492 #endif
493 
494   run_testset(&testsets[1], 1, 0, 2097152, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
495 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
496   print_output_histo_DFBRT(file_out_tex, testsets[1]);
497 #endif
498 
499   print_output_speedup_total(file_out_tex, testsets, 2);
500 
501   run_testset(&testsets[2], 1, 1, 2097152, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size);
502 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
503   print_output_histo_DFBRT(file_out_tex, testsets[2]);
504 #endif
505 
506   run_testset(&testsets[3], 1, 1, 2097152, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
507 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
508   print_output_histo_DFBRT(file_out_tex, testsets[3]);
509 #endif
510 
511   run_testset(&testsets[4], 1, 1, 2097152, 2097152, 2.0, m, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
512 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
513   print_output_histo_DFBRT(file_out_tex, testsets[4]);
514 #endif
515 
516   print_output_speedup_total(file_out_tex, testsets+2, 3);
517 
518   run_testset(&testsets[5], 2, 0, 1024, 1048576, 2.0, m, 0, nthreads_array, n_threads_array_size);
519 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
520   print_output_histo_DFBRT(file_out_tex, testsets[5]);
521 #endif
522 
523   run_testset(&testsets[6], 2, 0, 1024, 1048576, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
524 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
525   print_output_histo_DFBRT(file_out_tex, testsets[6]);
526 #endif
527 
528   print_output_speedup_total(file_out_tex, testsets+5, 2);
529 
530   run_testset(&testsets[7], 2, 1, 1024, 1048576, 2.0, m, 0, nthreads_array, n_threads_array_size);
531 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
532   print_output_histo_DFBRT(file_out_tex, testsets[7]);
533 #endif
534 
535   run_testset(&testsets[8], 2, 1, 1024, 1048576, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
536 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
537   print_output_histo_DFBRT(file_out_tex, testsets[8]);
538 #endif
539 
540   run_testset(&testsets[9], 2, 1, 1024, 1048576, 2.0, m, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
541 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
542   print_output_histo_DFBRT(file_out_tex, testsets[9]);
543 #endif
544 
545   print_output_speedup_total(file_out_tex, testsets+7, 3);
546 
547   run_testset(&testsets[10], 3, 0, 128, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size);
548 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
549   print_output_histo_DFBRT(file_out_tex, testsets[10]);
550 #endif
551 
552   run_testset(&testsets[11], 3, 0, 128, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
553 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
554   print_output_histo_DFBRT(file_out_tex, testsets[11]);
555 #endif
556 
557   print_output_speedup_total(file_out_tex, testsets+10, 2);
558 
559   run_testset(&testsets[12], 3, 1, 128, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size);
560 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
561   print_output_histo_DFBRT(file_out_tex, testsets[12]);
562 #endif
563 
564   run_testset(&testsets[13], 3, 1, 128, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
565 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
566   print_output_histo_DFBRT(file_out_tex, testsets[13]);
567 #endif
568 
569   run_testset(&testsets[14], 3, 1, 128, 2097152, 2.0, m, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
570 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
571   print_output_histo_DFBRT(file_out_tex, testsets[14]);
572 #endif
573 
574   print_output_speedup_total(file_out_tex, testsets+12, 3);
575 
576 }
577 
test2(int * nthreads_array,int n_threads_array_size,int m)578 void test2(int *nthreads_array, int n_threads_array_size, int m)
579 {
580   s_testset testsets[15];
581 
582   run_testset(&testsets[0], 1, 0, 16777216, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size);
583 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
584   print_output_histo_DFBRT(file_out_tex, testsets[0]);
585 #endif
586 
587   run_testset(&testsets[1], 1, 0, 16777216, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
588 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
589   print_output_histo_DFBRT(file_out_tex, testsets[1]);
590 #endif
591 
592   print_output_speedup_total(file_out_tex, testsets, 2);
593 
594   run_testset(&testsets[2], 1, 1, 16777216, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size);
595 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
596   print_output_histo_DFBRT(file_out_tex, testsets[2]);
597 #endif
598 
599   run_testset(&testsets[3], 1, 1, 16777216, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
600 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
601   print_output_histo_DFBRT(file_out_tex, testsets[3]);
602 #endif
603 
604   run_testset(&testsets[4], 1, 1, 16777216, 2097152, 2.0, m, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
605 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
606   print_output_histo_DFBRT(file_out_tex, testsets[4]);
607 #endif
608 
609   print_output_speedup_total(file_out_tex, testsets+2, 3);
610 
611   run_testset(&testsets[5], 2, 0, 4096, 1048576, 2.0, m, 0, nthreads_array, n_threads_array_size);
612 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
613   print_output_histo_DFBRT(file_out_tex, testsets[5]);
614 #endif
615 
616   run_testset(&testsets[6], 2, 0, 4096, 1048576, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
617 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
618   print_output_histo_DFBRT(file_out_tex, testsets[6]);
619 #endif
620 
621   print_output_speedup_total(file_out_tex, testsets+5, 2);
622 
623   run_testset(&testsets[7], 2, 1, 4096, 1048576, 2.0, m, 0, nthreads_array, n_threads_array_size);
624 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
625   print_output_histo_DFBRT(file_out_tex, testsets[7]);
626 #endif
627 
628   run_testset(&testsets[8], 2, 1, 4096, 1048576, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
629 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
630   print_output_histo_DFBRT(file_out_tex, testsets[8]);
631 #endif
632 
633   run_testset(&testsets[9], 2, 1, 4096, 1048576, 2.0, m, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
634 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
635   print_output_histo_DFBRT(file_out_tex, testsets[9]);
636 #endif
637 
638   print_output_speedup_total(file_out_tex, testsets+7, 3);
639 
640   run_testset(&testsets[10], 3, 0, 256, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size);
641 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
642   print_output_histo_DFBRT(file_out_tex, testsets[10]);
643 #endif
644 
645   run_testset(&testsets[11], 3, 0, 256, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
646 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
647   print_output_histo_DFBRT(file_out_tex, testsets[11]);
648 #endif
649 
650   print_output_speedup_total(file_out_tex, testsets+10, 2);
651 
652   run_testset(&testsets[12], 3, 1, 256, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size);
653 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
654   print_output_histo_DFBRT(file_out_tex, testsets[12]);
655 #endif
656 
657   run_testset(&testsets[13], 3, 1, 256, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
658 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
659   print_output_histo_DFBRT(file_out_tex, testsets[13]);
660 #endif
661 
662   run_testset(&testsets[14], 3, 1, 256, 2097152, 2.0, m, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
663 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
664   print_output_histo_DFBRT(file_out_tex, testsets[14]);
665 #endif
666 
667   print_output_speedup_total(file_out_tex, testsets+12, 3);
668 
669 }
670 
main(int argc,char ** argv)671 int main(int argc, char** argv)
672 {
673   int *nthreads_array;
674   int n_threads_array_size = get_nthreads_array(&nthreads_array);
675   int k;
676 
677 #if !(defined MEASURE_TIME && defined MEASURE_TIME_FFTW)
678   fprintf(stderr, "WARNING: Detailed time measurements for NFFT are not activated.\n");
679   fprintf(stderr, "For more detailed plots, please re-run the configure script with options\n");
680   fprintf(stderr, "--enable-measure-time --enable-measure-time-fftw --enable-openmp\n");
681   fprintf(stderr, "and run \"make clean all\"\n\n");
682 #endif
683 
684   for (k = 0; k < n_threads_array_size; k++)
685     fprintf(stderr, "%d ", nthreads_array[k]);
686   fprintf(stderr, "\n");
687 
688   file_out_tex = fopen("nfft_benchomp_results_plots.tex", "w");
689 
690   test1(nthreads_array, n_threads_array_size, 2);
691   test1(nthreads_array, n_threads_array_size, 4);
692   test1(nthreads_array, n_threads_array_size, 6);
693 //  test2(nthreads_array, n_threads_array_size, 2);
694 
695   fclose(file_out_tex);
696 
697   return 0;
698 }
699