1 /*--------------------------------------------------------------------
2 *
3 * Copyright (c) 1991-2021 by the GMT Team (https://www.generic-mapping-tools.org/team.html)
4 * See LICENSE.TXT file for copying and redistribution conditions.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as published by
8 * the Free Software Foundation; version 3 or any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
14 *
15 * Contact info: www.generic-mapping-tools.org
16 *--------------------------------------------------------------------*/
17 /*
18 * Here are various ways to perform 1-D and 2-D Fourier transforms.
19 * Most of these were provided by other people, as indicated below.
20 *
21 *
22 * Author: Paul Wessel
23 * Date: 1-APR-2011
24 * Version: 5
25 *
26 * Major overhaul, Florian Wobbe, 2012-07-09:
27 * Added free Kiss FFT (kissfft) to GMT code base.
28 * Superseded Norman Brenner's ancient Cooley-Tukey FFT implementation
29 * Kiss FFT. Brenner still available as a legacy/compativility choice.
30 * Support for Paul Swarztrauber's ancient FFTPACK and for Sun Performance
31 * Library (perflib) have been removed too because they are not maintained
32 * anymore.
33 *
34 * FFTW : FFTW library (supplied externally)
35 * vDSP : OSX Accelerate Framework (OSX only)
36 * Kiss FFT: Free FFT, based on the principle "Keep It Simple, Stupid"
37 * Configure the implementation with gmtset GMT_FFT.
38 *
39 *--------------------------------------------------------------------------
40 * Public functions declared in gmt_dev.h ():
41 *
42 *
43 */
44
45 #include "gmt_dev.h"
46 #include "gmt_internals.h"
47
48 static char *GMT_fft_algo[] = {
49 "Auto-Select",
50 "Accelerate Framework",
51 "FFTW",
52 "Kiss FFT",
53 "Brenner FFT [Legacy]"
54 };
55
56 /* Functions that fascilitating setting up FFT operations, like determining optimal dimension,
57 * setting wavenumber functions, apply interior or exterior taper to grids, save pre-fft grid
58 * to file, save fft grid (real and imag or mag,hypot) to two files. Programs that wish to
59 * operate on data in the wavenumber domain will need to use some of these functions; see
60 * grdfft.c and potential/gravfft.c as examples.
61 */
62
gmtfft_propose_radix2(uint64_t n)63 static inline uint64_t gmtfft_propose_radix2 (uint64_t n) {
64 /* Returns the smallest base 2 exponent, log2n, that satisfies: 2^log2n >= n */
65 uint64_t log2n = 1;
66 while ( 1ULL<<log2n < n ) ++log2n; /* log2n = 1<<(unsigned)ceil(log2(n)); */
67 return log2n;
68 }
69
gmtfft_radix2(uint64_t n)70 static inline uint64_t gmtfft_radix2 (uint64_t n) {
71 /* Returns the base 2 exponent that represents 'n' if 'n' is a power of 2,
72 * 0 otherwise */
73 uint64_t log2n = 1ULL;
74 while ( 1ULL<<log2n < n ) ++log2n; /* log2n = 1<<(unsigned)ceil(log2(n)); */
75 if (n == 1ULL<<log2n)
76 return log2n;
77 return 0ULL;
78 }
79
gmtfft_get_fftwave_ptr(struct GMT_FFT_WAVENUMBER * ptr)80 static inline struct GMT_FFT_WAVENUMBER * gmtfft_get_fftwave_ptr (struct GMT_FFT_WAVENUMBER *ptr) {return (ptr);}
gmtfft_get_api_ptr(struct GMTAPI_CTRL * ptr)81 static inline struct GMTAPI_CTRL * gmtfft_get_api_ptr (struct GMTAPI_CTRL *ptr) {return (ptr);}
82
gmtfft_get_non_symmetric_f(unsigned int * f,unsigned int n)83 GMT_LOCAL uint64_t gmtfft_get_non_symmetric_f (unsigned int *f, unsigned int n) {
84 /* Return the product of the non-symmetric factors in f[] */
85 unsigned int i = 0, j = 1, retval = 1;
86
87 if (n == 1) return (f[0]);
88
89 while (i < n) {
90 while (j < n && f[j] == f[i]) j++;
91 if ((j-i)%2) retval *= f[i];
92 i = j;
93 j = i + 1;
94 }
95 if (retval == 1) retval = 0; /* There are no non-sym factors */
96 return (retval);
97 }
98
99 #ifndef FSIGNIF
100 #define FSIGNIF 24
101 #endif
102
gmtlib_fourt_stats(struct GMT_CTRL * GMT,unsigned int n_columns,unsigned int n_rows,unsigned int * f,double * r,size_t * s,double * t)103 void gmtlib_fourt_stats (struct GMT_CTRL *GMT, unsigned int n_columns, unsigned int n_rows, unsigned int *f, double *r, size_t *s, double *t) {
104 /* Find the proportional run time, t, and rms relative error, r,
105 * of a Fourier transform of size n_columns,n_rows. Also gives s, the size
106 * of the workspace that will be needed by the transform.
107 * To use this routine for a 1-D transform, set n_rows = 1.
108 *
109 * This is all based on the comments in Norman Brenner's code
110 * FOURT, from which our C codes are translated.
111 * Brenner says:
112 * r = 3 * pow(2, -FSIGNIF) * sum{ pow(prime_factors, 1.5) }
113 * where FSIGNIF is the smallest bit in the floating point fraction.
114 *
115 * Let m = largest prime factor in the list of factors.
116 * Let p = product of all primes which appear an odd number of
117 * times in the list of prime factors. Then the worksize needed
118 * s = max(m,p). However, we know that if n is radix 2, then no
119 * work is required; yet this formula would say we need a worksize
120 * of at least 2. So I will return s = 0 when max(m,p) = 2.
121 *
122 * I have two different versions of the comments in FOURT, with
123 * different formulae for t. The simple formula says
124 * t = n * (sum of prime factors of n).
125 * The more complicated formula gives coefficients in microsecs
126 * on a cdc3300 (ancient history, but perhaps proportional):
127 * t = 3000 + n*(500 + 43*s2 + 68*sf + 320*nf),
128 * where s2 is the sum of all factors of 2, sf is the sum of all
129 * factors greater than 2, and nf is the number of factors != 2.
130 * We know that factors of 2 are very good to have, and indeed,
131 * Brenner's code calls different routines depending on whether
132 * the transform is of size 2 or not, so I think that the second
133 * formula is more correct, using proportions of 43:68 for 2 and
134 * non-2 factors. So I will use the more complicated formula.
135 * However, I realize that the actual numbers are wrong for today's
136 * architectures, and the relative proportions may be wrong as well.
137 *
138 * W. H. F. Smith, 26 February 1992.
139 * */
140
141 unsigned int n_factors, i, sum2, sumnot2, nnot2;
142 uint64_t nonsymx, nonsymy, nonsym, storage, ntotal;
143 double err_scale;
144
145 /* Find workspace needed. First find non_symmetric factors in n_columns, n_rows */
146 n_factors = gmt_get_prime_factors (GMT, n_columns, f);
147 nonsymx = gmtfft_get_non_symmetric_f (f, n_factors);
148 n_factors = gmt_get_prime_factors (GMT, n_rows, f);
149 nonsymy = gmtfft_get_non_symmetric_f (f, n_factors);
150 nonsym = MAX (nonsymx, nonsymy);
151
152 /* Now get factors of ntotal */
153 ntotal = gmt_M_get_nm (GMT, n_columns, n_rows);
154 n_factors = gmt_get_prime_factors (GMT, ntotal, f);
155 storage = MAX (nonsym, f[n_factors-1]);
156 *s = (storage == 2) ? 0 : storage;
157
158 /* Now find time and error estimates */
159
160 err_scale = 0.0;
161 sum2 = sumnot2 = nnot2 = 0;
162 for(i = 0; i < n_factors; i++) {
163 if (f[i] == 2)
164 sum2 += f[i];
165 else {
166 sumnot2 += f[i];
167 nnot2++;
168 }
169 err_scale += pow ((double)f[i], 1.5);
170 }
171 *t = 1.0e-06 * (3000.0 + ntotal * (500.0 + 43.0 * sum2 + 68.0 * sumnot2 + 320.0 * nnot2));
172 *r = err_scale * 3.0 * pow (2.0, -FSIGNIF);
173
174 return;
175 }
176
gmtlib_suggest_fft_dim(struct GMT_CTRL * GMT,unsigned int n_columns,unsigned int n_rows,struct GMT_FFT_SUGGESTION * fft_sug,bool do_print)177 void gmtlib_suggest_fft_dim (struct GMT_CTRL *GMT, unsigned int n_columns, unsigned int n_rows, struct GMT_FFT_SUGGESTION *fft_sug, bool do_print) {
178 unsigned int f[32], xstop, ystop;
179 unsigned int nx_best_t, ny_best_t;
180 unsigned int nx_best_e, ny_best_e;
181 unsigned int nx_best_s, ny_best_s;
182 unsigned int nxg, nyg; /* Guessed by this routine */
183 unsigned int nx2, ny2, nx3, ny3, nx5, ny5; /* For powers */
184 size_t current_space, best_space, e_space, t_space, given_space;
185 double current_time, best_time, given_time, s_time, e_time;
186 double current_err, best_err, given_err, s_err, t_err;
187
188 gmtlib_fourt_stats (GMT, n_columns, n_rows, f, &given_err, &given_space, &given_time);
189 given_space += n_columns * n_rows;
190 given_space *= 8;
191 if (do_print)
192 GMT_Report (GMT->parent, GMT_MSG_INFORMATION, " Data dimension\t%d %d\ttime factor %.8g\trms error %.8e\tbytes %" PRIuS "\n", n_columns, n_rows, given_time, given_err, given_space);
193
194 best_err = s_err = t_err = given_err;
195 best_time = s_time = e_time = given_time;
196 best_space = t_space = e_space = given_space;
197 nx_best_e = nx_best_t = nx_best_s = n_columns;
198 ny_best_e = ny_best_t = ny_best_s = n_rows;
199
200 xstop = 2 * n_columns;
201 ystop = 2 * n_rows;
202
203 for (nx2 = 2; nx2 <= xstop; nx2 *= 2) {
204 for (nx3 = 1; nx3 <= xstop; nx3 *= 3) {
205 for (nx5 = 1; nx5 <= xstop; nx5 *= 5) {
206 nxg = nx2 * nx3 * nx5;
207 if (nxg < n_columns || nxg > xstop) continue;
208
209 for (ny2 = 2; ny2 <= ystop; ny2 *= 2) {
210 for (ny3 = 1; ny3 <= ystop; ny3 *= 3) {
211 for (ny5 = 1; ny5 <= ystop; ny5 *= 5) {
212 nyg = ny2 * ny3 * ny5;
213 if (nyg < n_rows || nyg > ystop) continue;
214
215 gmtlib_fourt_stats (GMT, nxg, nyg, f, ¤t_err, ¤t_space, ¤t_time);
216 current_space += nxg*nyg;
217 current_space *= 8;
218 if (current_err < best_err) {
219 best_err = current_err;
220 nx_best_e = nxg;
221 ny_best_e = nyg;
222 e_time = current_time;
223 e_space = current_space;
224 }
225 if (current_time < best_time) {
226 best_time = current_time;
227 nx_best_t = nxg;
228 ny_best_t = nyg;
229 t_err = current_err;
230 t_space = current_space;
231 }
232 if (current_space < best_space) {
233 best_space = current_space;
234 nx_best_s = nxg;
235 ny_best_s = nyg;
236 s_time = current_time;
237 s_err = current_err;
238 }
239
240 }
241 }
242 }
243
244 }
245 }
246 }
247
248 if (do_print) {
249 GMT_Report (GMT->parent, GMT_MSG_INFORMATION, " Highest speed\t%d %d\ttime factor %.8g\trms error %.8e\tbytes %" PRIuS "\n",
250 nx_best_t, ny_best_t, best_time, t_err, t_space);
251 GMT_Report (GMT->parent, GMT_MSG_INFORMATION, " Most accurate\t%d %d\ttime factor %.8g\trms error %.8e\tbytes %" PRIuS "\n",
252 nx_best_e, ny_best_e, e_time, best_err, e_space);
253 GMT_Report (GMT->parent, GMT_MSG_INFORMATION, " Least storage\t%d %d\ttime factor %.8g\trms error %.8e\tbytes %" PRIuS "\n",
254 nx_best_s, ny_best_s, s_time, s_err, best_space);
255 }
256 /* Fastest solution */
257 fft_sug[GMT_FFT_FAST].n_columns = nx_best_t;
258 fft_sug[GMT_FFT_FAST].n_rows = ny_best_t;
259 fft_sug[GMT_FFT_FAST].worksize = (t_space/8) - (nx_best_t * ny_best_t);
260 fft_sug[GMT_FFT_FAST].totalbytes = t_space;
261 fft_sug[GMT_FFT_FAST].run_time = best_time;
262 fft_sug[GMT_FFT_FAST].rms_rel_err = t_err;
263 /* Most accurate solution */
264 fft_sug[GMT_FFT_ACCURATE].n_columns = nx_best_e;
265 fft_sug[GMT_FFT_ACCURATE].n_rows = ny_best_e;
266 fft_sug[GMT_FFT_ACCURATE].worksize = (e_space/8) - (nx_best_e * ny_best_e);
267 fft_sug[GMT_FFT_ACCURATE].totalbytes = e_space;
268 fft_sug[GMT_FFT_ACCURATE].run_time = e_time;
269 fft_sug[GMT_FFT_ACCURATE].rms_rel_err = best_err;
270 /* Least storage solution */
271 fft_sug[GMT_FFT_STORAGE].n_columns = nx_best_s;
272 fft_sug[GMT_FFT_STORAGE].n_rows = ny_best_s;
273 fft_sug[GMT_FFT_STORAGE].worksize = (best_space/8) - (nx_best_s * ny_best_s);
274 fft_sug[GMT_FFT_STORAGE].totalbytes = best_space;
275 fft_sug[GMT_FFT_STORAGE].run_time = s_time;
276 fft_sug[GMT_FFT_STORAGE].rms_rel_err = s_err;
277
278 return;
279 }
280
gmtfft_kx(uint64_t k,struct GMT_FFT_WAVENUMBER * K)281 GMT_LOCAL double gmtfft_kx (uint64_t k, struct GMT_FFT_WAVENUMBER *K) {
282 /* Return the value of kx given k,
283 * where kx = 2 pi / lambda x,
284 * and k refers to the position
285 * in the complex data array Grid->data[k]. */
286
287 int64_t ii = (k/2)%(K->nx2);
288 if (ii > (K->nx2)/2) ii -= (K->nx2);
289 return (ii * K->delta_kx);
290 }
291
gmtfft_ky(uint64_t k,struct GMT_FFT_WAVENUMBER * K)292 GMT_LOCAL double gmtfft_ky (uint64_t k, struct GMT_FFT_WAVENUMBER *K) {
293 /* Return the value of ky given k,
294 * where ky = 2 pi / lambda y,
295 * and k refers to the position
296 * in the complex data array Grid->data[k]. */
297
298 int64_t jj = (k/2)/(K->nx2);
299 if (jj > (K->ny2)/2) jj -= (K->ny2);
300 return (jj * K->delta_ky);
301 }
302
gmtfft_kr(uint64_t k,struct GMT_FFT_WAVENUMBER * K)303 GMT_LOCAL double gmtfft_kr (uint64_t k, struct GMT_FFT_WAVENUMBER *K) {
304 /* Return the value of sqrt(kx*kx + ky*ky),
305 * where k refers to the position
306 * in the complex data array Grid->data[k]. */
307
308 return (hypot (gmtfft_kx (k, K), gmtfft_ky (k, K)));
309 }
310
gmt_fft_get_wave(uint64_t k,struct GMT_FFT_WAVENUMBER * K)311 double gmt_fft_get_wave (uint64_t k, struct GMT_FFT_WAVENUMBER *K) {
312 /* Return the value of kx, ky. or kr,
313 * where k refers to the position
314 * in the complex data array Grid->data[k].
315 * GMT_fft_init sets the pointer */
316
317 return (K->k_ptr (k, K));
318 }
319
gmt_fft_any_wave(uint64_t k,unsigned int mode,struct GMT_FFT_WAVENUMBER * K)320 double gmt_fft_any_wave (uint64_t k, unsigned int mode, struct GMT_FFT_WAVENUMBER *K) {
321 /* Lets you specify which wavenumber you want */
322 double wave = 0.0;
323
324 switch (mode) { /* Select which wavenumber we need */
325 case GMT_FFT_K_IS_KX: wave = gmtfft_kx (k, K); break;
326 case GMT_FFT_K_IS_KY: wave = gmtfft_ky (k, K); break;
327 case GMT_FFT_K_IS_KR: wave = gmtfft_kr (k, K); break;
328 }
329 return (wave);
330 }
331
gmt_fft_set_wave(struct GMT_CTRL * GMT,unsigned int mode,struct GMT_FFT_WAVENUMBER * K)332 int gmt_fft_set_wave (struct GMT_CTRL *GMT, unsigned int mode, struct GMT_FFT_WAVENUMBER *K) {
333 /* Change wavenumber selection */
334 switch (mode) { /* Select which wavenumber we need */
335 case GMT_FFT_K_IS_KX: K->k_ptr = gmtfft_kx; break;
336 case GMT_FFT_K_IS_KY: K->k_ptr = gmtfft_ky; break;
337 case GMT_FFT_K_IS_KR: K->k_ptr = gmtfft_kr; break;
338 default:
339 GMT_Report (GMT->parent, GMT_MSG_ERROR, "Bad mode passed to gmt_fft_set_wave (%u)!\n", mode);
340 return GMT_RUNTIME_ERROR;
341 break;
342 }
343 return GMT_OK;
344 }
345
346 /*! . */
GMT_FFT_Wavenumber(void * V_API,uint64_t k,unsigned int mode,void * v_K)347 double GMT_FFT_Wavenumber (void *V_API, uint64_t k, unsigned int mode, void *v_K) {
348 /* Lets you specify which 1-D or 2-D wavenumber you want */
349 struct GMT_FFT_WAVENUMBER *K = gmtfft_get_fftwave_ptr (v_K);
350 gmt_M_unused(V_API);
351 if (K->dim == 2) return (gmt_fft_any_wave (k, mode, K));
352 else return (gmtfft_kx (k, K));
353 }
354
355 #ifdef HAVE_FFTW3F
356
357 #include <fftw3.h>
358 #ifdef _WIN32
359 #include <winsock.h>
360 #endif
361
362 #define FFTWF_WISDOM_FILENAME "fftwf_wisdom"
363
gmtfft_fftwf_wisdom_filename(struct GMT_CTRL * GMT)364 GMT_LOCAL char *gmtfft_fftwf_wisdom_filename (struct GMT_CTRL *GMT) {
365 static char wisdom_file[PATH_MAX+256] = "\0";
366 char hostname[257];
367 if (*wisdom_file == '\0') { /* wisdom_file has not been set yet */
368 if (GMT->session.CACHEDIR == NULL || access (GMT->session.CACHEDIR, R_OK|W_OK|X_OK))
369 /* CACHEDIR does not exist, or not writable */
370 return NULL;
371 else {
372 /* create wisdom file in CACHEDIR */
373 strncpy (wisdom_file, GMT->session.CACHEDIR, PATH_MAX);
374 strcat (wisdom_file, "/" FFTWF_WISDOM_FILENAME "_");
375 /* cat hostname */
376 memset (hostname, '\0', 257); /* in case gethostname does not null-terminate string */
377 gethostname (hostname, 256);
378 strcat (wisdom_file, hostname);
379 }
380 }
381 return wisdom_file;
382 }
383
384 /* Wrapper around fftwf_import_wisdom_from_filename */
gmtfft_fftwf_import_wisdom_from_filename(struct GMT_CTRL * GMT)385 GMT_LOCAL void gmtfft_fftwf_import_wisdom_from_filename (struct GMT_CTRL *GMT) {
386 static bool already_imported = false;
387 char *filenames[3], **filename = filenames;
388 int status;
389
390 if (already_imported) /* nothing to do */
391 return;
392
393 fftwf_import_system_wisdom (); /* read wisdom from implementation-defined standard file */
394
395 /* Initialize filenames */
396 filenames[0] = FFTWF_WISDOM_FILENAME; /* 1st try importing wisdom from file in current dir */
397 filenames[1] = gmtfft_fftwf_wisdom_filename(GMT); /* 2nd try wisdom file in CACHEDIR */
398 filenames[2] = NULL; /* end of array */
399
400 while (*filename != NULL) {
401 if (!access (*filename, R_OK)) {
402 status = fftwf_import_wisdom_from_filename (*filename);
403 if (status)
404 GMT_Report (GMT->parent, GMT_MSG_INFORMATION, "Imported FFTW Wisdom from file: %s\n", *filename);
405 else
406 GMT_Report (GMT->parent, GMT_MSG_ERROR, "Importing FFTW Wisdom from file failed: %s\n", *filename);
407 }
408 ++filename; /* advance to next file in array */
409 }
410
411 already_imported = true;
412 }
413
414 /* Wrapper around fftwf_export_wisdom_to_filename */
gmtfft_fftwf_export_wisdom_to_filename(struct GMT_CTRL * GMT)415 GMT_LOCAL void gmtfft_fftwf_export_wisdom_to_filename (struct GMT_CTRL *GMT) {
416 char *filename = gmtfft_fftwf_wisdom_filename(GMT);
417 int status;
418
419 if (filename == NULL)
420 /* CACHEDIR does not exist, write wisdom to file in current directory */
421 filename = FFTWF_WISDOM_FILENAME;
422
423 status = fftwf_export_wisdom_to_filename (filename);
424 if (status)
425 GMT_Report (GMT->parent, GMT_MSG_INFORMATION, "Exported FFTW Wisdom to file: %s\n", filename);
426 else
427 GMT_Report (GMT->parent, GMT_MSG_ERROR, "Exporting FFTW Wisdom to file failed: %s\n", filename);
428 }
429
gmtfft_gmt_fftwf_plan_dft(struct GMT_CTRL * GMT,unsigned n_rows,unsigned n_columns,fftwf_complex * data,int direction)430 GMT_LOCAL fftwf_plan gmtfft_gmt_fftwf_plan_dft(struct GMT_CTRL *GMT, unsigned n_rows, unsigned n_columns, fftwf_complex *data, int direction) {
431 /* The first two arguments, n0 and n1, are the size of the two-dimensional
432 * transform you are trying to compute. The size n can be any positive
433 * integer, but sizes that are products of small factors are transformed
434 * most efficiently.
435 *
436 * The next two arguments are pointers to the input and output arrays of the
437 * transform. These pointers can be equal, indicating an in-place transform.
438 *
439 * The fourth argument, sign, can be either FFTW_FORWARD (-1) or
440 * FFTW_BACKWARD (+1), and indicates the direction of the transform you are
441 * interested in; technically, it is the sign of the exponent in the
442 * transform.
443 *
444 * The flags argument is usually either FFTW_MEASURE or FFTW_ESTIMATE.
445 * FFTW_MEASURE instructs FFTW to run and measure the execution time of
446 * several FFTs in order to find the best way to compute the transform of
447 * size n. This process takes some time (usually a few seconds), depending
448 * on your machine and on the size of the transform.
449 *
450 * FFTW planner flags supported by the planner routines in FFTW
451 * FFTW_ESTIMATE: pick a (probably sub-optimal) plan quickly
452 * FFTW_MEASURE: find optimal plan by computing several FFTs and measuring
453 * their execution time
454 * FFTW_PATIENT: like FFTW_MEASURE, but considers a wider range of algorithms
455 * FFTW_EXHAUSTIVE: like FFTW_PATIENT, but considers an even wider range of
456 * algorithms
457 *
458 * Important: the planner overwrites the input array during planning unless
459 * a saved plan (see Wisdom) is available for that problem, so you should
460 * initialize your input data after creating the plan. The only exceptions
461 * to this are the FFTW_ESTIMATE and FFTW_WISDOM_ONLY flags. */
462
463 int sign;
464 fftwf_complex *cin, *cout;
465 fftwf_plan plan = NULL;
466
467 sign = direction == GMT_FFT_FWD ? FFTW_FORWARD : FFTW_BACKWARD;
468 cin = data;
469 cout = cin; /* in-place transform */
470
471 if (GMT->current.setting.fftw_plan != FFTW_ESTIMATE) {
472 gmtfft_fftwf_import_wisdom_from_filename (GMT);
473 if (n_rows == 0) /* 1d DFT */
474 plan = fftwf_plan_dft_1d(n_columns, cin, cout, sign, FFTW_WISDOM_ONLY | GMT->current.setting.fftw_plan);
475 else /* 2d DFT */
476 plan = fftwf_plan_dft_2d(n_rows, n_columns, cin, cout, sign, FFTW_WISDOM_ONLY | GMT->current.setting.fftw_plan);
477 if (plan == NULL) {
478 /* No Wisdom available
479 * Need extra memory to prevent overwriting data while planning */
480 fftwf_complex *in_place_tmp = fftwf_malloc (2 * (n_rows == 0 ? 1 : n_rows) * n_columns * sizeof(gmt_grdfloat));
481 GMT_Report (GMT->parent, GMT_MSG_INFORMATION, "Generating new FFTW Wisdom, be patient...\n");
482 if (n_rows == 0) /* 1d DFT */
483 plan = fftwf_plan_dft_1d(n_columns, in_place_tmp, in_place_tmp, sign, GMT->current.setting.fftw_plan);
484 else /* 2d DFT */
485 plan = fftwf_plan_dft_2d(n_rows, n_columns, in_place_tmp, in_place_tmp, sign, GMT->current.setting.fftw_plan);
486 fftwf_destroy_plan(plan); /* deallocate plan */
487 plan = NULL;
488 fftwf_free (in_place_tmp);
489 /* Save new Wisdom */
490 gmtfft_fftwf_export_wisdom_to_filename (GMT);
491 }
492 else
493 GMT_Report (GMT->parent, GMT_MSG_INFORMATION, "Using preexisting FFTW Wisdom.\n");
494 } /* GMT->current.setting.fftw_plan != FFTW_ESTIMATE */
495 else
496 GMT_Report (GMT->parent, GMT_MSG_INFORMATION, "Picking a (probably sub-optimal) FFTW plan quickly.\n");
497
498 if (plan == NULL) { /* If either FFTW_ESTIMATE or new Wisdom generated */
499 if (n_rows == 0) /* 1d DFT */
500 plan = fftwf_plan_dft_1d(n_columns, cin, cout, sign, GMT->current.setting.fftw_plan);
501 else /* 2d DFT */
502 plan = fftwf_plan_dft_2d(n_rows, n_columns, cin, cout, sign, GMT->current.setting.fftw_plan);
503 }
504
505 if (plan == NULL) /* There was a problem creating a plan */
506 GMT_Report (GMT->parent, GMT_MSG_ERROR, "Could not create FFTW plan!\n");
507
508 return plan;
509 }
510
gmtfft_1d_fftwf(struct GMT_CTRL * GMT,gmt_grdfloat * data,unsigned int n,int direction,unsigned int mode)511 GMT_LOCAL int gmtfft_1d_fftwf (struct GMT_CTRL *GMT, gmt_grdfloat *data, unsigned int n, int direction, unsigned int mode) {
512 fftwf_plan plan = NULL;
513 gmt_M_unused(mode);
514
515 /* Generate FFTW plan for complex 1d DFT */
516 plan = gmtfft_gmt_fftwf_plan_dft(GMT, 0, n, (fftwf_complex*)data, direction);
517 fftwf_execute(plan); /* do transform */
518 fftwf_destroy_plan(plan); /* deallocate plan */
519
520 return GMT_NOERROR;
521 }
522
gmtfft_2d_fftwf(struct GMT_CTRL * GMT,gmt_grdfloat * data,unsigned int n_columns,unsigned int n_rows,int direction,unsigned int mode)523 GMT_LOCAL int gmtfft_2d_fftwf (struct GMT_CTRL *GMT, gmt_grdfloat *data, unsigned int n_columns, unsigned int n_rows, int direction, unsigned int mode) {
524 fftwf_plan plan = NULL;
525 gmt_M_unused(mode);
526
527 /* Generate FFTW plan for complex 2d DFT */
528 plan = gmtfft_gmt_fftwf_plan_dft(GMT, n_rows, n_columns, (fftwf_complex*)data, direction);
529 fftwf_execute(plan); /* do transform */
530 fftwf_destroy_plan(plan); /* deallocate plan */
531
532 return GMT_NOERROR;
533 }
534
535 #endif /* HAVE_FFTW3F */
536
537 #ifdef __APPLE__ /* Accelerate framework */
538
gmtfft_gmtfft_1d_vDSP_reset(struct GMT_FFT_HIDDEN * Z)539 GMT_LOCAL void gmtfft_gmtfft_1d_vDSP_reset (struct GMT_FFT_HIDDEN *Z) {
540 if (Z->setup_1d) { /* Free single-precision FFT data structure and arrays */
541 vDSP_destroy_fftsetup (Z->setup_1d);
542 gmt_M_str_free (Z->dsp_split_complex_1d.realp);
543 gmt_M_str_free (Z->dsp_split_complex_1d.imagp);
544 }
545 }
546
gmtfft_1d_vDSP(struct GMT_CTRL * GMT,gmt_grdfloat * data,unsigned int n,int direction,unsigned int mode)547 GMT_LOCAL int gmtfft_1d_vDSP (struct GMT_CTRL *GMT, gmt_grdfloat *data, unsigned int n, int direction, unsigned int mode) {
548 FFTDirection fft_direction = direction == GMT_FFT_FWD ?
549 kFFTDirection_Forward : kFFTDirection_Inverse;
550 DSPComplex *dsp_complex = (DSPComplex *)data;
551
552 /* Base 2 exponent that specifies the largest power of
553 * two that can be processed by fft: */
554 vDSP_Length log2n = gmtfft_radix2 (n);
555 gmt_M_unused(mode);
556
557 if (log2n == 0) {
558 GMT_Report (GMT->parent, GMT_MSG_ERROR, "Need Radix-2 input try: %u [n]\n", 1U<<gmtfft_propose_radix2 (n));
559 return -1;
560 }
561
562 if (GMT->current.fft.n_1d != n) { /* Must update the FFT setup arrays */
563 /* Build data structure that contains precalculated data for use by
564 * single-precision FFT functions: */
565 gmtfft_gmtfft_1d_vDSP_reset (&GMT->current.fft);
566 GMT->current.fft.setup_1d = vDSP_create_fftsetup (log2n, kFFTRadix2);
567 GMT->current.fft.dsp_split_complex_1d.realp = calloc (n, sizeof(gmt_grdfloat));
568 GMT->current.fft.dsp_split_complex_1d.imagp = calloc (n, sizeof(gmt_grdfloat));
569 if (GMT->current.fft.dsp_split_complex_1d.realp == NULL || GMT->current.fft.dsp_split_complex_1d.imagp == NULL) {
570 GMT_Report (GMT->parent, GMT_MSG_ERROR, "Unable to allocate dsp_split_complex array of length %u\n", n);
571 return -1; /* out of memory */
572 }
573 GMT->current.fft.n_1d = n;
574 }
575 vDSP_ctoz (dsp_complex, 2, &GMT->current.fft.dsp_split_complex_1d, 1, n);
576
577 vDSP_fft_zip (GMT->current.fft.setup_1d, &GMT->current.fft.dsp_split_complex_1d, 1, log2n, fft_direction);
578
579 vDSP_ztoc (&GMT->current.fft.dsp_split_complex_1d, 1, dsp_complex, 2, n);
580
581 return GMT_NOERROR;
582 }
583
gmtfft_gmtfft_2d_vDSP_reset(struct GMT_FFT_HIDDEN * Z)584 GMT_LOCAL void gmtfft_gmtfft_2d_vDSP_reset (struct GMT_FFT_HIDDEN *Z) {
585 if (Z->setup_2d) { /* Free single-precision 2D FFT data structure and arrays */
586 vDSP_destroy_fftsetup (Z->setup_2d);
587 gmt_M_str_free (Z->dsp_split_complex_2d.realp);
588 gmt_M_str_free (Z->dsp_split_complex_2d.imagp);
589 }
590 }
591
gmtfft_2d_vDSP(struct GMT_CTRL * GMT,gmt_grdfloat * data,unsigned int n_columns,unsigned int n_rows,int direction,unsigned int mode)592 GMT_LOCAL int gmtfft_2d_vDSP (struct GMT_CTRL *GMT, gmt_grdfloat *data, unsigned int n_columns, unsigned int n_rows, int direction, unsigned int mode) {
593 FFTDirection fft_direction = direction == GMT_FFT_FWD ?
594 kFFTDirection_Forward : kFFTDirection_Inverse;
595 DSPComplex *dsp_complex = (DSPComplex *)data;
596
597 /* Base 2 exponent that specifies the largest power of
598 * two that can be processed by fft: */
599 vDSP_Length log2nx = gmtfft_radix2 (n_columns);
600 vDSP_Length log2ny = gmtfft_radix2 (n_rows);
601 unsigned int n_xy = n_columns * n_rows;
602 gmt_M_unused(mode);
603
604 if (log2nx == 0 || log2ny == 0) {
605 GMT_Report (GMT->parent, GMT_MSG_ERROR, "Need Radix-2 input try: %u/%u [n_columns/n_rows]\n",
606 1U<<gmtfft_propose_radix2 (n_columns), 1U<<gmtfft_propose_radix2 (n_rows));
607 return -1;
608 }
609
610 if (GMT->current.fft.n_2d != n_xy) { /* Must update the 2-D FFT setup arrays */
611 /* Build data structure that contains precalculated data for use by
612 * single-precision FFT functions: */
613 gmtfft_gmtfft_2d_vDSP_reset (&GMT->current.fft);
614 GMT->current.fft.setup_2d = vDSP_create_fftsetup (MAX (log2nx, log2ny), kFFTRadix2);
615 GMT->current.fft.dsp_split_complex_2d.realp = calloc (n_xy, sizeof(gmt_grdfloat));
616 GMT->current.fft.dsp_split_complex_2d.imagp = calloc (n_xy, sizeof(gmt_grdfloat));
617 if (GMT->current.fft.dsp_split_complex_2d.realp == NULL || GMT->current.fft.dsp_split_complex_2d.imagp == NULL) {
618 GMT_Report (GMT->parent, GMT_MSG_ERROR, "Unable to allocate dsp_split_complex array of length %u\n", n_xy);
619 return -1; /* out of memory */
620 }
621 GMT->current.fft.n_2d = n_xy;
622 }
623 vDSP_ctoz (dsp_complex, 2, &GMT->current.fft.dsp_split_complex_2d, 1, n_xy);
624
625 /* complex: */
626 /* PW note 10/26/2014: We used to pass log2ny, log2nx to vDSP_fft2d_zipbut that gave bad results for n_columns != n_rows.
627 * I assume this is because Accelerate expects columns but we pass rows. Now matches KISS, FFTW, etc. */
628 vDSP_fft2d_zip (GMT->current.fft.setup_2d, &GMT->current.fft.dsp_split_complex_2d, 1, 0, log2nx, log2ny, fft_direction);
629 /* real:
630 vDSP_fft2d_zrip (setup, &GMT->current.fft.dsp_split_complex_2d, 1, 0, log2nx, log2ny, fft_direction); */
631
632 vDSP_ztoc (&GMT->current.fft.dsp_split_complex_2d, 1, dsp_complex, 2, n_xy);
633
634 return GMT_NOERROR;
635 }
636 #endif /* APPLE Accelerate framework */
637
638 /* Kiss FFT */
639
640 #include "kiss_fft/kiss_fftnd.h"
641
gmtfft_1d_kiss(struct GMT_CTRL * GMT,gmt_grdfloat * data,unsigned int n,int direction,unsigned int mode)642 GMT_LOCAL int gmtfft_1d_kiss (struct GMT_CTRL *GMT, gmt_grdfloat *data, unsigned int n, int direction, unsigned int mode) {
643 kiss_fft_cpx *fin, *fout;
644 kiss_fft_cfg config;
645 gmt_M_unused(GMT); gmt_M_unused(mode);
646
647 /* Initialize a FFT (or IFFT) config/state data structure */
648 config = kiss_fft_alloc(n, direction == GMT_FFT_INV, NULL, NULL);
649 fin = fout = (kiss_fft_cpx *)data;
650 kiss_fft (config, fin, fout); /* do transform */
651 gmt_M_str_free (config); /* Free config data structure */
652
653 return GMT_NOERROR;
654 }
655
gmtfft_2d_kiss(struct GMT_CTRL * GMT,gmt_grdfloat * data,unsigned int n_columns,unsigned int n_rows,int direction,unsigned int mode)656 GMT_LOCAL int gmtfft_2d_kiss (struct GMT_CTRL *GMT, gmt_grdfloat *data, unsigned int n_columns, unsigned int n_rows, int direction, unsigned int mode) {
657 const int dim[2] = {n_rows, n_columns}; /* dimensions of fft */
658 const int dimcount = 2; /* number of dimensions */
659 kiss_fft_cpx *fin, *fout;
660 kiss_fftnd_cfg config;
661 gmt_M_unused(GMT); gmt_M_unused(mode);
662
663 /* Initialize a FFT (or IFFT) config/state data structure */
664 config = kiss_fftnd_alloc (dim, dimcount, direction == GMT_FFT_INV, NULL, NULL);
665
666 fin = fout = (kiss_fft_cpx *)data;
667 kiss_fftnd (config, fin, fout); /* do transform */
668 gmt_M_str_free (config); /* Free config data structure */
669
670 return GMT_NOERROR;
671 }
672
673
gmtfft_brenner_fourt_f(gmt_grdfloat * data,int * nn,int * ndim,int * ksign,int * iform,gmt_grdfloat * work)674 GMT_LOCAL int gmtfft_brenner_fourt_f (gmt_grdfloat *data, int *nn, int *ndim, int *ksign, int *iform, gmt_grdfloat *work) {
675
676 /* System generated locals */
677 int i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, i__9, i__10, i__11, i__12;
678
679 /* Builtin functions */
680
681 double cos(double), sin(double);
682
683 /* Local variables */
684
685 static int j1rg2, idiv, irem, ipar, kmin, imin, jmin, lmax, mmax, imax, jmax;
686 static int ntwo, j1cnj, np1hf, np2hf, j1min, i1max, i1rng, j1rng, j2min, j3max;
687 static int j1max, j2max, i2max, non2t, j2stp, i, j, k, l, m, n, icase, ifact[32];
688 static int nhalf, krang, kconj, kdif, idim, ntot, kstep, k2, k3, k4, nprev, iquot;
689 static int i2, i1, i3, j3, k1, j2, j1, if_, np0, np1, np2, ifp1, ifp2, non2;
690
691 static gmt_grdfloat theta, oldsi, tempi, oldsr, sinth, difi, difr, sumi, sumr, tempr, twopi;
692 static gmt_grdfloat wstpi, wstpr, twowr, wi, wr, u1i, w2i, w3i, u2i, u3i, u4i, t2i, u1r;
693 static gmt_grdfloat u2r, u3r, w2r, w3r, u4r, t2r, t3r, t3i, t4r, t4i;
694 static double wrd, wid;
695
696 /*---------------------------------------------------------------------------
697 ARGUMENTS :
698 DATA - COMPLEX ARRAY, LENGTH NN
699 NN - ARRAY OF NUMBER OF POINTS IN EACH DIMENSION
700 NDIM - NUMBER OF DIMENSIONS (FOR OUR PURPOSES, NDIM=1)
701 KSIGN - +1 FOR INVERSE TRANSFORM (FREQ TO GMT_TIME DOMAIN)
702 -1 FOR FORWARD TRANSFORM (GMT_TIME TO FREQ DOMAIN)
703 IFORM - 0 REAL DATA
704 +1 COMPLEX DATA
705 WORK - 0 IF ALL DIMENSIONS ARE RADIX 2
706 COMPLEX ARRAY, LARGE AS LARGEST NON-RADIX 2 DIMENSI0N
707
708 PROGRAM BY NORMAN BRENNER FROM THE BASIC PROGRAM BY CHARLES
709 RADER. RALPH ALTER SUGGESTED THE IDEA FOR THE DIGIT REVERSAL.
710 MIT LINCOLN LABORATORY, AUGUST 1967.
711
712 ---------------------------------------------------------------------------*/
713
714 /* Parameter adjustments */
715 if (work) --work;
716 --nn;
717 --data;
718
719 /* Function Body */
720 wr = wi = wstpr = wstpi = (gmt_grdfloat)0.0;
721 twopi = (gmt_grdfloat)6.283185307;
722 if (*ndim - 1 >= 0) {
723 goto L1;
724 } else {
725 goto L920;
726 }
727 L1:
728 ntot = 2;
729 i__1 = *ndim;
730 for (idim = 1; idim <= i__1; ++idim) {
731 if (nn[idim] <= 0) {
732 goto L920;
733 } else {
734 goto L2;
735 }
736 L2:
737 ntot *= nn[idim];
738 }
739 np1 = 2;
740 i__1 = *ndim;
741 for (idim = 1; idim <= i__1; ++idim) {
742 n = nn[idim];
743 np2 = np1 * n;
744 if ((i__2 = n - 1) < 0) {
745 goto L920;
746 } else if (i__2 == 0) {
747 goto L900;
748 } else {
749 goto L5;
750 }
751 L5:
752 m = n;
753 ntwo = np1;
754 if_ = 1;
755 idiv = 2;
756 L10:
757 iquot = m / idiv;
758 irem = m - idiv * iquot;
759 if (iquot - idiv >= 0) {
760 goto L11;
761 } else {
762 goto L50;
763 }
764 L11:
765 if (irem != 0) {
766 goto L20;
767 } else {
768 goto L12;
769 }
770 L12:
771 ntwo += ntwo;
772 m = iquot;
773 goto L10;
774 L20:
775 idiv = 3;
776 L30:
777 iquot = m / idiv;
778 irem = m - idiv * iquot;
779 if (iquot - idiv >= 0) {
780 goto L31;
781 } else {
782 goto L60;
783 }
784 L31:
785 if (irem != 0) {
786 goto L40;
787 } else {
788 goto L32;
789 }
790 L32:
791 ifact[if_ - 1] = idiv;
792 ++if_;
793 m = iquot;
794 goto L30;
795 L40:
796 idiv += 2;
797 goto L30;
798 L50:
799 if (irem != 0) {
800 goto L60;
801 } else {
802 goto L51;
803 }
804 L51:
805 ntwo += ntwo;
806 goto L70;
807 L60:
808 ifact[if_ - 1] = m;
809 L70:
810 non2 = np1 * (np2 / ntwo);
811 icase = 1;
812 if (idim - 4 >= 0) {
813 goto L90;
814 } else {
815 goto L71;
816 }
817 L71:
818 if (*iform <= 0) {
819 goto L72;
820 } else {
821 goto L90;
822 }
823 L72:
824 icase = 2;
825 if (idim - 1 <= 0) {
826 goto L73;
827 } else {
828 goto L90;
829 }
830 L73:
831 icase = 3;
832 if (ntwo - np1 <= 0) {
833 goto L90;
834 } else {
835 goto L74;
836 }
837 L74:
838 icase = 4;
839 ntwo /= 2;
840 n /= 2;
841 np2 /= 2;
842 ntot /= 2;
843 i = 3;
844 i__2 = ntot;
845 for (j = 2; j <= i__2; ++j) {
846 data[j] = data[i];
847 i += 2;
848 }
849 L90:
850 i1rng = np1;
851 if (icase - 2 != 0) {
852 goto L100;
853 } else {
854 goto L95;
855 }
856 L95:
857 i1rng = np0 * (nprev / 2 + 1);
858 L100:
859 if (ntwo - np1 <= 0) {
860 goto L600;
861 } else {
862 goto L110;
863 }
864 L110:
865 np2hf = np2 / 2;
866 j = 1;
867 i__2 = np2;
868 i__3 = non2;
869 for (i2 = 1; i__3 < 0 ? i2 >= i__2 : i2 <= i__2; i2 += i__3) {
870 if (j - i2 >= 0) {
871 goto L130;
872 } else {
873 goto L120;
874 }
875 L120:
876 i1max = i2 + non2 - 2;
877 i__4 = i1max;
878 for (i1 = i2; i1 <= i__4; i1 += 2) {
879 i__5 = ntot;
880 i__6 = np2;
881 for (i3 = i1; i__6 < 0 ? i3 >= i__5 : i3 <= i__5; i3 += i__6) {
882 j3 = j + i3 - i2;
883 tempr = data[i3];
884 tempi = data[i3 + 1];
885 data[i3] = data[j3];
886 data[i3 + 1] = data[j3 + 1];
887 data[j3] = tempr;
888 data[j3 + 1] = tempi;
889 }
890 }
891 L130:
892 m = np2hf;
893 L140:
894 if (j - m <= 0) {
895 goto L150;
896 } else {
897 goto L145;
898 }
899 L145:
900 j -= m;
901 m /= 2;
902 if (m - non2 >= 0) {
903 goto L140;
904 } else {
905 goto L150;
906 }
907 L150:
908 j += m;
909 }
910 non2t = non2 + non2;
911 ipar = ntwo / np1;
912 L310:
913 if ((i__3 = ipar - 2) < 0) {
914 goto L350;
915 } else if (i__3 == 0) {
916 goto L330;
917 } else {
918 goto L320;
919 }
920 L320:
921 ipar /= 4;
922 goto L310;
923 L330:
924 i__3 = i1rng;
925 for (i1 = 1; i1 <= i__3; i1 += 2) {
926 i__2 = non2;
927 i__6 = np1;
928 for (j3 = i1; i__6 < 0 ? j3 >= i__2 : j3 <= i__2; j3 += i__6) {
929 i__5 = ntot;
930 i__4 = non2t;
931 for (k1 = j3; i__4 < 0 ? k1 >= i__5 : k1 <= i__5; k1 += i__4) {
932 k2 = k1 + non2;
933 tempr = data[k2];
934 tempi = data[k2 + 1];
935 data[k2] = data[k1] - tempr;
936 data[k2 + 1] = data[k1 + 1] - tempi;
937 data[k1] += tempr;
938 data[k1 + 1] += tempi;
939 }
940 }
941 }
942 L350:
943 mmax = non2;
944 L360:
945 if (mmax - np2hf >= 0) {
946 goto L600;
947 } else {
948 goto L370;
949 }
950 L370:
951 /* Computing MAX */
952 i__4 = non2t, i__5 = mmax / 2;
953 lmax = MAX(i__4,i__5);
954 if (mmax - non2 <= 0) {
955 goto L405;
956 } else {
957 goto L380;
958 }
959 L380:
960 theta = -twopi * (gmt_grdfloat) non2 / (gmt_grdfloat) (mmax << 2);
961 if (*ksign >= 0) {
962 goto L390;
963 } else {
964 goto L400;
965 }
966 L390:
967 theta = -theta;
968 L400:
969 sincos ((double)theta, &wid, &wrd);
970 wr = (gmt_grdfloat)wrd;
971 wi = (gmt_grdfloat)wid;
972 wstpr = (gmt_grdfloat)-2.0 * wi * wi;
973 wstpi = (gmt_grdfloat)2.0 * wr * wi;
974 L405:
975 i__4 = lmax;
976 i__5 = non2t;
977 for (l = non2; i__5 < 0 ? l >= i__4 : l <= i__4; l += i__5) {
978 m = l;
979 if (mmax - non2 <= 0) {
980 goto L420;
981 } else {
982 goto L410;
983 }
984 L410:
985 w2r = wr * wr - wi * wi;
986 w2i = (gmt_grdfloat)(wr * 2.0 * wi);
987 w3r = w2r * wr - w2i * wi;
988 w3i = w2r * wi + w2i * wr;
989 L420:
990 i__6 = i1rng;
991 for (i1 = 1; i1 <= i__6; i1 += 2) {
992 i__2 = non2;
993 i__3 = np1;
994 for (j3 = i1; i__3 < 0 ? j3 >= i__2 : j3 <= i__2; j3 += i__3) {
995 kmin = j3 + ipar * m;
996 if (mmax - non2 <= 0) {
997 goto L430;
998 } else {
999 goto L440;
1000 }
1001 L430:
1002 kmin = j3;
1003 L440:
1004 kdif = ipar * mmax;
1005 L450:
1006 kstep = kdif << 2;
1007 i__7 = ntot;
1008 i__8 = kstep;
1009 for (k1 = kmin; i__8 < 0 ? k1 >= i__7 : k1 <= i__7; k1 += i__8) {
1010 k2 = k1 + kdif;
1011 k3 = k2 + kdif;
1012 k4 = k3 + kdif;
1013 if (mmax - non2 <= 0) {
1014 goto L460;
1015 } else {
1016 goto L480;
1017 }
1018 L460:
1019 u1r = data[k1] + data[k2];
1020 u1i = data[k1 + 1] + data[k2 + 1];
1021 u2r = data[k3] + data[k4];
1022 u2i = data[k3 + 1] + data[k4 + 1];
1023 u3r = data[k1] - data[k2];
1024 u3i = data[k1 + 1] - data[k2 + 1];
1025 if (*ksign >= 0) {
1026 goto L475;
1027 } else {
1028 goto L470;
1029 }
1030 L470:
1031 u4r = data[k3 + 1] - data[k4 + 1];
1032 u4i = data[k4] - data[k3];
1033 goto L510;
1034 L475:
1035 u4r = data[k4 + 1] - data[k3 + 1];
1036 u4i = data[k3] - data[k4];
1037 goto L510;
1038 L480:
1039 t2r = w2r * data[k2] - w2i * data[k2 + 1];
1040 t2i = w2r * data[k2 + 1] + w2i * data[k2];
1041 t3r = wr * data[k3] - wi * data[k3 + 1];
1042 t3i = wr * data[k3 + 1] + wi * data[k3];
1043 t4r = w3r * data[k4] - w3i * data[k4 + 1];
1044 t4i = w3r * data[k4 + 1] + w3i * data[k4];
1045 u1r = data[k1] + t2r;
1046 u1i = data[k1 + 1] + t2i;
1047 u2r = t3r + t4r;
1048 u2i = t3i + t4i;
1049 u3r = data[k1] - t2r;
1050 u3i = data[k1 + 1] - t2i;
1051 if (*ksign >= 0) {
1052 goto L500;
1053 } else {
1054 goto L490;
1055 }
1056 L490:
1057 u4r = t3i - t4i;
1058 u4i = t4r - t3r;
1059 goto L510;
1060 L500:
1061 u4r = t4i - t3i;
1062 u4i = t3r - t4r;
1063 L510:
1064 data[k1] = u1r + u2r;
1065 data[k1 + 1] = u1i + u2i;
1066 data[k2] = u3r + u4r;
1067 data[k2 + 1] = u3i + u4i;
1068 data[k3] = u1r - u2r;
1069 data[k3 + 1] = u1i - u2i;
1070 data[k4] = u3r - u4r;
1071 data[k4 + 1] = u3i - u4i;
1072 }
1073 kmin = ((kmin - j3) << 2) + j3;
1074 kdif = kstep;
1075 if (kdif - np2 >= 0) {
1076 goto L530;
1077 } else {
1078 goto L450;
1079 }
1080 L530:
1081 ;
1082 }
1083 }
1084 m = mmax - m;
1085 if (*ksign >= 0) {
1086 goto L550;
1087 } else {
1088 goto L540;
1089 }
1090 L540:
1091 tempr = wr;
1092 wr = -wi;
1093 wi = -tempr;
1094 goto L560;
1095 L550:
1096 tempr = wr;
1097 wr = wi;
1098 wi = tempr;
1099 L560:
1100 if (m - lmax <= 0) {
1101 goto L565;
1102 } else {
1103 goto L410;
1104 }
1105 L565:
1106 tempr = wr;
1107 wr = wr * wstpr - wi * wstpi + wr;
1108 wi = wi * wstpr + tempr * wstpi + wi;
1109 }
1110 ipar = 3 - ipar;
1111 mmax += mmax;
1112 goto L360;
1113 L600:
1114 if (ntwo - np2 >= 0) {
1115 goto L700;
1116 } else {
1117 goto L605;
1118 }
1119 L605:
1120 ifp1 = non2;
1121 if_ = 1;
1122 np1hf = np1 / 2;
1123 L610:
1124 ifp2 = ifp1 / ifact[if_ - 1];
1125 j1rng = np2;
1126 if (icase - 3 != 0) {
1127 goto L612;
1128 } else {
1129 goto L611;
1130 }
1131 L611:
1132 j1rng = (np2 + ifp1) / 2;
1133 j2stp = np2 / ifact[if_ - 1];
1134 j1rg2 = (j2stp + ifp2) / 2;
1135 L612:
1136 j2min = ifp2 + 1;
1137 if (ifp1 - np2 >= 0) {
1138 goto L640;
1139 } else {
1140 goto L615;
1141 }
1142 L615:
1143 i__5 = ifp1;
1144 i__4 = ifp2;
1145 for (j2 = j2min; i__4 < 0 ? j2 >= i__5 : j2 <= i__5; j2 += i__4) {
1146 theta = -twopi * (gmt_grdfloat) (j2 - 1) / (gmt_grdfloat) np2;
1147 if (*ksign >= 0) {
1148 goto L620;
1149 } else {
1150 goto L625;
1151 }
1152 L620:
1153 theta = -theta;
1154 L625:
1155 sinth = (gmt_grdfloat)sin((double)(0.5 * theta));
1156 wstpr = sinth * (gmt_grdfloat)(-2. * sinth);
1157 wstpi = (gmt_grdfloat)sin((double)theta);
1158 wr = wstpr + (gmt_grdfloat)1.0;
1159 wi = wstpi;
1160 j1min = j2 + ifp1;
1161 i__3 = j1rng;
1162 i__2 = ifp1;
1163 for (j1 = j1min; i__2 < 0 ? j1 >= i__3 : j1 <= i__3; j1 += i__2) {
1164
1165 i1max = j1 + i1rng - 2;
1166 i__6 = i1max;
1167 for (i1 = j1; i1 <= i__6; i1 += 2) {
1168 i__8 = ntot;
1169 i__7 = np2;
1170 for (i3 = i1; i__7 < 0 ? i3 >= i__8 : i3 <= i__8; i3 += i__7) {
1171 j3max = i3 + ifp2 - np1;
1172 i__9 = j3max;
1173 i__10 = np1;
1174 for (j3 = i3; i__10 < 0 ? j3 >= i__9 : j3 <= i__9; j3 += i__10) {
1175 tempr = data[j3];
1176 data[j3] = data[j3] * wr - data[j3 + 1] * wi;
1177 data[j3 + 1] = tempr * wi + data[j3 + 1] * wr;
1178 }
1179 }
1180 }
1181 tempr = wr;
1182 wr = wr * wstpr - wi * wstpi + wr;
1183 wi = tempr * wstpi + wi * wstpr + wi;
1184 }
1185 }
1186 L640:
1187 theta = -twopi / (gmt_grdfloat) ifact[if_ - 1];
1188 if (*ksign >= 0) {
1189 goto L645;
1190 } else {
1191 goto L650;
1192 }
1193 L645:
1194 theta = -theta;
1195 L650:
1196 sinth = (gmt_grdfloat)sin((double)(0.5 * theta));
1197 wstpr = sinth * (gmt_grdfloat)(-2. * sinth);
1198 wstpi = (gmt_grdfloat)sin((double)theta);
1199 kstep = (n << 1) / ifact[if_ - 1];
1200 krang = kstep * (ifact[if_ - 1] / 2) + 1;
1201 i__2 = i1rng;
1202 for (i1 = 1; i1 <= i__2; i1 += 2) {
1203 i__3 = ntot;
1204 i__4 = np2;
1205 for (i3 = i1; i__4 < 0 ? i3 >= i__3 : i3 <= i__3; i3 += i__4) {
1206 i__5 = krang;
1207 i__10 = kstep;
1208 for (kmin = 1; i__10 < 0 ? kmin >= i__5 : kmin <= i__5; kmin += i__10) {
1209 j1max = i3 + j1rng - ifp1;
1210 i__9 = j1max;
1211 i__7 = ifp1;
1212 for (j1 = i3; i__7 < 0 ? j1 >= i__9 : j1 <= i__9; j1 += i__7) {
1213 j3max = j1 + ifp2 - np1;
1214 i__8 = j3max;
1215 i__6 = np1;
1216 for (j3 = j1; i__6 < 0 ? j3 >= i__8 : j3 <= i__8; j3 += i__6) {
1217 j2max = j3 + ifp1 - ifp2;
1218 k = kmin + (j3 - j1 + (j1 - i3) / ifact[if_ - 1]) / np1hf;
1219 if (kmin - 1 <= 0) {
1220 goto L655;
1221 } else {
1222 goto L665;
1223 }
1224 L655:
1225 sumr = (gmt_grdfloat)0.0;
1226 sumi = (gmt_grdfloat)0.0;
1227 i__11 = j2max;
1228 i__12 = ifp2;
1229 for (j2 = j3; i__12 < 0 ? j2 >= i__11 : j2 <= i__11; j2 += i__12) {
1230 sumr += data[j2];
1231 sumi += data[j2 + 1];
1232 }
1233 work[k] = sumr;
1234 work[k + 1] = sumi;
1235 goto L680;
1236 L665:
1237 kconj = k + ((n - kmin + 1) << 1);
1238 j2 = j2max;
1239 sumr = data[j2];
1240 sumi = data[j2 + 1];
1241 oldsr = (gmt_grdfloat)0.0;
1242 oldsi = (gmt_grdfloat)0.0;
1243 j2 -= ifp2;
1244 L670:
1245 tempr = sumr;
1246 tempi = sumi;
1247 sumr = twowr * sumr - oldsr + data[j2];
1248 sumi = twowr * sumi - oldsi + data[j2 + 1];
1249 oldsr = tempr;
1250 oldsi = tempi;
1251 j2 -= ifp2;
1252 if (j2 - j3 <= 0) {
1253 goto L675;
1254 } else {
1255 goto L670;
1256 }
1257 L675:
1258 tempr = wr * sumr - oldsr + data[j2];
1259 tempi = wi * sumi;
1260 work[k] = tempr - tempi;
1261 work[kconj] = tempr + tempi;
1262 tempr = wr * sumi - oldsi + data[j2 + 1];
1263 tempi = wi * sumr;
1264 work[k + 1] = tempr + tempi;
1265 work[kconj + 1] = tempr - tempi;
1266 L680:
1267 ;
1268 }
1269 }
1270 if (kmin - 1 <= 0) {
1271 goto L685;
1272 } else {
1273 goto L686;
1274 }
1275 L685:
1276 wr = wstpr + (gmt_grdfloat)1.0;
1277 wi = wstpi;
1278 goto L690;
1279 L686:
1280 tempr = wr;
1281 wr = wr * wstpr - wi * wstpi + wr;
1282 wi = tempr * wstpi + wi * wstpr + wi;
1283 L690:
1284 twowr = wr + wr;
1285 }
1286 if (icase - 3 != 0) {
1287 goto L692;
1288 } else {
1289 goto L691;
1290 }
1291 L691:
1292 if (ifp1 - np2 >= 0) {
1293 goto L692;
1294 } else {
1295 goto L695;
1296 }
1297 L692:
1298 k = 1;
1299 i2max = i3 + np2 - np1;
1300 i__10 = i2max;
1301 i__5 = np1;
1302 for (i2 = i3; i__5 < 0 ? i2 >= i__10 : i2 <= i__10; i2 += i__5) {
1303 data[i2] = work[k];
1304 data[i2 + 1] = work[k + 1];
1305 k += 2;
1306 }
1307 goto L698;
1308 L695:
1309 j3max = i3 + ifp2 - np1;
1310 i__5 = j3max;
1311 i__10 = np1;
1312 for (j3 = i3; i__10 < 0 ? j3 >= i__5 : j3 <= i__5; j3 += i__10) {
1313 j2max = j3 + np2 - j2stp;
1314 i__6 = j2max;
1315 i__8 = j2stp;
1316 for (j2 = j3; i__8 < 0 ? j2 >= i__6 : j2 <= i__6; j2 += i__8) {
1317 j1max = j2 + j1rg2 - ifp2;
1318 j1cnj = j3 + j2max + j2stp - j2;
1319 i__7 = j1max;
1320 i__9 = ifp2;
1321 for (j1 = j2; i__9 < 0 ? j1 >= i__7 : j1 <= i__7; j1 += i__9) {
1322 k = j1 + 1 - i3;
1323 data[j1] = work[k];
1324 data[j1 + 1] = work[k + 1];
1325 if (j1 - j2 <= 0) {
1326 goto L697;
1327 } else {
1328 goto L696;
1329 }
1330 L696:
1331 data[j1cnj] = work[k];
1332 data[j1cnj + 1] = -work[k + 1];
1333 L697:
1334 j1cnj -= ifp2;
1335 }
1336 }
1337 }
1338 L698:
1339 ;
1340 }
1341 }
1342 ++if_;
1343 ifp1 = ifp2;
1344 if (ifp1 - np1 <= 0) {
1345 goto L700;
1346 } else {
1347 goto L610;
1348 }
1349 L700:
1350 switch ((int)icase) {
1351 case 1: goto L900;
1352 case 2: goto L800;
1353 case 3: goto L900;
1354 case 4: goto L701;
1355 }
1356 L701:
1357 nhalf = n;
1358 n += n;
1359 theta = -twopi / (gmt_grdfloat) n;
1360 if (*ksign >= 0) {
1361 goto L702;
1362 } else {
1363 goto L703;
1364 }
1365 L702:
1366 theta = -theta;
1367 L703:
1368 sinth = (gmt_grdfloat)sin((double)(0.5 * theta));
1369 wstpr = sinth * (gmt_grdfloat)(-2. * sinth);
1370 wstpi = (gmt_grdfloat)sin((double)theta);
1371 wr = wstpr + (gmt_grdfloat)1.0;
1372 wi = wstpi;
1373 imin = 3;
1374 jmin = (nhalf << 1) - 1;
1375 goto L725;
1376 L710:
1377 j = jmin;
1378 i__4 = ntot;
1379 i__3 = np2;
1380 for (i = imin; i__3 < 0 ? i >= i__4 : i <= i__4; i += i__3) {
1381 sumr = (gmt_grdfloat)0.5 * (data[i] + data[j]);
1382 sumi = (gmt_grdfloat)0.5 * (data[i + 1] + data[j + 1]);
1383 difr = (gmt_grdfloat)0.5 * (data[i] - data[j]);
1384 difi = (gmt_grdfloat)0.5 * (data[i + 1] - data[j + 1]);
1385 tempr = wr * sumi + wi * difr;
1386 tempi = wi * sumi - wr * difr;
1387 data[i] = sumr + tempr;
1388 data[i + 1] = difi + tempi;
1389 data[j] = sumr - tempr;
1390 data[j + 1] = -difi + tempi;
1391 j += np2;
1392 }
1393 imin += 2;
1394 jmin += -2;
1395 tempr = wr;
1396 wr = wr * wstpr - wi * wstpi + wr;
1397 wi = tempr * wstpi + wi * wstpr + wi;
1398 L725:
1399 if ((i__3 = imin - jmin) < 0) {
1400 goto L710;
1401 } else if (i__3 == 0) {
1402 goto L730;
1403 } else {
1404 goto L740;
1405 }
1406 L730:
1407 if (*ksign >= 0) {
1408 goto L740;
1409 } else {
1410 goto L731;
1411 }
1412 L731:
1413 i__3 = ntot;
1414 i__4 = np2;
1415 for (i = imin; i__4 < 0 ? i >= i__3 : i <= i__3; i += i__4) {
1416 data[i + 1] = -data[i + 1];
1417 }
1418 L740:
1419 np2 += np2;
1420 ntot += ntot;
1421 j = ntot + 1;
1422 imax = ntot / 2 + 1;
1423 L745:
1424 imin = imax - (nhalf << 1);
1425 i = imin;
1426 goto L755;
1427 L750:
1428 data[j] = data[i];
1429 data[j + 1] = -data[i + 1];
1430 L755:
1431 i += 2;
1432 j += -2;
1433 if (i - imax >= 0) {
1434 goto L760;
1435 } else {
1436 goto L750;
1437 }
1438 L760:
1439 data[j] = data[imin] - data[imin + 1];
1440 data[j + 1] = (gmt_grdfloat)0.0;
1441 if (i - j >= 0) {
1442 goto L780;
1443 } else {
1444 goto L770;
1445 }
1446 L765:
1447 data[j] = data[i];
1448 data[j + 1] = data[i + 1];
1449 L770:
1450 i += -2;
1451 j += -2;
1452 if (i - imin <= 0) {
1453 goto L775;
1454 } else {
1455 goto L765;
1456 }
1457 L775:
1458 data[j] = data[imin] + data[imin + 1];
1459 data[j + 1] = (gmt_grdfloat)0.0;
1460 imax = imin;
1461 goto L745;
1462 L780:
1463 data[1] += data[2];
1464 data[2] = (gmt_grdfloat)0.0;
1465 goto L900;
1466 L800:
1467 if (i1rng - np1 >= 0) {
1468 goto L900;
1469 } else {
1470 goto L805;
1471 }
1472 L805:
1473 i__4 = ntot;
1474 i__3 = np2;
1475 for (i3 = 1; i__3 < 0 ? i3 >= i__4 : i3 <= i__4; i3 += i__3) {
1476 i2max = i3 + np2 - np1;
1477 i__2 = i2max;
1478 i__9 = np1;
1479 for (i2 = i3; i__9 < 0 ? i2 >= i__2 : i2 <= i__2; i2 += i__9) {
1480 imin = i2 + i1rng;
1481 imax = i2 + np1 - 2;
1482 jmax = (i3 << 1) + np1 - imin;
1483 if (i2 - i3 <= 0) {
1484 goto L820;
1485 } else {
1486 goto L810;
1487 }
1488 L810:
1489 jmax += np2;
1490 L820:
1491 if (idim - 2 <= 0) {
1492 goto L850;
1493 } else {
1494 goto L830;
1495 }
1496 L830:
1497 j = jmax + np0;
1498 i__7 = imax;
1499 for (i = imin; i <= i__7; i += 2) {
1500 data[i] = data[j];
1501 data[i + 1] = -data[j + 1];
1502 j += -2;
1503 }
1504 L850:
1505 j = jmax;
1506 i__7 = imax;
1507 i__8 = np0;
1508 for (i = imin; i__8 < 0 ? i >= i__7 : i <= i__7; i += i__8) {
1509 data[i] = data[j];
1510 data[i + 1] = -data[j + 1];
1511 j -= np0;
1512 }
1513 }
1514 }
1515 L900:
1516 np0 = np1;
1517 np1 = np2;
1518 nprev = n;
1519 }
1520 L920:
1521 return 0;
1522 } /* fourt_ */
1523
gmtfft_brenner_worksize(struct GMT_CTRL * GMT,unsigned int n_columns,unsigned int n_rows)1524 GMT_LOCAL size_t gmtfft_brenner_worksize (struct GMT_CTRL *GMT, unsigned int n_columns, unsigned int n_rows) {
1525 /* Find the size of the workspace that will be needed by the transform.
1526 * To use this routine for a 1-D transform, set n_rows = 1.
1527 *
1528 * This is all based on the comments in Norman Brenner's code
1529 * FOURT, from which our C codes are translated.
1530 *
1531 * Let m = largest prime factor in the list of factors.
1532 * Let p = product of all primes which appear an odd number of
1533 * times in the list of prime factors. Then the worksize needed
1534 * s = max(m,p). However, we know that if n is radix 2, then no
1535 * work is required; yet this formula would say we need a worksize
1536 * of at least 2. So I will return s = 0 when max(m,p) = 2.
1537 *
1538 * W. H. F. Smith, 26 February 1992.
1539 * */
1540 unsigned int f[32], n_factors, nonsymx, nonsymy, nonsym;
1541 size_t storage, ntotal;
1542
1543 /* Find workspace needed. First find non_symmetric factors in n_columns, n_rows */
1544 n_factors = gmt_get_prime_factors (GMT, n_columns, f);
1545 nonsymx = (unsigned int)gmtfft_get_non_symmetric_f (f, n_factors);
1546 n_factors = gmt_get_prime_factors (GMT, n_rows, f);
1547 nonsymy = (unsigned int)gmtfft_get_non_symmetric_f (f, n_factors);
1548 nonsym = MAX (nonsymx, nonsymy);
1549
1550 /* Now get factors of ntotal */
1551 ntotal = gmt_M_get_nm (GMT, n_columns, n_rows);
1552 n_factors = gmt_get_prime_factors (GMT, ntotal, f);
1553 storage = (n_factors > 0) ? MAX (nonsym, f[n_factors-1]) : nonsym;
1554 if (storage != 2) storage *= 2;
1555 if (storage < n_columns) storage = n_columns;
1556 if (storage < n_rows) storage = n_rows;
1557 return (2 * storage);
1558 }
1559
1560 /* C-callable wrapper for gmtfft_brenner_fourt_f */
gmtfft_1d_brenner(struct GMT_CTRL * GMT,gmt_grdfloat * data,unsigned int n,int direction,unsigned int mode)1561 GMT_LOCAL int gmtfft_1d_brenner (struct GMT_CTRL *GMT, gmt_grdfloat *data, unsigned int n, int direction, unsigned int mode) {
1562 /* void GMT_fourt (struct GMT_CTRL *GMT, gmt_grdfloat *data, int *nn, int ndim, int ksign, int iform, gmt_grdfloat *work) */
1563 /* Data array */
1564 /* Dimension array */
1565 /* Number of dimensions */
1566 /* Forward(-1) or Inverse(+1) */
1567 /* Real(0) or complex(1) data */
1568 /* Work array */
1569
1570 int ksign, ndim = 1, n_signed = n, kmode = mode;
1571 size_t work_size = 0;
1572 gmt_grdfloat *work = NULL;
1573
1574 ksign = (direction == GMT_FFT_INV) ? +1 : -1;
1575 if ((work_size = gmtfft_brenner_worksize (GMT, n, 1))) work = gmt_M_memory (GMT, NULL, work_size, gmt_grdfloat);
1576 (void) gmtfft_brenner_fourt_f (data, &n_signed, &ndim, &ksign, &kmode, work);
1577 gmt_M_free (GMT, work);
1578 return (GMT_OK);
1579 }
1580
gmtfft_2d_brenner(struct GMT_CTRL * GMT,gmt_grdfloat * data,unsigned int n_columns,unsigned int n_rows,int direction,unsigned int mode)1581 GMT_LOCAL int gmtfft_2d_brenner (struct GMT_CTRL *GMT, gmt_grdfloat *data, unsigned int n_columns, unsigned int n_rows, int direction, unsigned int mode) {
1582 /* Data array */
1583 /* Dimension array */
1584 /* Number of dimensions */
1585 /* Forward(-1) or Inverse(+1) */
1586 /* Real(0) or complex(1) data */
1587 /* Work array */
1588
1589 int ksign, ndim = 2, nn[2] = {n_columns, n_rows}, kmode = mode;
1590 size_t work_size = 0;
1591 gmt_grdfloat *work = NULL;
1592
1593 ksign = (direction == GMT_FFT_INV) ? +1 : -1;
1594 if ((work_size = gmtfft_brenner_worksize (GMT, n_columns, n_rows))) work = gmt_M_memory (GMT, NULL, work_size, gmt_grdfloat);
1595 GMT_Report (GMT->parent, GMT_MSG_DEBUG, "Brenner_fourt_ work size = %" PRIuS "\n", work_size);
1596 (void) gmtfft_brenner_fourt_f (data, nn, &ndim, &ksign, &kmode, work);
1597 gmt_M_free (GMT, work);
1598 return (GMT_OK);
1599 }
1600
gmtfft_1d_selection(struct GMT_CTRL * GMT,uint64_t n)1601 GMT_LOCAL int gmtfft_1d_selection (struct GMT_CTRL *GMT, uint64_t n) {
1602 /* Returns the most suitable 1-D FFT for the job - or the one requested via GMT_FFT */
1603 if (GMT->current.setting.fft != k_fft_auto) {
1604 /* Specific selection requested */
1605 if (GMT->session.fft1d[GMT->current.setting.fft])
1606 return GMT->current.setting.fft; /* User defined FFT */
1607 GMT_Report (GMT->parent, GMT_MSG_WARNING, "Desired FFT Algorithm (%s) not configured - choosing suitable alternative.\n", GMT_fft_algo[GMT->current.setting.fft]);
1608 }
1609 /* Here we want automatic selection from available candidates */
1610 if (GMT->session.fft1d[k_fft_accelerate] && gmtfft_radix2 (n))
1611 return k_fft_accelerate; /* Use if Radix-2 under OS/X */
1612 if (GMT->session.fft1d[k_fft_fftw])
1613 return k_fft_fftw;
1614 return k_fft_kiss; /* Default/fallback general-purpose FFT */
1615 }
1616
gmtfft_2d_selection(struct GMT_CTRL * GMT,unsigned int n_columns,unsigned int n_rows)1617 GMT_LOCAL int gmtfft_2d_selection (struct GMT_CTRL *GMT, unsigned int n_columns, unsigned int n_rows) {
1618 /* Returns the most suitable 2-D FFT for the job - or the one requested via GMT_FFT */
1619 if (GMT->current.setting.fft != k_fft_auto) {
1620 /* Specific selection requested */
1621 if (GMT->session.fft2d[GMT->current.setting.fft])
1622 return GMT->current.setting.fft; /* User defined FFT */
1623 GMT_Report (GMT->parent, GMT_MSG_WARNING, "Desired FFT Algorithm (%s) not configured - choosing suitable alternative.\n", GMT_fft_algo[GMT->current.setting.fft]);
1624 }
1625 /* Here we want automatic selection from available candidates */
1626 if (GMT->session.fft2d[k_fft_accelerate] && gmtfft_radix2 (n_columns) && gmtfft_radix2 (n_rows))
1627 return k_fft_accelerate; /* Use if Radix-2 under OS/X */
1628 if (GMT->session.fft2d[k_fft_fftw])
1629 return k_fft_fftw;
1630 return k_fft_kiss; /* Default/fallback general-purpose FFT */
1631 }
1632
GMT_FFT_1D(void * V_API,gmt_grdfloat * data,uint64_t n,int direction,unsigned int mode)1633 int GMT_FFT_1D (void *V_API, gmt_grdfloat *data, uint64_t n, int direction, unsigned int mode) {
1634 /* data is an array of length n (or 2*n for complex) data points
1635 * n is the number of data points
1636 * direction is either GMT_FFT_FWD (forward) or GMT_FFT_INV (inverse)
1637 * mode is either GMT_FFT_REAL or GMT_FFT_COMPLEX
1638 */
1639 int status, use;
1640 struct GMTAPI_CTRL *API = gmtfft_get_api_ptr (V_API);
1641 struct GMT_CTRL *GMT = API->GMT;
1642 assert (mode == GMT_FFT_COMPLEX); /* GMT_FFT_REAL not implemented yet */
1643 use = gmtfft_1d_selection (GMT, n);
1644 GMT_Report (GMT->parent, GMT_MSG_INFORMATION, "1-D FFT using %s\n", GMT_fft_algo[use]);
1645 status = GMT->session.fft1d[use] (GMT, data, (unsigned int)n, direction, mode);
1646 if (direction == GMT_FFT_INV) { /* Undo the 2/nm factor */
1647 uint64_t nm = 2ULL * n;
1648 gmt_scale_and_offset_f (GMT, data, nm, 2.0 / nm, 0);
1649 }
1650 return status;
1651 }
1652
GMT_FFT_2D(void * V_API,gmt_grdfloat * data,unsigned int n_columns,unsigned int n_rows,int direction,unsigned int mode)1653 int GMT_FFT_2D (void *V_API, gmt_grdfloat *data, unsigned int n_columns, unsigned int n_rows, int direction, unsigned int mode) {
1654 /* data is an array of length n_columns*n_rows (or 2*n_columns*n_rows for complex) data points
1655 * n_columns, n_rows is the number of data nodes
1656 * direction is either GMT_FFT_FWD (forward) or GMT_FFT_INV (inverse)
1657 * mode is either GMT_FFT_REAL or GMT_FFT_COMPLEX
1658 */
1659 int status, use;
1660 struct GMTAPI_CTRL *API = gmtfft_get_api_ptr (V_API);
1661 struct GMT_CTRL *GMT = API->GMT;
1662 assert (mode == GMT_FFT_COMPLEX); /* GMT_FFT_REAL not implemented yet */
1663 use = gmtfft_2d_selection (GMT, n_columns, n_rows);
1664
1665 GMT_Report (GMT->parent, GMT_MSG_INFORMATION, "2-D FFT using %s\n", GMT_fft_algo[use]);
1666 status = GMT->session.fft2d[use] (GMT, data, n_columns, n_rows, direction, mode);
1667 if (direction == GMT_FFT_INV) { /* Undo the 2/nm factor */
1668 uint64_t nm = 2ULL * n_columns * n_rows;
1669 gmt_scale_and_offset_f (GMT, data, nm, 2.0 / nm, 0);
1670 }
1671 return status;
1672 }
1673
gmtlib_fft_initialization(struct GMT_CTRL * GMT)1674 void gmtlib_fft_initialization (struct GMT_CTRL *GMT) {
1675 /* Called by gmt_begin and sets up pointers to the available FFT calls */
1676 #if defined HAVE_FFTW3F_THREADS
1677 int n_cpu = gmtlib_get_num_processors();
1678 #endif /* HAVE_FFTW3_THREADS */
1679
1680 #ifdef HAVE_FFTW3F
1681 GMT->current.setting.fftw_plan = FFTW_ESTIMATE; /* default planner flag [only accessed if FFTW is compiled in] */
1682 #endif /* HAVE_FFTW3F */
1683
1684 #if defined HAVE_FFTW3F_THREADS
1685 if (n_cpu > 1 && !GMT->current.setting.fftwf_threads) {
1686 /* one-time initialization required to use FFTW3 threads */
1687 if ( fftwf_init_threads() ) {
1688 fftwf_plan_with_nthreads(n_cpu);
1689 GMT_Report (GMT->parent, GMT_MSG_DEBUG, "Initialize FFTW with %d threads.\n", n_cpu);
1690 }
1691 }
1692 #endif /* HAVE_FFTW3_THREADS */
1693
1694 /* Start with nothing */
1695 memset (GMT->session.fft1d, k_n_fft_algorithms, sizeof(void*));
1696 memset (GMT->session.fft2d, k_n_fft_algorithms, sizeof(void*));
1697
1698 #ifdef __APPLE__
1699 /* OS X Accelerate Framework */
1700 GMT->session.fft1d[k_fft_accelerate] = &gmtfft_1d_vDSP;
1701 GMT->session.fft2d[k_fft_accelerate] = &gmtfft_2d_vDSP;
1702 #endif
1703 #ifdef HAVE_FFTW3F
1704 /* single precision FFTW3 */
1705 GMT->session.fft1d[k_fft_fftw] = &gmtfft_1d_fftwf;
1706 GMT->session.fft2d[k_fft_fftw] = &gmtfft_2d_fftwf;
1707 #endif /* HAVE_FFTW3F */
1708 /* Kiss FFT is the integrated fallback */
1709 GMT->session.fft1d[k_fft_kiss] = &gmtfft_1d_kiss;
1710 GMT->session.fft2d[k_fft_kiss] = &gmtfft_2d_kiss;
1711 /* Brenner FFT is the legacy fallback */
1712 GMT->session.fft1d[k_fft_brenner] = &gmtfft_1d_brenner;
1713 GMT->session.fft2d[k_fft_brenner] = &gmtfft_2d_brenner;
1714 }
1715
gmtlib_fft_cleanup(struct GMT_CTRL * GMT)1716 void gmtlib_fft_cleanup (struct GMT_CTRL *GMT) {
1717 /* Called by gmt_end */
1718 #ifndef __APPLE__
1719 gmt_M_unused(GMT);
1720 #endif
1721 #if defined HAVE_FFTW3F_THREADS
1722 fftwf_cleanup_threads(); /* clean resources allocated internally by FFTW */
1723 #endif
1724 #ifdef __APPLE__ /* Accelerate framework */
1725 gmtfft_gmtfft_1d_vDSP_reset (&GMT->current.fft);
1726 gmtfft_gmtfft_2d_vDSP_reset (&GMT->current.fft);
1727 #endif
1728 }
1729