1 /*--------------------------------------------------------------------
2  *
3  *	Copyright (c) 1991-2021 by the GMT Team (https://www.generic-mapping-tools.org/team.html)
4  *	See LICENSE.TXT file for copying and redistribution conditions.
5  *
6  *	This program is free software; you can redistribute it and/or modify
7  *	it under the terms of the GNU Lesser General Public License as published by
8  *	the Free Software Foundation; version 3 or any later version.
9  *
10  *	This program is distributed in the hope that it will be useful,
11  *	but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *	GNU Lesser General Public License for more details.
14  *
15  *	Contact info: www.generic-mapping-tools.org
16  *--------------------------------------------------------------------*/
17 /*
18  * Here are various ways to perform 1-D and 2-D Fourier transforms.
19  * Most of these were provided by other people, as indicated below.
20  *
21  *
22  * Author:  Paul Wessel
23  * Date:    1-APR-2011
24  * Version: 5
25  *
26  * Major overhaul, Florian Wobbe, 2012-07-09:
27  *  Added free Kiss FFT (kissfft) to GMT code base.
28  *  Superseded Norman Brenner's ancient Cooley-Tukey FFT implementation
29  *    Kiss FFT. Brenner still available as a legacy/compativility choice.
30  *  Support for Paul Swarztrauber's ancient FFTPACK and for Sun Performance
31  *    Library (perflib) have been removed too because they are not maintained
32  *    anymore.
33  *
34  *  FFTW    : FFTW library (supplied externally)
35  *  vDSP    : OSX Accelerate Framework (OSX only)
36  *  Kiss FFT: Free FFT, based on the principle "Keep It Simple, Stupid"
37  *  Configure the implementation with gmtset GMT_FFT.
38  *
39  *--------------------------------------------------------------------------
40  * Public functions declared in gmt_dev.h ():
41  *
42  *
43  */
44 
45 #include "gmt_dev.h"
46 #include "gmt_internals.h"
47 
48 static char *GMT_fft_algo[] = {
49 	"Auto-Select",
50 	"Accelerate Framework",
51 	"FFTW",
52 	"Kiss FFT",
53 	"Brenner FFT [Legacy]"
54 };
55 
56 /* Functions that fascilitating setting up FFT operations, like determining optimal dimension,
57  * setting wavenumber functions, apply interior or exterior taper to grids, save pre-fft grid
58  * to file, save fft grid (real and imag or mag,hypot) to two files.  Programs that wish to
59  * operate on data in the wavenumber domain will need to use some of these functions; see
60  * grdfft.c and potential/gravfft.c as examples.
61  */
62 
gmtfft_propose_radix2(uint64_t n)63 static inline uint64_t gmtfft_propose_radix2 (uint64_t n) {
64 	/* Returns the smallest base 2 exponent, log2n, that satisfies: 2^log2n >= n */
65 	uint64_t log2n = 1;
66 	while ( 1ULL<<log2n < n ) ++log2n; /* log2n = 1<<(unsigned)ceil(log2(n)); */
67 	return log2n;
68 }
69 
gmtfft_radix2(uint64_t n)70 static inline uint64_t gmtfft_radix2 (uint64_t n) {
71 	/* Returns the base 2 exponent that represents 'n' if 'n' is a power of 2,
72 	 * 0 otherwise */
73 	uint64_t log2n = 1ULL;
74 	while ( 1ULL<<log2n < n ) ++log2n; /* log2n = 1<<(unsigned)ceil(log2(n)); */
75 	if (n == 1ULL<<log2n)
76 		return log2n;
77 	return 0ULL;
78 }
79 
gmtfft_get_fftwave_ptr(struct GMT_FFT_WAVENUMBER * ptr)80 static inline struct GMT_FFT_WAVENUMBER * gmtfft_get_fftwave_ptr (struct GMT_FFT_WAVENUMBER *ptr) {return (ptr);}
gmtfft_get_api_ptr(struct GMTAPI_CTRL * ptr)81 static inline struct GMTAPI_CTRL * gmtfft_get_api_ptr (struct GMTAPI_CTRL *ptr)  {return (ptr);}
82 
gmtfft_get_non_symmetric_f(unsigned int * f,unsigned int n)83 GMT_LOCAL uint64_t gmtfft_get_non_symmetric_f (unsigned int *f, unsigned int n) {
84 	/* Return the product of the non-symmetric factors in f[]  */
85 	unsigned int i = 0, j = 1, retval = 1;
86 
87 	if (n == 1) return (f[0]);
88 
89 	while (i < n) {
90 		while (j < n && f[j] == f[i]) j++;
91 		if ((j-i)%2) retval *= f[i];
92 		i = j;
93 		j = i + 1;
94 	}
95 	if (retval == 1) retval = 0;	/* There are no non-sym factors  */
96 	return (retval);
97 }
98 
99 #ifndef FSIGNIF
100 #define FSIGNIF			24
101 #endif
102 
gmtlib_fourt_stats(struct GMT_CTRL * GMT,unsigned int n_columns,unsigned int n_rows,unsigned int * f,double * r,size_t * s,double * t)103 void gmtlib_fourt_stats (struct GMT_CTRL *GMT, unsigned int n_columns, unsigned int n_rows, unsigned int *f, double *r, size_t *s, double *t) {
104 	/* Find the proportional run time, t, and rms relative error, r,
105 	 * of a Fourier transform of size n_columns,n_rows.  Also gives s, the size
106 	 * of the workspace that will be needed by the transform.
107 	 * To use this routine for a 1-D transform, set n_rows = 1.
108 	 *
109 	 * This is all based on the comments in Norman Brenner's code
110 	 * FOURT, from which our C codes are translated.
111 	 * Brenner says:
112 	 * r = 3 * pow(2, -FSIGNIF) * sum{ pow(prime_factors, 1.5) }
113 	 * where FSIGNIF is the smallest bit in the floating point fraction.
114 	 *
115 	 * Let m = largest prime factor in the list of factors.
116 	 * Let p = product of all primes which appear an odd number of
117 	 * times in the list of prime factors.  Then the worksize needed
118 	 * s = max(m,p).  However, we know that if n is radix 2, then no
119 	 * work is required; yet this formula would say we need a worksize
120 	 * of at least 2.  So I will return s = 0 when max(m,p) = 2.
121 	 *
122 	 * I have two different versions of the comments in FOURT, with
123 	 * different formulae for t.  The simple formula says
124 	 * 	t = n * (sum of prime factors of n).
125 	 * The more complicated formula gives coefficients in microsecs
126 	 * on a cdc3300 (ancient history, but perhaps proportional):
127 	 *	t = 3000 + n*(500 + 43*s2 + 68*sf + 320*nf),
128 	 * where s2 is the sum of all factors of 2, sf is the sum of all
129 	 * factors greater than 2, and nf is the number of factors != 2.
130 	 * We know that factors of 2 are very good to have, and indeed,
131 	 * Brenner's code calls different routines depending on whether
132 	 * the transform is of size 2 or not, so I think that the second
133 	 * formula is more correct, using proportions of 43:68 for 2 and
134 	 * non-2 factors.  So I will use the more complicated formula.
135 	 * However, I realize that the actual numbers are wrong for today's
136 	 * architectures, and the relative proportions may be wrong as well.
137 	 *
138 	 * W. H. F. Smith, 26 February 1992.
139 	 *  */
140 
141 	unsigned int n_factors, i, sum2, sumnot2, nnot2;
142 	uint64_t nonsymx, nonsymy, nonsym, storage, ntotal;
143 	double err_scale;
144 
145 	/* Find workspace needed.  First find non_symmetric factors in n_columns, n_rows  */
146 	n_factors = gmt_get_prime_factors (GMT, n_columns, f);
147 	nonsymx = gmtfft_get_non_symmetric_f (f, n_factors);
148 	n_factors = gmt_get_prime_factors (GMT, n_rows, f);
149 	nonsymy = gmtfft_get_non_symmetric_f (f, n_factors);
150 	nonsym = MAX (nonsymx, nonsymy);
151 
152 	/* Now get factors of ntotal  */
153 	ntotal = gmt_M_get_nm (GMT, n_columns, n_rows);
154 	n_factors = gmt_get_prime_factors (GMT, ntotal, f);
155 	storage = MAX (nonsym, f[n_factors-1]);
156 	*s = (storage == 2) ? 0 : storage;
157 
158 	/* Now find time and error estimates */
159 
160 	err_scale = 0.0;
161 	sum2 = sumnot2 = nnot2 = 0;
162 	for(i = 0; i < n_factors; i++) {
163 		if (f[i] == 2)
164 			sum2 += f[i];
165 		else {
166 			sumnot2 += f[i];
167 			nnot2++;
168 		}
169 		err_scale += pow ((double)f[i], 1.5);
170 	}
171 	*t = 1.0e-06 * (3000.0 + ntotal * (500.0 + 43.0 * sum2 + 68.0 * sumnot2 + 320.0 * nnot2));
172 	*r = err_scale * 3.0 * pow (2.0, -FSIGNIF);
173 
174 	return;
175 }
176 
gmtlib_suggest_fft_dim(struct GMT_CTRL * GMT,unsigned int n_columns,unsigned int n_rows,struct GMT_FFT_SUGGESTION * fft_sug,bool do_print)177 void gmtlib_suggest_fft_dim (struct GMT_CTRL *GMT, unsigned int n_columns, unsigned int n_rows, struct GMT_FFT_SUGGESTION *fft_sug, bool do_print) {
178 	unsigned int f[32], xstop, ystop;
179 	unsigned int nx_best_t, ny_best_t;
180 	unsigned int nx_best_e, ny_best_e;
181 	unsigned int nx_best_s, ny_best_s;
182 	unsigned int nxg, nyg;       /* Guessed by this routine  */
183 	unsigned int nx2, ny2, nx3, ny3, nx5, ny5;   /* For powers  */
184 	size_t current_space, best_space, e_space, t_space, given_space;
185 	double current_time, best_time, given_time, s_time, e_time;
186 	double current_err, best_err, given_err, s_err, t_err;
187 
188 	gmtlib_fourt_stats (GMT, n_columns, n_rows, f, &given_err, &given_space, &given_time);
189 	given_space += n_columns * n_rows;
190 	given_space *= 8;
191 	if (do_print)
192 		GMT_Report (GMT->parent, GMT_MSG_INFORMATION, " Data dimension\t%d %d\ttime factor %.8g\trms error %.8e\tbytes %" PRIuS "\n", n_columns, n_rows, given_time, given_err, given_space);
193 
194 	best_err = s_err = t_err = given_err;
195 	best_time = s_time = e_time = given_time;
196 	best_space = t_space = e_space = given_space;
197 	nx_best_e = nx_best_t = nx_best_s = n_columns;
198 	ny_best_e = ny_best_t = ny_best_s = n_rows;
199 
200 	xstop = 2 * n_columns;
201 	ystop = 2 * n_rows;
202 
203 	for (nx2 = 2; nx2 <= xstop; nx2 *= 2) {
204 	  	for (nx3 = 1; nx3 <= xstop; nx3 *= 3) {
205 		    for (nx5 = 1; nx5 <= xstop; nx5 *= 5) {
206 		        nxg = nx2 * nx3 * nx5;
207 		        if (nxg < n_columns || nxg > xstop) continue;
208 
209 		        for (ny2 = 2; ny2 <= ystop; ny2 *= 2) {
210 		          for (ny3 = 1; ny3 <= ystop; ny3 *= 3) {
211 		            for (ny5 = 1; ny5 <= ystop; ny5 *= 5) {
212 		                nyg = ny2 * ny3 * ny5;
213 		                if (nyg < n_rows || nyg > ystop) continue;
214 
215 			gmtlib_fourt_stats (GMT, nxg, nyg, f, &current_err, &current_space, &current_time);
216 			current_space += nxg*nyg;
217 			current_space *= 8;
218 			if (current_err < best_err) {
219 				best_err = current_err;
220 				nx_best_e = nxg;
221 				ny_best_e = nyg;
222 				e_time = current_time;
223 				e_space = current_space;
224 			}
225 			if (current_time < best_time) {
226 				best_time = current_time;
227 				nx_best_t = nxg;
228 				ny_best_t = nyg;
229 				t_err = current_err;
230 				t_space = current_space;
231 			}
232 			if (current_space < best_space) {
233 				best_space = current_space;
234 				nx_best_s = nxg;
235 				ny_best_s = nyg;
236 				s_time = current_time;
237 				s_err = current_err;
238 			}
239 
240 		    }
241 		  }
242 		}
243 
244 	    }
245 	  }
246 	}
247 
248 	if (do_print) {
249 		GMT_Report (GMT->parent, GMT_MSG_INFORMATION, " Highest speed\t%d %d\ttime factor %.8g\trms error %.8e\tbytes %" PRIuS "\n",
250 			nx_best_t, ny_best_t, best_time, t_err, t_space);
251 		GMT_Report (GMT->parent, GMT_MSG_INFORMATION, " Most accurate\t%d %d\ttime factor %.8g\trms error %.8e\tbytes %" PRIuS "\n",
252 			nx_best_e, ny_best_e, e_time, best_err, e_space);
253 		GMT_Report (GMT->parent, GMT_MSG_INFORMATION, " Least storage\t%d %d\ttime factor %.8g\trms error %.8e\tbytes %" PRIuS "\n",
254 			nx_best_s, ny_best_s, s_time, s_err, best_space);
255 	}
256 	/* Fastest solution */
257 	fft_sug[GMT_FFT_FAST].n_columns = nx_best_t;
258 	fft_sug[GMT_FFT_FAST].n_rows = ny_best_t;
259 	fft_sug[GMT_FFT_FAST].worksize = (t_space/8) - (nx_best_t * ny_best_t);
260 	fft_sug[GMT_FFT_FAST].totalbytes = t_space;
261 	fft_sug[GMT_FFT_FAST].run_time = best_time;
262 	fft_sug[GMT_FFT_FAST].rms_rel_err = t_err;
263 	/* Most accurate solution */
264 	fft_sug[GMT_FFT_ACCURATE].n_columns = nx_best_e;
265 	fft_sug[GMT_FFT_ACCURATE].n_rows = ny_best_e;
266 	fft_sug[GMT_FFT_ACCURATE].worksize = (e_space/8) - (nx_best_e * ny_best_e);
267 	fft_sug[GMT_FFT_ACCURATE].totalbytes = e_space;
268 	fft_sug[GMT_FFT_ACCURATE].run_time = e_time;
269 	fft_sug[GMT_FFT_ACCURATE].rms_rel_err = best_err;
270 	/* Least storage solution */
271 	fft_sug[GMT_FFT_STORAGE].n_columns = nx_best_s;
272 	fft_sug[GMT_FFT_STORAGE].n_rows = ny_best_s;
273 	fft_sug[GMT_FFT_STORAGE].worksize = (best_space/8) - (nx_best_s * ny_best_s);
274 	fft_sug[GMT_FFT_STORAGE].totalbytes = best_space;
275 	fft_sug[GMT_FFT_STORAGE].run_time = s_time;
276 	fft_sug[GMT_FFT_STORAGE].rms_rel_err = s_err;
277 
278 	return;
279 }
280 
gmtfft_kx(uint64_t k,struct GMT_FFT_WAVENUMBER * K)281 GMT_LOCAL double gmtfft_kx (uint64_t k, struct GMT_FFT_WAVENUMBER *K) {
282 	/* Return the value of kx given k,
283 	 * where kx = 2 pi / lambda x,
284 	 * and k refers to the position
285 	 * in the complex data array Grid->data[k].  */
286 
287 	int64_t ii = (k/2)%(K->nx2);
288 	if (ii > (K->nx2)/2) ii -= (K->nx2);
289 	return (ii * K->delta_kx);
290 }
291 
gmtfft_ky(uint64_t k,struct GMT_FFT_WAVENUMBER * K)292 GMT_LOCAL double gmtfft_ky (uint64_t k, struct GMT_FFT_WAVENUMBER *K) {
293 	/* Return the value of ky given k,
294 	 * where ky = 2 pi / lambda y,
295 	 * and k refers to the position
296 	 * in the complex data array Grid->data[k].  */
297 
298 	int64_t jj = (k/2)/(K->nx2);
299 	if (jj > (K->ny2)/2) jj -= (K->ny2);
300 	return (jj * K->delta_ky);
301 }
302 
gmtfft_kr(uint64_t k,struct GMT_FFT_WAVENUMBER * K)303 GMT_LOCAL double gmtfft_kr (uint64_t k, struct GMT_FFT_WAVENUMBER *K) {
304 	/* Return the value of sqrt(kx*kx + ky*ky),
305 	 * where k refers to the position
306 	 * in the complex data array Grid->data[k].  */
307 
308 	return (hypot (gmtfft_kx (k, K), gmtfft_ky (k, K)));
309 }
310 
gmt_fft_get_wave(uint64_t k,struct GMT_FFT_WAVENUMBER * K)311 double gmt_fft_get_wave (uint64_t k, struct GMT_FFT_WAVENUMBER *K) {
312 	/* Return the value of kx, ky. or kr,
313 	 * where k refers to the position
314 	 * in the complex data array Grid->data[k].
315 	 * GMT_fft_init sets the pointer */
316 
317 	return (K->k_ptr (k, K));
318 }
319 
gmt_fft_any_wave(uint64_t k,unsigned int mode,struct GMT_FFT_WAVENUMBER * K)320 double gmt_fft_any_wave (uint64_t k, unsigned int mode, struct GMT_FFT_WAVENUMBER *K) {
321 	/* Lets you specify which wavenumber you want */
322 	double wave = 0.0;
323 
324 	switch (mode) {	/* Select which wavenumber we need */
325 		case GMT_FFT_K_IS_KX: wave = gmtfft_kx (k, K); break;
326 		case GMT_FFT_K_IS_KY: wave = gmtfft_ky (k, K); break;
327 		case GMT_FFT_K_IS_KR: wave = gmtfft_kr (k, K); break;
328 	}
329 	return (wave);
330 }
331 
gmt_fft_set_wave(struct GMT_CTRL * GMT,unsigned int mode,struct GMT_FFT_WAVENUMBER * K)332 int gmt_fft_set_wave (struct GMT_CTRL *GMT, unsigned int mode, struct GMT_FFT_WAVENUMBER *K) {
333 	/* Change wavenumber selection */
334 	switch (mode) {	/* Select which wavenumber we need */
335 		case GMT_FFT_K_IS_KX: K->k_ptr = gmtfft_kx; break;
336 		case GMT_FFT_K_IS_KY: K->k_ptr = gmtfft_ky; break;
337 		case GMT_FFT_K_IS_KR: K->k_ptr = gmtfft_kr; break;
338 		default:
339 			GMT_Report (GMT->parent, GMT_MSG_ERROR, "Bad mode passed to gmt_fft_set_wave (%u)!\n", mode);
340 			return GMT_RUNTIME_ERROR;
341 			break;
342 	}
343 	return GMT_OK;
344 }
345 
346 /*! . */
GMT_FFT_Wavenumber(void * V_API,uint64_t k,unsigned int mode,void * v_K)347 double GMT_FFT_Wavenumber (void *V_API, uint64_t k, unsigned int mode, void *v_K) {
348 	/* Lets you specify which 1-D or 2-D wavenumber you want */
349 	struct GMT_FFT_WAVENUMBER *K = gmtfft_get_fftwave_ptr (v_K);
350 	gmt_M_unused(V_API);
351 	if (K->dim == 2) return (gmt_fft_any_wave (k, mode, K));
352 	else return (gmtfft_kx (k, K));
353 }
354 
355 #ifdef HAVE_FFTW3F
356 
357 #include <fftw3.h>
358 #ifdef _WIN32
359 #include <winsock.h>
360 #endif
361 
362 #define FFTWF_WISDOM_FILENAME "fftwf_wisdom"
363 
gmtfft_fftwf_wisdom_filename(struct GMT_CTRL * GMT)364 GMT_LOCAL char *gmtfft_fftwf_wisdom_filename (struct GMT_CTRL *GMT) {
365 	static char wisdom_file[PATH_MAX+256] = "\0";
366 	char hostname[257];
367 	if (*wisdom_file == '\0') { /* wisdom_file has not been set yet */
368 		if (GMT->session.CACHEDIR == NULL || access (GMT->session.CACHEDIR, R_OK|W_OK|X_OK))
369 			/* CACHEDIR does not exist, or not writable */
370 			return NULL;
371 		else {
372 			/* create wisdom file in CACHEDIR */
373 			strncpy (wisdom_file, GMT->session.CACHEDIR, PATH_MAX);
374 			strcat (wisdom_file, "/" FFTWF_WISDOM_FILENAME "_");
375 			/* cat hostname */
376 			memset (hostname, '\0', 257); /* in case gethostname does not null-terminate string */
377 			gethostname (hostname, 256);
378 			strcat (wisdom_file, hostname);
379 		}
380 	}
381 	return wisdom_file;
382 }
383 
384 /* Wrapper around fftwf_import_wisdom_from_filename */
gmtfft_fftwf_import_wisdom_from_filename(struct GMT_CTRL * GMT)385 GMT_LOCAL void gmtfft_fftwf_import_wisdom_from_filename (struct GMT_CTRL *GMT) {
386 	static bool already_imported = false;
387 	char *filenames[3], **filename = filenames;
388 	int status;
389 
390 	if (already_imported) /* nothing to do */
391 		return;
392 
393 	fftwf_import_system_wisdom (); /* read wisdom from implementation-defined standard file */
394 
395 	/* Initialize filenames */
396 	filenames[0] = FFTWF_WISDOM_FILENAME; /* 1st try importing wisdom from file in current dir */
397 	filenames[1] = gmtfft_fftwf_wisdom_filename(GMT); /* 2nd try wisdom file in CACHEDIR */
398 	filenames[2] = NULL; /* end of array */
399 
400 	while (*filename != NULL) {
401 		if (!access (*filename, R_OK)) {
402 			status = fftwf_import_wisdom_from_filename (*filename);
403 			if (status)
404 				GMT_Report (GMT->parent, GMT_MSG_INFORMATION, "Imported FFTW Wisdom from file: %s\n", *filename);
405 			else
406 				GMT_Report (GMT->parent, GMT_MSG_ERROR, "Importing FFTW Wisdom from file failed: %s\n", *filename);
407 		}
408 		++filename; /* advance to next file in array */
409 	}
410 
411 	already_imported = true;
412 }
413 
414 /* Wrapper around fftwf_export_wisdom_to_filename */
gmtfft_fftwf_export_wisdom_to_filename(struct GMT_CTRL * GMT)415 GMT_LOCAL void gmtfft_fftwf_export_wisdom_to_filename (struct GMT_CTRL *GMT) {
416 	char *filename = gmtfft_fftwf_wisdom_filename(GMT);
417 	int status;
418 
419 	if (filename == NULL)
420 		/* CACHEDIR does not exist, write wisdom to file in current directory */
421 		filename = FFTWF_WISDOM_FILENAME;
422 
423 	status = fftwf_export_wisdom_to_filename (filename);
424 	if (status)
425 		GMT_Report (GMT->parent, GMT_MSG_INFORMATION, "Exported FFTW Wisdom to file: %s\n", filename);
426 	else
427 		GMT_Report (GMT->parent, GMT_MSG_ERROR, "Exporting FFTW Wisdom to file failed: %s\n", filename);
428 }
429 
gmtfft_gmt_fftwf_plan_dft(struct GMT_CTRL * GMT,unsigned n_rows,unsigned n_columns,fftwf_complex * data,int direction)430 GMT_LOCAL fftwf_plan gmtfft_gmt_fftwf_plan_dft(struct GMT_CTRL *GMT, unsigned n_rows, unsigned n_columns, fftwf_complex *data, int direction) {
431 	/* The first two arguments, n0 and n1, are the size of the two-dimensional
432 	 * transform you are trying to compute. The size n can be any positive
433 	 * integer, but sizes that are products of small factors are transformed
434 	 * most efficiently.
435 	 *
436 	 * The next two arguments are pointers to the input and output arrays of the
437 	 * transform. These pointers can be equal, indicating an in-place transform.
438 	 *
439 	 * The fourth argument, sign, can be either FFTW_FORWARD (-1) or
440 	 * FFTW_BACKWARD (+1), and indicates the direction of the transform you are
441 	 * interested in; technically, it is the sign of the exponent in the
442 	 * transform.
443 	 *
444 	 * The flags argument is usually either FFTW_MEASURE or FFTW_ESTIMATE.
445 	 * FFTW_MEASURE instructs FFTW to run and measure the execution time of
446 	 * several FFTs in order to find the best way to compute the transform of
447 	 * size n. This process takes some time (usually a few seconds), depending
448 	 * on your machine and on the size of the transform.
449 	 *
450 	 * FFTW planner flags supported by the planner routines in FFTW
451 	 * FFTW_ESTIMATE:   pick a (probably sub-optimal) plan quickly
452 	 * FFTW_MEASURE:    find optimal plan by computing several FFTs and measuring
453 	 *                  their execution time
454 	 * FFTW_PATIENT:    like FFTW_MEASURE, but considers a wider range of algorithms
455 	 * FFTW_EXHAUSTIVE: like FFTW_PATIENT, but considers an even wider range of
456 	 *                  algorithms
457 	 *
458 	 * Important: the planner overwrites the input array during planning unless
459 	 * a saved plan (see Wisdom) is available for that problem, so you should
460 	 * initialize your input data after creating the plan. The only exceptions
461 	 * to this are the FFTW_ESTIMATE and FFTW_WISDOM_ONLY flags. */
462 
463 	int sign;
464 	fftwf_complex *cin, *cout;
465 	fftwf_plan plan = NULL;
466 
467 	sign = direction == GMT_FFT_FWD ? FFTW_FORWARD : FFTW_BACKWARD;
468 	cin  = data;
469 	cout = cin; /* in-place transform */
470 
471 	if (GMT->current.setting.fftw_plan != FFTW_ESTIMATE) {
472 		gmtfft_fftwf_import_wisdom_from_filename (GMT);
473 		if (n_rows == 0) /* 1d DFT */
474 			plan = fftwf_plan_dft_1d(n_columns, cin, cout, sign, FFTW_WISDOM_ONLY | GMT->current.setting.fftw_plan);
475 		else /* 2d DFT */
476 			plan = fftwf_plan_dft_2d(n_rows, n_columns, cin, cout, sign, FFTW_WISDOM_ONLY | GMT->current.setting.fftw_plan);
477 		if (plan == NULL) {
478 			/* No Wisdom available
479 			 * Need extra memory to prevent overwriting data while planning */
480 			fftwf_complex *in_place_tmp = fftwf_malloc (2 * (n_rows == 0 ? 1 : n_rows) * n_columns * sizeof(gmt_grdfloat));
481 			GMT_Report (GMT->parent, GMT_MSG_INFORMATION, "Generating new FFTW Wisdom, be patient...\n");
482 			if (n_rows == 0) /* 1d DFT */
483 				plan = fftwf_plan_dft_1d(n_columns, in_place_tmp, in_place_tmp, sign, GMT->current.setting.fftw_plan);
484 			else /* 2d DFT */
485 				plan = fftwf_plan_dft_2d(n_rows, n_columns, in_place_tmp, in_place_tmp, sign, GMT->current.setting.fftw_plan);
486 			fftwf_destroy_plan(plan); /* deallocate plan */
487 			plan = NULL;
488 			fftwf_free (in_place_tmp);
489 			/* Save new Wisdom */
490 			gmtfft_fftwf_export_wisdom_to_filename (GMT);
491 		}
492 		else
493 			GMT_Report (GMT->parent, GMT_MSG_INFORMATION, "Using preexisting FFTW Wisdom.\n");
494 	} /* GMT->current.setting.fftw_plan != FFTW_ESTIMATE */
495 	else
496 		GMT_Report (GMT->parent, GMT_MSG_INFORMATION, "Picking a (probably sub-optimal) FFTW plan quickly.\n");
497 
498 	if (plan == NULL) { /* If either FFTW_ESTIMATE or new Wisdom generated */
499 		if (n_rows == 0) /* 1d DFT */
500 			plan = fftwf_plan_dft_1d(n_columns, cin, cout, sign, GMT->current.setting.fftw_plan);
501 		else /* 2d DFT */
502 			plan = fftwf_plan_dft_2d(n_rows, n_columns, cin, cout, sign, GMT->current.setting.fftw_plan);
503 	}
504 
505 	if (plan == NULL) /* There was a problem creating a plan */
506 		GMT_Report (GMT->parent, GMT_MSG_ERROR, "Could not create FFTW plan!\n");
507 
508 	return plan;
509 }
510 
gmtfft_1d_fftwf(struct GMT_CTRL * GMT,gmt_grdfloat * data,unsigned int n,int direction,unsigned int mode)511 GMT_LOCAL int gmtfft_1d_fftwf (struct GMT_CTRL *GMT, gmt_grdfloat *data, unsigned int n, int direction, unsigned int mode) {
512 	fftwf_plan plan = NULL;
513 	gmt_M_unused(mode);
514 
515 	/* Generate FFTW plan for complex 1d DFT */
516 	plan = gmtfft_gmt_fftwf_plan_dft(GMT, 0, n, (fftwf_complex*)data, direction);
517 	fftwf_execute(plan);      /* do transform */
518 	fftwf_destroy_plan(plan); /* deallocate plan */
519 
520 	return GMT_NOERROR;
521 }
522 
gmtfft_2d_fftwf(struct GMT_CTRL * GMT,gmt_grdfloat * data,unsigned int n_columns,unsigned int n_rows,int direction,unsigned int mode)523 GMT_LOCAL int gmtfft_2d_fftwf (struct GMT_CTRL *GMT, gmt_grdfloat *data, unsigned int n_columns, unsigned int n_rows, int direction, unsigned int mode) {
524 	fftwf_plan plan = NULL;
525 	gmt_M_unused(mode);
526 
527 	/* Generate FFTW plan for complex 2d DFT */
528 	plan = gmtfft_gmt_fftwf_plan_dft(GMT, n_rows, n_columns, (fftwf_complex*)data, direction);
529 	fftwf_execute(plan);      /* do transform */
530 	fftwf_destroy_plan(plan); /* deallocate plan */
531 
532 	return GMT_NOERROR;
533 }
534 
535 #endif /* HAVE_FFTW3F */
536 
537 #ifdef __APPLE__ /* Accelerate framework */
538 
gmtfft_gmtfft_1d_vDSP_reset(struct GMT_FFT_HIDDEN * Z)539 GMT_LOCAL void gmtfft_gmtfft_1d_vDSP_reset (struct GMT_FFT_HIDDEN *Z) {
540 	if (Z->setup_1d) {	/* Free single-precision FFT data structure and arrays */
541 		vDSP_destroy_fftsetup (Z->setup_1d);
542 		gmt_M_str_free (Z->dsp_split_complex_1d.realp);
543 		gmt_M_str_free (Z->dsp_split_complex_1d.imagp);
544 	}
545 }
546 
gmtfft_1d_vDSP(struct GMT_CTRL * GMT,gmt_grdfloat * data,unsigned int n,int direction,unsigned int mode)547 GMT_LOCAL int gmtfft_1d_vDSP (struct GMT_CTRL *GMT, gmt_grdfloat *data, unsigned int n, int direction, unsigned int mode) {
548 	FFTDirection fft_direction = direction == GMT_FFT_FWD ?
549 			kFFTDirection_Forward : kFFTDirection_Inverse;
550 	DSPComplex *dsp_complex = (DSPComplex *)data;
551 
552 	/* Base 2 exponent that specifies the largest power of
553 	 * two that can be processed by fft: */
554 	vDSP_Length log2n = gmtfft_radix2 (n);
555 	gmt_M_unused(mode);
556 
557 	if (log2n == 0) {
558 		GMT_Report (GMT->parent, GMT_MSG_ERROR, "Need Radix-2 input try: %u [n]\n", 1U<<gmtfft_propose_radix2 (n));
559 		return -1;
560 	}
561 
562 	if (GMT->current.fft.n_1d != n) {	/* Must update the FFT setup arrays */
563 		/* Build data structure that contains precalculated data for use by
564 		 * single-precision FFT functions: */
565 		gmtfft_gmtfft_1d_vDSP_reset (&GMT->current.fft);
566 		GMT->current.fft.setup_1d = vDSP_create_fftsetup (log2n, kFFTRadix2);
567 		GMT->current.fft.dsp_split_complex_1d.realp = calloc (n, sizeof(gmt_grdfloat));
568 		GMT->current.fft.dsp_split_complex_1d.imagp = calloc (n, sizeof(gmt_grdfloat));
569 		if (GMT->current.fft.dsp_split_complex_1d.realp == NULL || GMT->current.fft.dsp_split_complex_1d.imagp == NULL) {
570 			GMT_Report (GMT->parent, GMT_MSG_ERROR, "Unable to allocate dsp_split_complex array of length %u\n", n);
571 			return -1; /* out of memory */
572 		}
573 		GMT->current.fft.n_1d = n;
574 	}
575 	vDSP_ctoz (dsp_complex, 2, &GMT->current.fft.dsp_split_complex_1d, 1, n);
576 
577 	vDSP_fft_zip (GMT->current.fft.setup_1d, &GMT->current.fft.dsp_split_complex_1d, 1, log2n, fft_direction);
578 
579 	vDSP_ztoc (&GMT->current.fft.dsp_split_complex_1d, 1, dsp_complex, 2, n);
580 
581 	return GMT_NOERROR;
582 }
583 
gmtfft_gmtfft_2d_vDSP_reset(struct GMT_FFT_HIDDEN * Z)584 GMT_LOCAL void gmtfft_gmtfft_2d_vDSP_reset (struct GMT_FFT_HIDDEN *Z) {
585 	if (Z->setup_2d) {	/* Free single-precision 2D FFT data structure and arrays */
586 		vDSP_destroy_fftsetup (Z->setup_2d);
587 		gmt_M_str_free (Z->dsp_split_complex_2d.realp);
588 		gmt_M_str_free (Z->dsp_split_complex_2d.imagp);
589 	}
590 }
591 
gmtfft_2d_vDSP(struct GMT_CTRL * GMT,gmt_grdfloat * data,unsigned int n_columns,unsigned int n_rows,int direction,unsigned int mode)592 GMT_LOCAL int gmtfft_2d_vDSP (struct GMT_CTRL *GMT, gmt_grdfloat *data, unsigned int n_columns, unsigned int n_rows, int direction, unsigned int mode) {
593 	FFTDirection fft_direction = direction == GMT_FFT_FWD ?
594 			kFFTDirection_Forward : kFFTDirection_Inverse;
595 	DSPComplex *dsp_complex = (DSPComplex *)data;
596 
597 	/* Base 2 exponent that specifies the largest power of
598 	 * two that can be processed by fft: */
599 	vDSP_Length log2nx = gmtfft_radix2 (n_columns);
600 	vDSP_Length log2ny = gmtfft_radix2 (n_rows);
601 	unsigned int n_xy = n_columns * n_rows;
602 	gmt_M_unused(mode);
603 
604 	if (log2nx == 0 || log2ny == 0) {
605 		GMT_Report (GMT->parent, GMT_MSG_ERROR, "Need Radix-2 input try: %u/%u [n_columns/n_rows]\n",
606 				1U<<gmtfft_propose_radix2 (n_columns), 1U<<gmtfft_propose_radix2 (n_rows));
607 		return -1;
608 	}
609 
610 	if (GMT->current.fft.n_2d != n_xy) {	/* Must update the 2-D FFT setup arrays */
611 		/* Build data structure that contains precalculated data for use by
612 	 	* single-precision FFT functions: */
613 		gmtfft_gmtfft_2d_vDSP_reset (&GMT->current.fft);
614 		GMT->current.fft.setup_2d = vDSP_create_fftsetup (MAX (log2nx, log2ny), kFFTRadix2);
615 		GMT->current.fft.dsp_split_complex_2d.realp = calloc (n_xy, sizeof(gmt_grdfloat));
616 		GMT->current.fft.dsp_split_complex_2d.imagp = calloc (n_xy, sizeof(gmt_grdfloat));
617 		if (GMT->current.fft.dsp_split_complex_2d.realp == NULL || GMT->current.fft.dsp_split_complex_2d.imagp == NULL) {
618 			GMT_Report (GMT->parent, GMT_MSG_ERROR, "Unable to allocate dsp_split_complex array of length %u\n", n_xy);
619 			return -1; /* out of memory */
620 		}
621 		GMT->current.fft.n_2d = n_xy;
622 	}
623 	vDSP_ctoz (dsp_complex, 2, &GMT->current.fft.dsp_split_complex_2d, 1, n_xy);
624 
625 	/* complex: */
626 	/* PW note 10/26/2014: We used to pass log2ny, log2nx to vDSP_fft2d_zipbut that gave bad results for n_columns != n_rows.
627 	 * I assume this is because Accelerate expects columns but we pass rows. Now matches KISS, FFTW, etc. */
628 	vDSP_fft2d_zip (GMT->current.fft.setup_2d, &GMT->current.fft.dsp_split_complex_2d, 1, 0, log2nx, log2ny, fft_direction);
629 	/* real:
630 	vDSP_fft2d_zrip (setup, &GMT->current.fft.dsp_split_complex_2d, 1, 0, log2nx, log2ny, fft_direction); */
631 
632 	vDSP_ztoc (&GMT->current.fft.dsp_split_complex_2d, 1, dsp_complex, 2, n_xy);
633 
634 	return GMT_NOERROR;
635 }
636 #endif /* APPLE Accelerate framework */
637 
638 /* Kiss FFT */
639 
640 #include "kiss_fft/kiss_fftnd.h"
641 
gmtfft_1d_kiss(struct GMT_CTRL * GMT,gmt_grdfloat * data,unsigned int n,int direction,unsigned int mode)642 GMT_LOCAL int gmtfft_1d_kiss (struct GMT_CTRL *GMT, gmt_grdfloat *data, unsigned int n, int direction, unsigned int mode) {
643 	kiss_fft_cpx *fin, *fout;
644 	kiss_fft_cfg config;
645 	gmt_M_unused(GMT); gmt_M_unused(mode);
646 
647 	/* Initialize a FFT (or IFFT) config/state data structure */
648 	config = kiss_fft_alloc(n, direction == GMT_FFT_INV, NULL, NULL);
649 	fin = fout = (kiss_fft_cpx *)data;
650 	kiss_fft (config, fin, fout); /* do transform */
651 	gmt_M_str_free (config); /* Free config data structure */
652 
653 	return GMT_NOERROR;
654 }
655 
gmtfft_2d_kiss(struct GMT_CTRL * GMT,gmt_grdfloat * data,unsigned int n_columns,unsigned int n_rows,int direction,unsigned int mode)656 GMT_LOCAL int gmtfft_2d_kiss (struct GMT_CTRL *GMT, gmt_grdfloat *data, unsigned int n_columns, unsigned int n_rows, int direction, unsigned int mode) {
657 	const int dim[2] = {n_rows, n_columns}; /* dimensions of fft */
658 	const int dimcount = 2;      /* number of dimensions */
659 	kiss_fft_cpx *fin, *fout;
660 	kiss_fftnd_cfg config;
661 	gmt_M_unused(GMT); gmt_M_unused(mode);
662 
663 	/* Initialize a FFT (or IFFT) config/state data structure */
664 	config = kiss_fftnd_alloc (dim, dimcount, direction == GMT_FFT_INV, NULL, NULL);
665 
666 	fin = fout = (kiss_fft_cpx *)data;
667 	kiss_fftnd (config, fin, fout); /* do transform */
668 	gmt_M_str_free (config); /* Free config data structure */
669 
670 	return GMT_NOERROR;
671 }
672 
673 
gmtfft_brenner_fourt_f(gmt_grdfloat * data,int * nn,int * ndim,int * ksign,int * iform,gmt_grdfloat * work)674 GMT_LOCAL int gmtfft_brenner_fourt_f (gmt_grdfloat *data, int *nn, int *ndim, int *ksign, int *iform, gmt_grdfloat *work) {
675 
676     /* System generated locals */
677     int i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, i__9, i__10, i__11, i__12;
678 
679     /* Builtin functions */
680 
681     double cos(double), sin(double);
682 
683     /* Local variables */
684 
685     static int j1rg2, idiv, irem, ipar, kmin, imin, jmin, lmax, mmax, imax, jmax;
686     static int ntwo, j1cnj, np1hf, np2hf, j1min, i1max, i1rng, j1rng, j2min, j3max;
687     static int j1max, j2max, i2max, non2t, j2stp, i, j, k, l, m, n, icase, ifact[32];
688     static int nhalf, krang, kconj, kdif, idim, ntot, kstep, k2, k3, k4, nprev, iquot;
689     static int i2, i1, i3, j3, k1, j2, j1, if_, np0, np1, np2, ifp1, ifp2, non2;
690 
691     static gmt_grdfloat theta, oldsi, tempi, oldsr, sinth, difi, difr, sumi, sumr, tempr, twopi;
692     static gmt_grdfloat wstpi, wstpr, twowr, wi, wr, u1i, w2i, w3i, u2i, u3i, u4i, t2i, u1r;
693     static gmt_grdfloat u2r, u3r, w2r, w3r, u4r, t2r, t3r, t3i, t4r, t4i;
694     static double wrd, wid;
695 
696 /*---------------------------------------------------------------------------
697        ARGUMENTS :
698 		DATA - COMPLEX ARRAY, LENGTH NN
699 		NN - ARRAY OF NUMBER OF POINTS IN EACH DIMENSION
700 		NDIM - NUMBER OF DIMENSIONS (FOR OUR PURPOSES, NDIM=1)
701 		KSIGN - +1 FOR INVERSE TRANSFORM (FREQ TO GMT_TIME DOMAIN)
702 			-1 FOR FORWARD TRANSFORM (GMT_TIME TO FREQ DOMAIN)
703 		IFORM - 0 REAL DATA
704 			+1 COMPLEX DATA
705 		WORK - 0 IF ALL DIMENSIONS ARE RADIX 2
706 		COMPLEX ARRAY, LARGE AS LARGEST NON-RADIX 2 DIMENSI0N
707 
708 	PROGRAM BY NORMAN BRENNER FROM THE BASIC PROGRAM BY CHARLES
709 	RADER.  RALPH ALTER SUGGESTED THE IDEA FOR THE DIGIT REVERSAL.
710 	MIT LINCOLN LABORATORY, AUGUST 1967.
711 
712 ---------------------------------------------------------------------------*/
713 
714    /* Parameter adjustments */
715     if (work) --work;
716     --nn;
717     --data;
718 
719     /* Function Body */
720     wr = wi = wstpr = wstpi = (gmt_grdfloat)0.0;
721     twopi = (gmt_grdfloat)6.283185307;
722     if (*ndim - 1 >= 0) {
723 	goto L1;
724     } else {
725 	goto L920;
726     }
727 L1:
728     ntot = 2;
729     i__1 = *ndim;
730     for (idim = 1; idim <= i__1; ++idim) {
731 	if (nn[idim] <= 0) {
732 	    goto L920;
733 	} else {
734 	    goto L2;
735 	}
736 L2:
737 	ntot *= nn[idim];
738     }
739     np1 = 2;
740     i__1 = *ndim;
741     for (idim = 1; idim <= i__1; ++idim) {
742 	n = nn[idim];
743 	np2 = np1 * n;
744 	if ((i__2 = n - 1) < 0) {
745 	    goto L920;
746 	} else if (i__2 == 0) {
747 	    goto L900;
748 	} else {
749 	    goto L5;
750 	}
751 L5:
752 	m = n;
753 	ntwo = np1;
754 	if_ = 1;
755 	idiv = 2;
756 L10:
757 	iquot = m / idiv;
758 	irem = m - idiv * iquot;
759 	if (iquot - idiv >= 0) {
760 	    goto L11;
761 	} else {
762 	    goto L50;
763 	}
764 L11:
765 	if (irem != 0) {
766 	    goto L20;
767 	} else {
768 	    goto L12;
769 	}
770 L12:
771 	ntwo += ntwo;
772 	m = iquot;
773 	goto L10;
774 L20:
775 	idiv = 3;
776 L30:
777 	iquot = m / idiv;
778 	irem = m - idiv * iquot;
779 	if (iquot - idiv >= 0) {
780 	    goto L31;
781 	} else {
782 	    goto L60;
783 	}
784 L31:
785 	if (irem != 0) {
786 	    goto L40;
787 	} else {
788 	    goto L32;
789 	}
790 L32:
791 	ifact[if_ - 1] = idiv;
792 	++if_;
793 	m = iquot;
794 	goto L30;
795 L40:
796 	idiv += 2;
797 	goto L30;
798 L50:
799 	if (irem != 0) {
800 	    goto L60;
801 	} else {
802 	    goto L51;
803 	}
804 L51:
805 	ntwo += ntwo;
806 	goto L70;
807 L60:
808 	ifact[if_ - 1] = m;
809 L70:
810 	non2 = np1 * (np2 / ntwo);
811 	icase = 1;
812 	if (idim - 4 >= 0) {
813 	    goto L90;
814 	} else {
815 	    goto L71;
816 	}
817 L71:
818 	if (*iform <= 0) {
819 	    goto L72;
820 	} else {
821 	    goto L90;
822 	}
823 L72:
824 	icase = 2;
825 	if (idim - 1 <= 0) {
826 	    goto L73;
827 	} else {
828 	    goto L90;
829 	}
830 L73:
831 	icase = 3;
832 	if (ntwo - np1 <= 0) {
833 	    goto L90;
834 	} else {
835 	    goto L74;
836 	}
837 L74:
838 	icase = 4;
839 	ntwo /= 2;
840 	n /= 2;
841 	np2 /= 2;
842 	ntot /= 2;
843 	i = 3;
844 	i__2 = ntot;
845 	for (j = 2; j <= i__2; ++j) {
846 	    data[j] = data[i];
847 	    i += 2;
848 	}
849 L90:
850 	i1rng = np1;
851 	if (icase - 2 != 0) {
852 	    goto L100;
853 	} else {
854 	    goto L95;
855 	}
856 L95:
857 	i1rng = np0 * (nprev / 2 + 1);
858 L100:
859 	if (ntwo - np1 <= 0) {
860 	    goto L600;
861 	} else {
862 	    goto L110;
863 	}
864 L110:
865 	np2hf = np2 / 2;
866 	j = 1;
867 	i__2 = np2;
868 	i__3 = non2;
869 	for (i2 = 1; i__3 < 0 ? i2 >= i__2 : i2 <= i__2; i2 += i__3) {
870 	    if (j - i2 >= 0) {
871 		goto L130;
872 	    } else {
873 		goto L120;
874 	    }
875 L120:
876 	    i1max = i2 + non2 - 2;
877 	    i__4 = i1max;
878 	    for (i1 = i2; i1 <= i__4; i1 += 2) {
879 		i__5 = ntot;
880 		i__6 = np2;
881 		for (i3 = i1; i__6 < 0 ? i3 >= i__5 : i3 <= i__5; i3 += i__6) {
882 		    j3 = j + i3 - i2;
883 		    tempr = data[i3];
884 		    tempi = data[i3 + 1];
885 		    data[i3] = data[j3];
886 		    data[i3 + 1] = data[j3 + 1];
887 		    data[j3] = tempr;
888 		    data[j3 + 1] = tempi;
889 		}
890 	    }
891 L130:
892 	    m = np2hf;
893 L140:
894 	    if (j - m <= 0) {
895 		goto L150;
896 	    } else {
897 		goto L145;
898 	    }
899 L145:
900 	    j -= m;
901 	    m /= 2;
902 	    if (m - non2 >= 0) {
903 		goto L140;
904 	    } else {
905 		goto L150;
906 	    }
907 L150:
908 	    j += m;
909 	}
910 	non2t = non2 + non2;
911 	ipar = ntwo / np1;
912 L310:
913 	if ((i__3 = ipar - 2) < 0) {
914 	    goto L350;
915 	} else if (i__3 == 0) {
916 	    goto L330;
917 	} else {
918 	    goto L320;
919 	}
920 L320:
921 	ipar /= 4;
922 	goto L310;
923 L330:
924 	i__3 = i1rng;
925 	for (i1 = 1; i1 <= i__3; i1 += 2) {
926 	    i__2 = non2;
927 	    i__6 = np1;
928 	    for (j3 = i1; i__6 < 0 ? j3 >= i__2 : j3 <= i__2; j3 +=  i__6) {
929 		i__5 = ntot;
930 		i__4 = non2t;
931 		for (k1 = j3; i__4 < 0 ? k1 >= i__5 : k1 <= i__5; k1 += i__4) {
932 		    k2 = k1 + non2;
933 		    tempr = data[k2];
934 		    tempi = data[k2 + 1];
935 		    data[k2] = data[k1] - tempr;
936 		    data[k2 + 1] = data[k1 + 1] - tempi;
937 		    data[k1] += tempr;
938 		    data[k1 + 1] += tempi;
939 		}
940 	    }
941 	}
942 L350:
943 	mmax = non2;
944 L360:
945 	if (mmax - np2hf >= 0) {
946 	    goto L600;
947 	} else {
948 	    goto L370;
949 	}
950 L370:
951 /* Computing MAX */
952 	i__4 = non2t, i__5 = mmax / 2;
953 	lmax = MAX(i__4,i__5);
954 	if (mmax - non2 <= 0) {
955 	    goto L405;
956 	} else {
957 	    goto L380;
958 	}
959 L380:
960 	theta = -twopi * (gmt_grdfloat) non2 / (gmt_grdfloat) (mmax <<  2);
961 	if (*ksign >= 0) {
962 	    goto L390;
963 	} else {
964 	    goto L400;
965 	}
966 L390:
967 	theta = -theta;
968 L400:
969 	sincos ((double)theta, &wid, &wrd);
970 	wr = (gmt_grdfloat)wrd;
971 	wi = (gmt_grdfloat)wid;
972 	wstpr = (gmt_grdfloat)-2.0 * wi * wi;
973 	wstpi = (gmt_grdfloat)2.0 * wr * wi;
974 L405:
975 	i__4 = lmax;
976 	i__5 = non2t;
977 	for (l = non2; i__5 < 0 ? l >= i__4 : l <= i__4; l += i__5) {
978 	    m = l;
979 	    if (mmax - non2 <= 0) {
980 		goto L420;
981 	    } else {
982 		goto L410;
983 	    }
984 L410:
985 	    w2r = wr * wr - wi * wi;
986 	    w2i = (gmt_grdfloat)(wr * 2.0 * wi);
987 	    w3r = w2r * wr - w2i * wi;
988 	    w3i = w2r * wi + w2i * wr;
989 L420:
990 	    i__6 = i1rng;
991 	    for (i1 = 1; i1 <= i__6; i1 += 2) {
992 		i__2 = non2;
993 		i__3 = np1;
994 		for (j3 = i1; i__3 < 0 ? j3 >= i__2 : j3 <= i__2; j3  += i__3) {
995 		    kmin = j3 + ipar * m;
996 		    if (mmax - non2 <= 0) {
997 			goto L430;
998 		    } else {
999 			goto L440;
1000 		    }
1001 L430:
1002 		    kmin = j3;
1003 L440:
1004 		    kdif = ipar * mmax;
1005 L450:
1006 		    kstep = kdif << 2;
1007 		    i__7 = ntot;
1008 		    i__8 = kstep;
1009 		    for (k1 = kmin; i__8 < 0 ? k1 >= i__7 : k1 <=  i__7; k1 += i__8) {
1010 			k2 = k1 + kdif;
1011 			k3 = k2 + kdif;
1012 			k4 = k3 + kdif;
1013 			if (mmax - non2 <= 0) {
1014 			    goto L460;
1015 			} else {
1016 			    goto L480;
1017 			}
1018 L460:
1019 			u1r = data[k1] + data[k2];
1020 			u1i = data[k1 + 1] + data[k2 + 1];
1021 			u2r = data[k3] + data[k4];
1022 			u2i = data[k3 + 1] + data[k4 + 1];
1023 			u3r = data[k1] - data[k2];
1024 			u3i = data[k1 + 1] - data[k2 + 1];
1025 			if (*ksign >= 0) {
1026 			    goto L475;
1027 			} else {
1028 			    goto L470;
1029 			}
1030 L470:
1031 			u4r = data[k3 + 1] - data[k4 + 1];
1032 			u4i = data[k4] - data[k3];
1033 			goto L510;
1034 L475:
1035 			u4r = data[k4 + 1] - data[k3 + 1];
1036 			u4i = data[k3] - data[k4];
1037 			goto L510;
1038 L480:
1039 			t2r = w2r * data[k2] - w2i * data[k2 + 1];
1040 			t2i = w2r * data[k2 + 1] + w2i * data[k2];
1041 			t3r = wr * data[k3] - wi * data[k3 + 1];
1042 			t3i = wr * data[k3 + 1] + wi * data[k3];
1043 			t4r = w3r * data[k4] - w3i * data[k4 + 1];
1044 			t4i = w3r * data[k4 + 1] + w3i * data[k4];
1045 			u1r = data[k1] + t2r;
1046 			u1i = data[k1 + 1] + t2i;
1047 			u2r = t3r + t4r;
1048 			u2i = t3i + t4i;
1049 			u3r = data[k1] - t2r;
1050 			u3i = data[k1 + 1] - t2i;
1051 			if (*ksign >= 0) {
1052 			    goto L500;
1053 			} else {
1054 			    goto L490;
1055 			}
1056 L490:
1057 			u4r = t3i - t4i;
1058 			u4i = t4r - t3r;
1059 			goto L510;
1060 L500:
1061 			u4r = t4i - t3i;
1062 			u4i = t3r - t4r;
1063 L510:
1064 			data[k1] = u1r + u2r;
1065 			data[k1 + 1] = u1i + u2i;
1066 			data[k2] = u3r + u4r;
1067 			data[k2 + 1] = u3i + u4i;
1068 			data[k3] = u1r - u2r;
1069 			data[k3 + 1] = u1i - u2i;
1070 			data[k4] = u3r - u4r;
1071 			data[k4 + 1] = u3i - u4i;
1072 		    }
1073 		    kmin = ((kmin - j3) << 2) + j3;
1074 		    kdif = kstep;
1075 		    if (kdif - np2 >= 0) {
1076 			goto L530;
1077 		    } else {
1078 			goto L450;
1079 		    }
1080 L530:
1081 		    ;
1082 		}
1083 	    }
1084 	    m = mmax - m;
1085 	    if (*ksign >= 0) {
1086 		goto L550;
1087 	    } else {
1088 		goto L540;
1089 	    }
1090 L540:
1091 	    tempr = wr;
1092 	    wr = -wi;
1093 	    wi = -tempr;
1094 	    goto L560;
1095 L550:
1096 	    tempr = wr;
1097 	    wr = wi;
1098 	    wi = tempr;
1099 L560:
1100 	    if (m - lmax <= 0) {
1101 		goto L565;
1102 	    } else {
1103 		goto L410;
1104 	    }
1105 L565:
1106 	    tempr = wr;
1107 	    wr = wr * wstpr - wi * wstpi + wr;
1108 	    wi = wi * wstpr + tempr * wstpi + wi;
1109 	}
1110 	ipar = 3 - ipar;
1111 	mmax += mmax;
1112 	goto L360;
1113 L600:
1114 	if (ntwo - np2 >= 0) {
1115 	    goto L700;
1116 	} else {
1117 	    goto L605;
1118 	}
1119 L605:
1120 	ifp1 = non2;
1121 	if_ = 1;
1122 	np1hf = np1 / 2;
1123 L610:
1124 	ifp2 = ifp1 / ifact[if_ - 1];
1125 	j1rng = np2;
1126 	if (icase - 3 != 0) {
1127 	    goto L612;
1128 	} else {
1129 	    goto L611;
1130 	}
1131 L611:
1132 	j1rng = (np2 + ifp1) / 2;
1133 	j2stp = np2 / ifact[if_ - 1];
1134 	j1rg2 = (j2stp + ifp2) / 2;
1135 L612:
1136 	j2min = ifp2 + 1;
1137 	if (ifp1 - np2 >= 0) {
1138 	    goto L640;
1139 	} else {
1140 	    goto L615;
1141 	}
1142 L615:
1143 	i__5 = ifp1;
1144 	i__4 = ifp2;
1145 	for (j2 = j2min; i__4 < 0 ? j2 >= i__5 : j2 <= i__5; j2 +=  i__4) {
1146 	    theta = -twopi * (gmt_grdfloat) (j2 - 1) / (gmt_grdfloat)  np2;
1147 	    if (*ksign >= 0) {
1148 		goto L620;
1149 	    } else {
1150 		goto L625;
1151 	    }
1152 L620:
1153 	    theta = -theta;
1154 L625:
1155 	    sinth = (gmt_grdfloat)sin((double)(0.5 * theta));
1156 	    wstpr = sinth * (gmt_grdfloat)(-2. * sinth);
1157 	    wstpi = (gmt_grdfloat)sin((double)theta);
1158 	    wr = wstpr + (gmt_grdfloat)1.0;
1159 	    wi = wstpi;
1160 	    j1min = j2 + ifp1;
1161 	    i__3 = j1rng;
1162 	    i__2 = ifp1;
1163 	    for (j1 = j1min; i__2 < 0 ? j1 >= i__3 : j1 <= i__3; j1 += i__2) {
1164 
1165 		i1max = j1 + i1rng - 2;
1166 		i__6 = i1max;
1167 		for (i1 = j1; i1 <= i__6; i1 += 2) {
1168 		    i__8 = ntot;
1169 		    i__7 = np2;
1170 		    for (i3 = i1; i__7 < 0 ? i3 >= i__8 : i3 <= i__8; i3 += i__7) {
1171 			j3max = i3 + ifp2 - np1;
1172 			i__9 = j3max;
1173 			i__10 = np1;
1174 			for (j3 = i3; i__10 < 0 ? j3 >= i__9 : j3 <= i__9; j3 += i__10) {
1175 			    tempr = data[j3];
1176 			    data[j3] = data[j3] * wr - data[j3 + 1] *  wi;
1177 			    data[j3 + 1] = tempr * wi + data[j3 + 1]  * wr;
1178 			}
1179 		    }
1180 		}
1181 		tempr = wr;
1182 		wr = wr * wstpr - wi * wstpi + wr;
1183 		wi = tempr * wstpi + wi * wstpr + wi;
1184 	    }
1185 	}
1186 L640:
1187 	theta = -twopi / (gmt_grdfloat) ifact[if_ - 1];
1188 	if (*ksign >= 0) {
1189 	    goto L645;
1190 	} else {
1191 	    goto L650;
1192 	}
1193 L645:
1194 	theta = -theta;
1195 L650:
1196 	sinth = (gmt_grdfloat)sin((double)(0.5 * theta));
1197 	wstpr = sinth * (gmt_grdfloat)(-2. * sinth);
1198 	wstpi = (gmt_grdfloat)sin((double)theta);
1199 	kstep = (n << 1) / ifact[if_ - 1];
1200 	krang = kstep * (ifact[if_ - 1] / 2) + 1;
1201 	i__2 = i1rng;
1202 	for (i1 = 1; i1 <= i__2; i1 += 2) {
1203 	    i__3 = ntot;
1204 	    i__4 = np2;
1205 	    for (i3 = i1; i__4 < 0 ? i3 >= i__3 : i3 <= i__3; i3 += i__4) {
1206 		i__5 = krang;
1207 		i__10 = kstep;
1208 		for (kmin = 1; i__10 < 0 ? kmin >= i__5 : kmin <= i__5; kmin += i__10) {
1209 		    j1max = i3 + j1rng - ifp1;
1210 		    i__9 = j1max;
1211 		    i__7 = ifp1;
1212 		    for (j1 = i3; i__7 < 0 ? j1 >= i__9 : j1 <= i__9; j1 += i__7) {
1213 			j3max = j1 + ifp2 - np1;
1214 			i__8 = j3max;
1215 			i__6 = np1;
1216 			for (j3 = j1; i__6 < 0 ? j3 >= i__8 : j3 <= i__8; j3 += i__6) {
1217 			    j2max = j3 + ifp1 - ifp2;
1218 			    k = kmin + (j3 - j1 + (j1 - i3) / ifact[if_ - 1]) / np1hf;
1219 			    if (kmin - 1 <= 0) {
1220 				goto L655;
1221 			    } else {
1222 				goto L665;
1223 			    }
1224 L655:
1225 			    sumr = (gmt_grdfloat)0.0;
1226 			    sumi = (gmt_grdfloat)0.0;
1227 			    i__11 = j2max;
1228 			    i__12 = ifp2;
1229 			    for (j2 = j3; i__12 < 0 ? j2 >= i__11 : j2 <= i__11; j2 += i__12) {
1230 				sumr += data[j2];
1231 				sumi += data[j2 + 1];
1232 			    }
1233 			    work[k] = sumr;
1234 			    work[k + 1] = sumi;
1235 			    goto L680;
1236 L665:
1237 			    kconj = k + ((n - kmin + 1) << 1);
1238 			    j2 = j2max;
1239 			    sumr = data[j2];
1240 			    sumi = data[j2 + 1];
1241 			    oldsr = (gmt_grdfloat)0.0;
1242 			    oldsi = (gmt_grdfloat)0.0;
1243 			    j2 -= ifp2;
1244 L670:
1245 			    tempr = sumr;
1246 			    tempi = sumi;
1247 			    sumr = twowr * sumr - oldsr + data[j2];
1248 			    sumi = twowr * sumi - oldsi + data[j2 + 1];
1249 			    oldsr = tempr;
1250 			    oldsi = tempi;
1251 			    j2 -= ifp2;
1252 			    if (j2 - j3 <= 0) {
1253 				goto L675;
1254 			    } else {
1255 				goto L670;
1256 			    }
1257 L675:
1258 			    tempr = wr * sumr - oldsr + data[j2];
1259 			    tempi = wi * sumi;
1260 			    work[k] = tempr - tempi;
1261 			    work[kconj] = tempr + tempi;
1262 			    tempr = wr * sumi - oldsi + data[j2 + 1];
1263 			    tempi = wi * sumr;
1264 			    work[k + 1] = tempr + tempi;
1265 			    work[kconj + 1] = tempr - tempi;
1266 L680:
1267 			    ;
1268 			}
1269 		    }
1270 		    if (kmin - 1 <= 0) {
1271 			goto L685;
1272 		    } else {
1273 			goto L686;
1274 		    }
1275 L685:
1276 		    wr = wstpr + (gmt_grdfloat)1.0;
1277 		    wi = wstpi;
1278 		    goto L690;
1279 L686:
1280 		    tempr = wr;
1281 		    wr = wr * wstpr - wi * wstpi + wr;
1282 		    wi = tempr * wstpi + wi * wstpr + wi;
1283 L690:
1284 		    twowr = wr + wr;
1285 		}
1286 		if (icase - 3 != 0) {
1287 		    goto L692;
1288 		} else {
1289 		    goto L691;
1290 		}
1291 L691:
1292 		if (ifp1 - np2 >= 0) {
1293 		    goto L692;
1294 		} else {
1295 		    goto L695;
1296 		}
1297 L692:
1298 		k = 1;
1299 		i2max = i3 + np2 - np1;
1300 		i__10 = i2max;
1301 		i__5 = np1;
1302 		for (i2 = i3; i__5 < 0 ? i2 >= i__10 : i2 <= i__10; i2 += i__5) {
1303 		    data[i2] = work[k];
1304 		    data[i2 + 1] = work[k + 1];
1305 		    k += 2;
1306 		}
1307 		goto L698;
1308 L695:
1309 		j3max = i3 + ifp2 - np1;
1310 		i__5 = j3max;
1311 		i__10 = np1;
1312 		for (j3 = i3; i__10 < 0 ? j3 >= i__5 : j3 <= i__5; j3 += i__10) {
1313 		    j2max = j3 + np2 - j2stp;
1314 		    i__6 = j2max;
1315 		    i__8 = j2stp;
1316 		    for (j2 = j3; i__8 < 0 ? j2 >= i__6 : j2 <= i__6; j2 += i__8) {
1317 			j1max = j2 + j1rg2 - ifp2;
1318 			j1cnj = j3 + j2max + j2stp - j2;
1319 			i__7 = j1max;
1320 			i__9 = ifp2;
1321 			for (j1 = j2; i__9 < 0 ? j1 >= i__7 : j1 <= i__7; j1 += i__9) {
1322 			    k = j1 + 1 - i3;
1323 			    data[j1] = work[k];
1324 			    data[j1 + 1] = work[k + 1];
1325 			    if (j1 - j2 <= 0) {
1326 				goto L697;
1327 			    } else {
1328 				goto L696;
1329 			    }
1330 L696:
1331 			    data[j1cnj] = work[k];
1332 			    data[j1cnj + 1] = -work[k + 1];
1333 L697:
1334 			    j1cnj -= ifp2;
1335 			}
1336 		    }
1337 		}
1338 L698:
1339 		;
1340 	    }
1341 	}
1342 	++if_;
1343 	ifp1 = ifp2;
1344 	if (ifp1 - np1 <= 0) {
1345 	    goto L700;
1346 	} else {
1347 	    goto L610;
1348 	}
1349 L700:
1350 	switch ((int)icase) {
1351 	    case 1:  goto L900;
1352 	    case 2:  goto L800;
1353 	    case 3:  goto L900;
1354 	    case 4:  goto L701;
1355 	}
1356 L701:
1357 	nhalf = n;
1358 	n += n;
1359 	theta = -twopi / (gmt_grdfloat) n;
1360 	if (*ksign >= 0) {
1361 	    goto L702;
1362 	} else {
1363 	    goto L703;
1364 	}
1365 L702:
1366 	theta = -theta;
1367 L703:
1368 	sinth = (gmt_grdfloat)sin((double)(0.5 * theta));
1369 	wstpr = sinth * (gmt_grdfloat)(-2. * sinth);
1370 	wstpi = (gmt_grdfloat)sin((double)theta);
1371 	wr = wstpr + (gmt_grdfloat)1.0;
1372 	wi = wstpi;
1373 	imin = 3;
1374 	jmin = (nhalf << 1) - 1;
1375 	goto L725;
1376 L710:
1377 	j = jmin;
1378 	i__4 = ntot;
1379 	i__3 = np2;
1380 	for (i = imin; i__3 < 0 ? i >= i__4 : i <= i__4; i += i__3) {
1381 	    sumr = (gmt_grdfloat)0.5 * (data[i] + data[j]);
1382 	    sumi = (gmt_grdfloat)0.5 * (data[i + 1] + data[j + 1]);
1383 	    difr = (gmt_grdfloat)0.5 * (data[i] - data[j]);
1384 	    difi = (gmt_grdfloat)0.5 * (data[i + 1] - data[j + 1]);
1385 	    tempr = wr * sumi + wi * difr;
1386 	    tempi = wi * sumi - wr * difr;
1387 	    data[i] = sumr + tempr;
1388 	    data[i + 1] = difi + tempi;
1389 	    data[j] = sumr - tempr;
1390 	    data[j + 1] = -difi + tempi;
1391 	    j += np2;
1392 	}
1393 	imin += 2;
1394 	jmin += -2;
1395 	tempr = wr;
1396 	wr = wr * wstpr - wi * wstpi + wr;
1397 	wi = tempr * wstpi + wi * wstpr + wi;
1398 L725:
1399 	if ((i__3 = imin - jmin) < 0) {
1400 	    goto L710;
1401 	} else if (i__3 == 0) {
1402 	    goto L730;
1403 	} else {
1404 	    goto L740;
1405 	}
1406 L730:
1407 	if (*ksign >= 0) {
1408 	    goto L740;
1409 	} else {
1410 	    goto L731;
1411 	}
1412 L731:
1413 	i__3 = ntot;
1414 	i__4 = np2;
1415 	for (i = imin; i__4 < 0 ? i >= i__3 : i <= i__3; i += i__4) {
1416 	    data[i + 1] = -data[i + 1];
1417 	}
1418 L740:
1419 	np2 += np2;
1420 	ntot += ntot;
1421 	j = ntot + 1;
1422 	imax = ntot / 2 + 1;
1423 L745:
1424 	imin = imax - (nhalf << 1);
1425 	i = imin;
1426 	goto L755;
1427 L750:
1428 	data[j] = data[i];
1429 	data[j + 1] = -data[i + 1];
1430 L755:
1431 	i += 2;
1432 	j += -2;
1433 	if (i - imax >= 0) {
1434 	    goto L760;
1435 	} else {
1436 	    goto L750;
1437 	}
1438 L760:
1439 	data[j] = data[imin] - data[imin + 1];
1440 	data[j + 1] = (gmt_grdfloat)0.0;
1441 	if (i - j >= 0) {
1442 	    goto L780;
1443 	} else {
1444 	    goto L770;
1445 	}
1446 L765:
1447 	data[j] = data[i];
1448 	data[j + 1] = data[i + 1];
1449 L770:
1450 	i += -2;
1451 	j += -2;
1452 	if (i - imin <= 0) {
1453 	    goto L775;
1454 	} else {
1455 	    goto L765;
1456 	}
1457 L775:
1458 	data[j] = data[imin] + data[imin + 1];
1459 	data[j + 1] = (gmt_grdfloat)0.0;
1460 	imax = imin;
1461 	goto L745;
1462 L780:
1463 	data[1] += data[2];
1464 	data[2] = (gmt_grdfloat)0.0;
1465 	goto L900;
1466 L800:
1467 	if (i1rng - np1 >= 0) {
1468 	    goto L900;
1469 	} else {
1470 	    goto L805;
1471 	}
1472 L805:
1473 	i__4 = ntot;
1474 	i__3 = np2;
1475 	for (i3 = 1; i__3 < 0 ? i3 >= i__4 : i3 <= i__4; i3 += i__3) {
1476 	    i2max = i3 + np2 - np1;
1477 	    i__2 = i2max;
1478 	    i__9 = np1;
1479 	    for (i2 = i3; i__9 < 0 ? i2 >= i__2 : i2 <= i__2; i2 += i__9) {
1480 		imin = i2 + i1rng;
1481 		imax = i2 + np1 - 2;
1482 		jmax = (i3 << 1) + np1 - imin;
1483 		if (i2 - i3 <= 0) {
1484 		    goto L820;
1485 		} else {
1486 		    goto L810;
1487 		}
1488 L810:
1489 		jmax += np2;
1490 L820:
1491 		if (idim - 2 <= 0) {
1492 		    goto L850;
1493 		} else {
1494 		    goto L830;
1495 		}
1496 L830:
1497 		j = jmax + np0;
1498 		i__7 = imax;
1499 		for (i = imin; i <= i__7; i += 2) {
1500 		    data[i] = data[j];
1501 		    data[i + 1] = -data[j + 1];
1502 		    j += -2;
1503 		}
1504 L850:
1505 		j = jmax;
1506 		i__7 = imax;
1507 		i__8 = np0;
1508 		for (i = imin; i__8 < 0 ? i >= i__7 : i <= i__7; i += i__8) {
1509 		    data[i] = data[j];
1510 		    data[i + 1] = -data[j + 1];
1511 		    j -= np0;
1512 		}
1513 	    }
1514 	}
1515 L900:
1516 	np0 = np1;
1517 	np1 = np2;
1518 	nprev = n;
1519     }
1520 L920:
1521     return 0;
1522 } /* fourt_ */
1523 
gmtfft_brenner_worksize(struct GMT_CTRL * GMT,unsigned int n_columns,unsigned int n_rows)1524 GMT_LOCAL size_t gmtfft_brenner_worksize (struct GMT_CTRL *GMT, unsigned int n_columns, unsigned int n_rows) {
1525         /* Find the size of the workspace that will be needed by the transform.
1526          * To use this routine for a 1-D transform, set n_rows = 1.
1527          *
1528          * This is all based on the comments in Norman Brenner's code
1529          * FOURT, from which our C codes are translated.
1530          *
1531          * Let m = largest prime factor in the list of factors.
1532          * Let p = product of all primes which appear an odd number of
1533          * times in the list of prime factors.  Then the worksize needed
1534          * s = max(m,p).  However, we know that if n is radix 2, then no
1535          * work is required; yet this formula would say we need a worksize
1536          * of at least 2.  So I will return s = 0 when max(m,p) = 2.
1537          *
1538          * W. H. F. Smith, 26 February 1992.
1539          *  */
1540         unsigned int f[32], n_factors, nonsymx, nonsymy, nonsym;
1541         size_t storage, ntotal;
1542 
1543         /* Find workspace needed.  First find non_symmetric factors in n_columns, n_rows  */
1544         n_factors = gmt_get_prime_factors (GMT, n_columns, f);
1545         nonsymx = (unsigned int)gmtfft_get_non_symmetric_f (f, n_factors);
1546         n_factors = gmt_get_prime_factors (GMT, n_rows, f);
1547         nonsymy = (unsigned int)gmtfft_get_non_symmetric_f (f, n_factors);
1548         nonsym = MAX (nonsymx, nonsymy);
1549 
1550         /* Now get factors of ntotal  */
1551         ntotal = gmt_M_get_nm (GMT, n_columns, n_rows);
1552         n_factors = gmt_get_prime_factors (GMT, ntotal, f);
1553         storage = (n_factors > 0) ? MAX (nonsym, f[n_factors-1]) : nonsym;
1554         if (storage != 2) storage *= 2;
1555         if (storage < n_columns) storage = n_columns;
1556         if (storage < n_rows) storage = n_rows;
1557         return (2 * storage);
1558 }
1559 
1560 /* C-callable wrapper for gmtfft_brenner_fourt_f */
gmtfft_1d_brenner(struct GMT_CTRL * GMT,gmt_grdfloat * data,unsigned int n,int direction,unsigned int mode)1561 GMT_LOCAL int gmtfft_1d_brenner (struct GMT_CTRL *GMT, gmt_grdfloat *data, unsigned int n, int direction, unsigned int mode) {
1562         /* void GMT_fourt (struct GMT_CTRL *GMT, gmt_grdfloat *data, int *nn, int ndim, int ksign, int iform, gmt_grdfloat *work) */
1563         /* Data array */
1564         /* Dimension array */
1565         /* Number of dimensions */
1566         /* Forward(-1) or Inverse(+1) */
1567         /* Real(0) or complex(1) data */
1568         /* Work array */
1569 
1570         int ksign, ndim = 1, n_signed = n, kmode = mode;
1571         size_t work_size = 0;
1572         gmt_grdfloat *work = NULL;
1573 
1574         ksign = (direction == GMT_FFT_INV) ? +1 : -1;
1575         if ((work_size = gmtfft_brenner_worksize (GMT, n, 1))) work = gmt_M_memory (GMT, NULL, work_size, gmt_grdfloat);
1576         (void) gmtfft_brenner_fourt_f (data, &n_signed, &ndim, &ksign, &kmode, work);
1577         gmt_M_free (GMT, work);
1578         return (GMT_OK);
1579 }
1580 
gmtfft_2d_brenner(struct GMT_CTRL * GMT,gmt_grdfloat * data,unsigned int n_columns,unsigned int n_rows,int direction,unsigned int mode)1581 GMT_LOCAL int gmtfft_2d_brenner (struct GMT_CTRL *GMT, gmt_grdfloat *data, unsigned int n_columns, unsigned int n_rows, int direction, unsigned int mode) {
1582         /* Data array */
1583         /* Dimension array */
1584         /* Number of dimensions */
1585         /* Forward(-1) or Inverse(+1) */
1586         /* Real(0) or complex(1) data */
1587         /* Work array */
1588 
1589         int ksign, ndim = 2, nn[2] = {n_columns, n_rows}, kmode = mode;
1590         size_t work_size = 0;
1591         gmt_grdfloat *work = NULL;
1592 
1593         ksign = (direction == GMT_FFT_INV) ? +1 : -1;
1594         if ((work_size = gmtfft_brenner_worksize (GMT, n_columns, n_rows))) work = gmt_M_memory (GMT, NULL, work_size, gmt_grdfloat);
1595         GMT_Report (GMT->parent, GMT_MSG_DEBUG, "Brenner_fourt_ work size = %" PRIuS "\n", work_size);
1596         (void) gmtfft_brenner_fourt_f (data, nn, &ndim, &ksign, &kmode, work);
1597         gmt_M_free (GMT, work);
1598         return (GMT_OK);
1599 }
1600 
gmtfft_1d_selection(struct GMT_CTRL * GMT,uint64_t n)1601 GMT_LOCAL int gmtfft_1d_selection (struct GMT_CTRL *GMT, uint64_t n) {
1602 	/* Returns the most suitable 1-D FFT for the job - or the one requested via GMT_FFT */
1603 	if (GMT->current.setting.fft != k_fft_auto) {
1604 		/* Specific selection requested */
1605 		if (GMT->session.fft1d[GMT->current.setting.fft])
1606 			return GMT->current.setting.fft; /* User defined FFT */
1607 		GMT_Report (GMT->parent, GMT_MSG_WARNING, "Desired FFT Algorithm (%s) not configured - choosing suitable alternative.\n", GMT_fft_algo[GMT->current.setting.fft]);
1608 	}
1609 	/* Here we want automatic selection from available candidates */
1610 	if (GMT->session.fft1d[k_fft_accelerate] && gmtfft_radix2 (n))
1611 		return k_fft_accelerate; /* Use if Radix-2 under OS/X */
1612 	if (GMT->session.fft1d[k_fft_fftw])
1613 		return k_fft_fftw;
1614 	return k_fft_kiss; /* Default/fallback general-purpose FFT */
1615 }
1616 
gmtfft_2d_selection(struct GMT_CTRL * GMT,unsigned int n_columns,unsigned int n_rows)1617 GMT_LOCAL int gmtfft_2d_selection (struct GMT_CTRL *GMT, unsigned int n_columns, unsigned int n_rows) {
1618 	/* Returns the most suitable 2-D FFT for the job - or the one requested via GMT_FFT */
1619 	if (GMT->current.setting.fft != k_fft_auto) {
1620 		/* Specific selection requested */
1621 		if (GMT->session.fft2d[GMT->current.setting.fft])
1622 			return GMT->current.setting.fft; /* User defined FFT */
1623 		GMT_Report (GMT->parent, GMT_MSG_WARNING, "Desired FFT Algorithm (%s) not configured - choosing suitable alternative.\n", GMT_fft_algo[GMT->current.setting.fft]);
1624 	}
1625 	/* Here we want automatic selection from available candidates */
1626 	if (GMT->session.fft2d[k_fft_accelerate] && gmtfft_radix2 (n_columns) && gmtfft_radix2 (n_rows))
1627 		return k_fft_accelerate; /* Use if Radix-2 under OS/X */
1628 	if (GMT->session.fft2d[k_fft_fftw])
1629 		return k_fft_fftw;
1630 	return k_fft_kiss; /* Default/fallback general-purpose FFT */
1631 }
1632 
GMT_FFT_1D(void * V_API,gmt_grdfloat * data,uint64_t n,int direction,unsigned int mode)1633 int GMT_FFT_1D (void *V_API, gmt_grdfloat *data, uint64_t n, int direction, unsigned int mode) {
1634 	/* data is an array of length n (or 2*n for complex) data points
1635 	 * n is the number of data points
1636 	 * direction is either GMT_FFT_FWD (forward) or GMT_FFT_INV (inverse)
1637 	 * mode is either GMT_FFT_REAL or GMT_FFT_COMPLEX
1638 	 */
1639 	int status, use;
1640 	struct GMTAPI_CTRL *API = gmtfft_get_api_ptr (V_API);
1641 	struct GMT_CTRL *GMT = API->GMT;
1642 	assert (mode == GMT_FFT_COMPLEX); /* GMT_FFT_REAL not implemented yet */
1643 	use = gmtfft_1d_selection (GMT, n);
1644 	GMT_Report (GMT->parent, GMT_MSG_INFORMATION, "1-D FFT using %s\n", GMT_fft_algo[use]);
1645 	status = GMT->session.fft1d[use] (GMT, data, (unsigned int)n, direction, mode);
1646 	if (direction == GMT_FFT_INV) {	/* Undo the 2/nm factor */
1647 		uint64_t nm = 2ULL * n;
1648 		gmt_scale_and_offset_f (GMT, data, nm, 2.0 / nm, 0);
1649 	}
1650 	return status;
1651 }
1652 
GMT_FFT_2D(void * V_API,gmt_grdfloat * data,unsigned int n_columns,unsigned int n_rows,int direction,unsigned int mode)1653 int GMT_FFT_2D (void *V_API, gmt_grdfloat *data, unsigned int n_columns, unsigned int n_rows, int direction, unsigned int mode) {
1654 	/* data is an array of length n_columns*n_rows (or 2*n_columns*n_rows for complex) data points
1655 	 * n_columns, n_rows is the number of data nodes
1656 	 * direction is either GMT_FFT_FWD (forward) or GMT_FFT_INV (inverse)
1657 	 * mode is either GMT_FFT_REAL or GMT_FFT_COMPLEX
1658 	 */
1659 	int status, use;
1660 	struct GMTAPI_CTRL *API = gmtfft_get_api_ptr (V_API);
1661 	struct GMT_CTRL *GMT = API->GMT;
1662 	assert (mode == GMT_FFT_COMPLEX); /* GMT_FFT_REAL not implemented yet */
1663 	use = gmtfft_2d_selection (GMT, n_columns, n_rows);
1664 
1665 	GMT_Report (GMT->parent, GMT_MSG_INFORMATION, "2-D FFT using %s\n", GMT_fft_algo[use]);
1666 	status = GMT->session.fft2d[use] (GMT, data, n_columns, n_rows, direction, mode);
1667 	if (direction == GMT_FFT_INV) {	/* Undo the 2/nm factor */
1668 		uint64_t nm = 2ULL * n_columns * n_rows;
1669 		gmt_scale_and_offset_f (GMT, data, nm, 2.0 / nm, 0);
1670 	}
1671 	return status;
1672 }
1673 
gmtlib_fft_initialization(struct GMT_CTRL * GMT)1674 void gmtlib_fft_initialization (struct GMT_CTRL *GMT) {
1675 	/* Called by gmt_begin and sets up pointers to the available FFT calls */
1676 #if defined HAVE_FFTW3F_THREADS
1677 	int n_cpu = gmtlib_get_num_processors();
1678 #endif /* HAVE_FFTW3_THREADS */
1679 
1680 #ifdef HAVE_FFTW3F
1681 	GMT->current.setting.fftw_plan = FFTW_ESTIMATE; /* default planner flag [only accessed if FFTW is compiled in] */
1682 #endif /* HAVE_FFTW3F */
1683 
1684 #if defined HAVE_FFTW3F_THREADS
1685 	if (n_cpu > 1 && !GMT->current.setting.fftwf_threads) {
1686 		/* one-time initialization required to use FFTW3 threads */
1687 		if ( fftwf_init_threads() ) {
1688 			fftwf_plan_with_nthreads(n_cpu);
1689 			GMT_Report (GMT->parent, GMT_MSG_DEBUG, "Initialize FFTW with %d threads.\n", n_cpu);
1690 		}
1691 	}
1692 #endif /* HAVE_FFTW3_THREADS */
1693 
1694 	/* Start with nothing */
1695 	memset (GMT->session.fft1d, k_n_fft_algorithms, sizeof(void*));
1696 	memset (GMT->session.fft2d, k_n_fft_algorithms, sizeof(void*));
1697 
1698 #ifdef __APPLE__
1699 	/* OS X Accelerate Framework */
1700 	GMT->session.fft1d[k_fft_accelerate] = &gmtfft_1d_vDSP;
1701 	GMT->session.fft2d[k_fft_accelerate] = &gmtfft_2d_vDSP;
1702 #endif
1703 #ifdef HAVE_FFTW3F
1704 	/* single precision FFTW3 */
1705 	GMT->session.fft1d[k_fft_fftw] = &gmtfft_1d_fftwf;
1706 	GMT->session.fft2d[k_fft_fftw] = &gmtfft_2d_fftwf;
1707 #endif /* HAVE_FFTW3F */
1708 	/* Kiss FFT is the integrated fallback */
1709 	GMT->session.fft1d[k_fft_kiss] = &gmtfft_1d_kiss;
1710 	GMT->session.fft2d[k_fft_kiss] = &gmtfft_2d_kiss;
1711 	/* Brenner FFT is the legacy fallback */
1712 	GMT->session.fft1d[k_fft_brenner] = &gmtfft_1d_brenner;
1713 	GMT->session.fft2d[k_fft_brenner] = &gmtfft_2d_brenner;
1714 }
1715 
gmtlib_fft_cleanup(struct GMT_CTRL * GMT)1716 void gmtlib_fft_cleanup (struct GMT_CTRL *GMT) {
1717 	/* Called by gmt_end */
1718 #ifndef __APPLE__
1719 	gmt_M_unused(GMT);
1720 #endif
1721 #if defined HAVE_FFTW3F_THREADS
1722 	fftwf_cleanup_threads(); /* clean resources allocated internally by FFTW */
1723 #endif
1724 #ifdef __APPLE__ /* Accelerate framework */
1725 	gmtfft_gmtfft_1d_vDSP_reset (&GMT->current.fft);
1726 	gmtfft_gmtfft_2d_vDSP_reset (&GMT->current.fft);
1727 #endif
1728 }
1729