1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // Intel License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
15 //
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
18 //
19 // * Redistribution's of source code must retain the above copyright notice,
20 // this list of conditions and the following disclaimer.
21 //
22 // * Redistribution's in binary form must reproduce the above copyright notice,
23 // this list of conditions and the following disclaimer in the documentation
24 // and/or other materials provided with the distribution.
25 //
26 // * The name of Intel Corporation may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Intel Corporation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 //M*/
41
42 #include "precomp.hpp"
43 #include "opencv2/core/opencl/runtime/opencl_clfft.hpp"
44 #include "opencv2/core/opencl/runtime/opencl_core.hpp"
45 #include "opencl_kernels_core.hpp"
46 #include <map>
47
48 namespace cv
49 {
50
51 // On Win64 optimized versions of DFT and DCT fail the tests (fixed in VS2010)
52 #if defined _MSC_VER && !defined CV_ICC && defined _M_X64 && _MSC_VER < 1600
53 # pragma optimize("", off)
54 # pragma warning(disable: 4748)
55 #endif
56
57 #if IPP_VERSION_X100 >= 710
58 #define USE_IPP_DFT 1
59 #else
60 #undef USE_IPP_DFT
61 #endif
62
63 /****************************************************************************************\
64 Discrete Fourier Transform
65 \****************************************************************************************/
66
67 #define CV_MAX_LOCAL_DFT_SIZE (1 << 15)
68
69 static unsigned char bitrevTab[] =
70 {
71 0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0,0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0,
72 0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8,0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8,
73 0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4,0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4,
74 0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec,0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc,
75 0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2,0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2,
76 0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea,0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa,
77 0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6,0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6,
78 0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee,0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe,
79 0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1,0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1,
80 0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9,0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9,
81 0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5,0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5,
82 0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed,0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd,
83 0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3,0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3,
84 0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb,0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb,
85 0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7,0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7,
86 0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef,0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
87 };
88
89 static const double DFTTab[][2] =
90 {
91 { 1.00000000000000000, 0.00000000000000000 },
92 {-1.00000000000000000, 0.00000000000000000 },
93 { 0.00000000000000000, 1.00000000000000000 },
94 { 0.70710678118654757, 0.70710678118654746 },
95 { 0.92387953251128674, 0.38268343236508978 },
96 { 0.98078528040323043, 0.19509032201612825 },
97 { 0.99518472667219693, 0.09801714032956060 },
98 { 0.99879545620517241, 0.04906767432741802 },
99 { 0.99969881869620425, 0.02454122852291229 },
100 { 0.99992470183914450, 0.01227153828571993 },
101 { 0.99998117528260111, 0.00613588464915448 },
102 { 0.99999529380957619, 0.00306795676296598 },
103 { 0.99999882345170188, 0.00153398018628477 },
104 { 0.99999970586288223, 0.00076699031874270 },
105 { 0.99999992646571789, 0.00038349518757140 },
106 { 0.99999998161642933, 0.00019174759731070 },
107 { 0.99999999540410733, 0.00009587379909598 },
108 { 0.99999999885102686, 0.00004793689960307 },
109 { 0.99999999971275666, 0.00002396844980842 },
110 { 0.99999999992818922, 0.00001198422490507 },
111 { 0.99999999998204725, 0.00000599211245264 },
112 { 0.99999999999551181, 0.00000299605622633 },
113 { 0.99999999999887801, 0.00000149802811317 },
114 { 0.99999999999971945, 0.00000074901405658 },
115 { 0.99999999999992983, 0.00000037450702829 },
116 { 0.99999999999998246, 0.00000018725351415 },
117 { 0.99999999999999567, 0.00000009362675707 },
118 { 0.99999999999999889, 0.00000004681337854 },
119 { 0.99999999999999978, 0.00000002340668927 },
120 { 0.99999999999999989, 0.00000001170334463 },
121 { 1.00000000000000000, 0.00000000585167232 },
122 { 1.00000000000000000, 0.00000000292583616 }
123 };
124
125 namespace {
126 template <typename T>
127 struct Constants {
128 static const T sin_120;
129 static const T fft5_2;
130 static const T fft5_3;
131 static const T fft5_4;
132 static const T fft5_5;
133 };
134
135 template <typename T>
136 const T Constants<T>::sin_120 = (T)0.86602540378443864676372317075294;
137
138 template <typename T>
139 const T Constants<T>::fft5_2 = (T)0.559016994374947424102293417182819;
140
141 template <typename T>
142 const T Constants<T>::fft5_3 = (T)-0.951056516295153572116439333379382;
143
144 template <typename T>
145 const T Constants<T>::fft5_4 = (T)-1.538841768587626701285145288018455;
146
147 template <typename T>
148 const T Constants<T>::fft5_5 = (T)0.363271264002680442947733378740309;
149
150 } //namespace
151
152 #define BitRev(i,shift) \
153 ((int)((((unsigned)bitrevTab[(i)&255] << 24)+ \
154 ((unsigned)bitrevTab[((i)>> 8)&255] << 16)+ \
155 ((unsigned)bitrevTab[((i)>>16)&255] << 8)+ \
156 ((unsigned)bitrevTab[((i)>>24)])) >> (shift)))
157
158 static int
DFTFactorize(int n,int * factors)159 DFTFactorize( int n, int* factors )
160 {
161 int nf = 0, f, i, j;
162
163 if( n <= 5 )
164 {
165 factors[0] = n;
166 return 1;
167 }
168
169 f = (((n - 1)^n)+1) >> 1;
170 if( f > 1 )
171 {
172 factors[nf++] = f;
173 n = f == n ? 1 : n/f;
174 }
175
176 for( f = 3; n > 1; )
177 {
178 int d = n/f;
179 if( d*f == n )
180 {
181 factors[nf++] = f;
182 n = d;
183 }
184 else
185 {
186 f += 2;
187 if( f*f > n )
188 break;
189 }
190 }
191
192 if( n > 1 )
193 factors[nf++] = n;
194
195 f = (factors[0] & 1) == 0;
196 for( i = f; i < (nf+f)/2; i++ )
197 CV_SWAP( factors[i], factors[nf-i-1+f], j );
198
199 return nf;
200 }
201
202 static void
DFTInit(int n0,int nf,const int * factors,int * itab,int elem_size,void * _wave,int inv_itab)203 DFTInit( int n0, int nf, const int* factors, int* itab, int elem_size, void* _wave, int inv_itab )
204 {
205 int digits[34], radix[34];
206 int n = factors[0], m = 0;
207 int* itab0 = itab;
208 int i, j, k;
209 Complex<double> w, w1;
210 double t;
211
212 if( n0 <= 5 )
213 {
214 itab[0] = 0;
215 itab[n0-1] = n0-1;
216
217 if( n0 != 4 )
218 {
219 for( i = 1; i < n0-1; i++ )
220 itab[i] = i;
221 }
222 else
223 {
224 itab[1] = 2;
225 itab[2] = 1;
226 }
227 if( n0 == 5 )
228 {
229 if( elem_size == sizeof(Complex<double>) )
230 ((Complex<double>*)_wave)[0] = Complex<double>(1.,0.);
231 else
232 ((Complex<float>*)_wave)[0] = Complex<float>(1.f,0.f);
233 }
234 if( n0 != 4 )
235 return;
236 m = 2;
237 }
238 else
239 {
240 // radix[] is initialized from index 'nf' down to zero
241 assert (nf < 34);
242 radix[nf] = 1;
243 digits[nf] = 0;
244 for( i = 0; i < nf; i++ )
245 {
246 digits[i] = 0;
247 radix[nf-i-1] = radix[nf-i]*factors[nf-i-1];
248 }
249
250 if( inv_itab && factors[0] != factors[nf-1] )
251 itab = (int*)_wave;
252
253 if( (n & 1) == 0 )
254 {
255 int a = radix[1], na2 = n*a>>1, na4 = na2 >> 1;
256 for( m = 0; (unsigned)(1 << m) < (unsigned)n; m++ )
257 ;
258 if( n <= 2 )
259 {
260 itab[0] = 0;
261 itab[1] = na2;
262 }
263 else if( n <= 256 )
264 {
265 int shift = 10 - m;
266 for( i = 0; i <= n - 4; i += 4 )
267 {
268 j = (bitrevTab[i>>2]>>shift)*a;
269 itab[i] = j;
270 itab[i+1] = j + na2;
271 itab[i+2] = j + na4;
272 itab[i+3] = j + na2 + na4;
273 }
274 }
275 else
276 {
277 int shift = 34 - m;
278 for( i = 0; i < n; i += 4 )
279 {
280 int i4 = i >> 2;
281 j = BitRev(i4,shift)*a;
282 itab[i] = j;
283 itab[i+1] = j + na2;
284 itab[i+2] = j + na4;
285 itab[i+3] = j + na2 + na4;
286 }
287 }
288
289 digits[1]++;
290
291 if( nf >= 2 )
292 {
293 for( i = n, j = radix[2]; i < n0; )
294 {
295 for( k = 0; k < n; k++ )
296 itab[i+k] = itab[k] + j;
297 if( (i += n) >= n0 )
298 break;
299 j += radix[2];
300 for( k = 1; ++digits[k] >= factors[k]; k++ )
301 {
302 digits[k] = 0;
303 j += radix[k+2] - radix[k];
304 }
305 }
306 }
307 }
308 else
309 {
310 for( i = 0, j = 0;; )
311 {
312 itab[i] = j;
313 if( ++i >= n0 )
314 break;
315 j += radix[1];
316 for( k = 0; ++digits[k] >= factors[k]; k++ )
317 {
318 digits[k] = 0;
319 j += radix[k+2] - radix[k];
320 }
321 }
322 }
323
324 if( itab != itab0 )
325 {
326 itab0[0] = 0;
327 for( i = n0 & 1; i < n0; i += 2 )
328 {
329 int k0 = itab[i];
330 int k1 = itab[i+1];
331 itab0[k0] = i;
332 itab0[k1] = i+1;
333 }
334 }
335 }
336
337 if( (n0 & (n0-1)) == 0 )
338 {
339 w.re = w1.re = DFTTab[m][0];
340 w.im = w1.im = -DFTTab[m][1];
341 }
342 else
343 {
344 t = -CV_PI*2/n0;
345 w.im = w1.im = sin(t);
346 w.re = w1.re = std::sqrt(1. - w1.im*w1.im);
347 }
348 n = (n0+1)/2;
349
350 if( elem_size == sizeof(Complex<double>) )
351 {
352 Complex<double>* wave = (Complex<double>*)_wave;
353
354 wave[0].re = 1.;
355 wave[0].im = 0.;
356
357 if( (n0 & 1) == 0 )
358 {
359 wave[n].re = -1.;
360 wave[n].im = 0;
361 }
362
363 for( i = 1; i < n; i++ )
364 {
365 wave[i] = w;
366 wave[n0-i].re = w.re;
367 wave[n0-i].im = -w.im;
368
369 t = w.re*w1.re - w.im*w1.im;
370 w.im = w.re*w1.im + w.im*w1.re;
371 w.re = t;
372 }
373 }
374 else
375 {
376 Complex<float>* wave = (Complex<float>*)_wave;
377 assert( elem_size == sizeof(Complex<float>) );
378
379 wave[0].re = 1.f;
380 wave[0].im = 0.f;
381
382 if( (n0 & 1) == 0 )
383 {
384 wave[n].re = -1.f;
385 wave[n].im = 0.f;
386 }
387
388 for( i = 1; i < n; i++ )
389 {
390 wave[i].re = (float)w.re;
391 wave[i].im = (float)w.im;
392 wave[n0-i].re = (float)w.re;
393 wave[n0-i].im = (float)-w.im;
394
395 t = w.re*w1.re - w.im*w1.im;
396 w.im = w.re*w1.im + w.im*w1.re;
397 w.re = t;
398 }
399 }
400 }
401
402 // Reference radix-2 implementation.
403 template<typename T> struct DFT_R2
404 {
operator ()cv::DFT_R2405 void operator()(Complex<T>* dst, const int c_n, const int n, const int dw0, const Complex<T>* wave) const {
406 const int nx = n/2;
407 for(int i = 0 ; i < c_n; i += n)
408 {
409 Complex<T>* v = dst + i;
410 T r0 = v[0].re + v[nx].re;
411 T i0 = v[0].im + v[nx].im;
412 T r1 = v[0].re - v[nx].re;
413 T i1 = v[0].im - v[nx].im;
414 v[0].re = r0; v[0].im = i0;
415 v[nx].re = r1; v[nx].im = i1;
416
417 for( int j = 1, dw = dw0; j < nx; j++, dw += dw0 )
418 {
419 v = dst + i + j;
420 r1 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im;
421 i1 = v[nx].im*wave[dw].re + v[nx].re*wave[dw].im;
422 r0 = v[0].re; i0 = v[0].im;
423
424 v[0].re = r0 + r1; v[0].im = i0 + i1;
425 v[nx].re = r0 - r1; v[nx].im = i0 - i1;
426 }
427 }
428 }
429 };
430
431 // Reference radix-3 implementation.
432 template<typename T> struct DFT_R3
433 {
operator ()cv::DFT_R3434 void operator()(Complex<T>* dst, const int c_n, const int n, const int dw0, const Complex<T>* wave) const {
435 const int nx = n / 3;
436 for(int i = 0; i < c_n; i += n )
437 {
438 {
439 Complex<T>* v = dst + i;
440 T r1 = v[nx].re + v[nx*2].re;
441 T i1 = v[nx].im + v[nx*2].im;
442 T r0 = v[0].re;
443 T i0 = v[0].im;
444 T r2 = Constants<T>::sin_120*(v[nx].im - v[nx*2].im);
445 T i2 = Constants<T>::sin_120*(v[nx*2].re - v[nx].re);
446 v[0].re = r0 + r1; v[0].im = i0 + i1;
447 r0 -= (T)0.5*r1; i0 -= (T)0.5*i1;
448 v[nx].re = r0 + r2; v[nx].im = i0 + i2;
449 v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2;
450 }
451
452 for(int j = 1, dw = dw0; j < nx; j++, dw += dw0 )
453 {
454 Complex<T>* v = dst + i + j;
455 T r0 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im;
456 T i0 = v[nx].re*wave[dw].im + v[nx].im*wave[dw].re;
457 T i2 = v[nx*2].re*wave[dw*2].re - v[nx*2].im*wave[dw*2].im;
458 T r2 = v[nx*2].re*wave[dw*2].im + v[nx*2].im*wave[dw*2].re;
459 T r1 = r0 + i2; T i1 = i0 + r2;
460
461 r2 = Constants<T>::sin_120*(i0 - r2); i2 = Constants<T>::sin_120*(i2 - r0);
462 r0 = v[0].re; i0 = v[0].im;
463 v[0].re = r0 + r1; v[0].im = i0 + i1;
464 r0 -= (T)0.5*r1; i0 -= (T)0.5*i1;
465 v[nx].re = r0 + r2; v[nx].im = i0 + i2;
466 v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2;
467 }
468 }
469 }
470 };
471
472 // Reference radix-5 implementation.
473 template<typename T> struct DFT_R5
474 {
operator ()cv::DFT_R5475 void operator()(Complex<T>* dst, const int c_n, const int n, const int dw0, const Complex<T>* wave) const {
476 const int nx = n / 5;
477 for(int i = 0; i < c_n; i += n )
478 {
479 for(int j = 0, dw = 0; j < nx; j++, dw += dw0 )
480 {
481 Complex<T>* v0 = dst + i + j;
482 Complex<T>* v1 = v0 + nx*2;
483 Complex<T>* v2 = v1 + nx*2;
484
485 T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4, r5, i5;
486
487 r3 = v0[nx].re*wave[dw].re - v0[nx].im*wave[dw].im;
488 i3 = v0[nx].re*wave[dw].im + v0[nx].im*wave[dw].re;
489 r2 = v2[0].re*wave[dw*4].re - v2[0].im*wave[dw*4].im;
490 i2 = v2[0].re*wave[dw*4].im + v2[0].im*wave[dw*4].re;
491
492 r1 = r3 + r2; i1 = i3 + i2;
493 r3 -= r2; i3 -= i2;
494
495 r4 = v1[nx].re*wave[dw*3].re - v1[nx].im*wave[dw*3].im;
496 i4 = v1[nx].re*wave[dw*3].im + v1[nx].im*wave[dw*3].re;
497 r0 = v1[0].re*wave[dw*2].re - v1[0].im*wave[dw*2].im;
498 i0 = v1[0].re*wave[dw*2].im + v1[0].im*wave[dw*2].re;
499
500 r2 = r4 + r0; i2 = i4 + i0;
501 r4 -= r0; i4 -= i0;
502
503 r0 = v0[0].re; i0 = v0[0].im;
504 r5 = r1 + r2; i5 = i1 + i2;
505
506 v0[0].re = r0 + r5; v0[0].im = i0 + i5;
507
508 r0 -= (T)0.25*r5; i0 -= (T)0.25*i5;
509 r1 = Constants<T>::fft5_2*(r1 - r2); i1 = Constants<T>::fft5_2*(i1 - i2);
510 r2 = -Constants<T>::fft5_3*(i3 + i4); i2 = Constants<T>::fft5_3*(r3 + r4);
511
512 i3 *= -Constants<T>::fft5_5; r3 *= Constants<T>::fft5_5;
513 i4 *= -Constants<T>::fft5_4; r4 *= Constants<T>::fft5_4;
514
515 r5 = r2 + i3; i5 = i2 + r3;
516 r2 -= i4; i2 -= r4;
517
518 r3 = r0 + r1; i3 = i0 + i1;
519 r0 -= r1; i0 -= i1;
520
521 v0[nx].re = r3 + r2; v0[nx].im = i3 + i2;
522 v2[0].re = r3 - r2; v2[0].im = i3 - i2;
523
524 v1[0].re = r0 + r5; v1[0].im = i0 + i5;
525 v1[nx].re = r0 - r5; v1[nx].im = i0 - i5;
526 }
527 }
528 }
529 };
530
531 template<typename T> struct DFT_VecR2
532 {
operator ()cv::DFT_VecR2533 void operator()(Complex<T>* dst, const int c_n, const int n, const int dw0, const Complex<T>* wave) const {
534 DFT_R2<T>()(dst, c_n, n, dw0, wave);
535 }
536 };
537
538 template<typename T> struct DFT_VecR3
539 {
operator ()cv::DFT_VecR3540 void operator()(Complex<T>* dst, const int c_n, const int n, const int dw0, const Complex<T>* wave) const {
541 DFT_R3<T>()(dst, c_n, n, dw0, wave);
542 }
543 };
544
545 template<typename T> struct DFT_VecR4
546 {
operator ()cv::DFT_VecR4547 int operator()(Complex<T>*, int, int, int&, const Complex<T>*) const { return 1; }
548 };
549
550 #if CV_SSE3
551
552 // multiplies *a and *b:
553 // r_re + i*r_im = (a_re + i*a_im)*(b_re + i*b_im)
554 // r_re and r_im are placed respectively in bits 31:0 and 63:32 of the resulting
555 // vector register.
complexMul(const Complex<float> * const a,const Complex<float> * const b)556 inline __m128 complexMul(const Complex<float>* const a, const Complex<float>* const b) {
557 const __m128 z = _mm_setzero_ps();
558 const __m128 neg_elem0 = _mm_set_ps(0.0f,0.0f,0.0f,-0.0f);
559 // v_a[31:0] is a->re and v_a[63:32] is a->im.
560 const __m128 v_a = _mm_loadl_pi(z, (const __m64*)a);
561 const __m128 v_b = _mm_loadl_pi(z, (const __m64*)b);
562 // x_1 = v[nx] * wave[dw].
563 const __m128 v_a_riri = _mm_shuffle_ps(v_a, v_a, _MM_SHUFFLE(0, 1, 0, 1));
564 const __m128 v_b_irri = _mm_shuffle_ps(v_b, v_b, _MM_SHUFFLE(1, 0, 0, 1));
565 const __m128 mul = _mm_mul_ps(v_a_riri, v_b_irri);
566 const __m128 xored = _mm_xor_ps(mul, neg_elem0);
567 return _mm_hadd_ps(xored, z);
568 }
569
570 // optimized radix-2 transform
571 template<> struct DFT_VecR2<float> {
operator ()cv::DFT_VecR2572 void operator()(Complex<float>* dst, const int c_n, const int n, const int dw0, const Complex<float>* wave) const {
573 const __m128 z = _mm_setzero_ps();
574 const int nx = n/2;
575 for(int i = 0 ; i < c_n; i += n)
576 {
577 {
578 Complex<float>* v = dst + i;
579 float r0 = v[0].re + v[nx].re;
580 float i0 = v[0].im + v[nx].im;
581 float r1 = v[0].re - v[nx].re;
582 float i1 = v[0].im - v[nx].im;
583 v[0].re = r0; v[0].im = i0;
584 v[nx].re = r1; v[nx].im = i1;
585 }
586
587 for( int j = 1, dw = dw0; j < nx; j++, dw += dw0 )
588 {
589 Complex<float>* v = dst + i + j;
590 const __m128 x_1 = complexMul(&v[nx], &wave[dw]);
591 const __m128 v_0 = _mm_loadl_pi(z, (const __m64*)&v[0]);
592 _mm_storel_pi((__m64*)&v[0], _mm_add_ps(v_0, x_1));
593 _mm_storel_pi((__m64*)&v[nx], _mm_sub_ps(v_0, x_1));
594 }
595 }
596 }
597 };
598
599 // Optimized radix-3 implementation.
600 template<> struct DFT_VecR3<float> {
operator ()cv::DFT_VecR3601 void operator()(Complex<float>* dst, const int c_n, const int n, const int dw0, const Complex<float>* wave) const {
602 const int nx = n / 3;
603 const __m128 z = _mm_setzero_ps();
604 const __m128 neg_elem1 = _mm_set_ps(0.0f,0.0f,-0.0f,0.0f);
605 const __m128 sin_120 = _mm_set1_ps(Constants<float>::sin_120);
606 const __m128 one_half = _mm_set1_ps(0.5f);
607 for(int i = 0; i < c_n; i += n )
608 {
609 {
610 Complex<float>* v = dst + i;
611
612 float r1 = v[nx].re + v[nx*2].re;
613 float i1 = v[nx].im + v[nx*2].im;
614 float r0 = v[0].re;
615 float i0 = v[0].im;
616 float r2 = Constants<float>::sin_120*(v[nx].im - v[nx*2].im);
617 float i2 = Constants<float>::sin_120*(v[nx*2].re - v[nx].re);
618 v[0].re = r0 + r1; v[0].im = i0 + i1;
619 r0 -= (float)0.5*r1; i0 -= (float)0.5*i1;
620 v[nx].re = r0 + r2; v[nx].im = i0 + i2;
621 v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2;
622 }
623
624 for(int j = 1, dw = dw0; j < nx; j++, dw += dw0 )
625 {
626 Complex<float>* v = dst + i + j;
627 const __m128 x_0 = complexMul(&v[nx], &wave[dw]);
628 const __m128 x_2 = complexMul(&v[nx*2], &wave[dw*2]);
629 const __m128 x_1 = _mm_add_ps(x_0, x_2);
630
631 const __m128 v_0 = _mm_loadl_pi(z, (const __m64*)&v[0]);
632 _mm_storel_pi((__m64*)&v[0], _mm_add_ps(v_0, x_1));
633
634 const __m128 x_3 = _mm_mul_ps(sin_120, _mm_xor_ps(_mm_sub_ps(x_2, x_0), neg_elem1));
635 const __m128 x_3s = _mm_shuffle_ps(x_3, x_3, _MM_SHUFFLE(0, 1, 0, 1));
636 const __m128 x_4 = _mm_sub_ps(v_0, _mm_mul_ps(one_half, x_1));
637 _mm_storel_pi((__m64*)&v[nx], _mm_add_ps(x_4, x_3s));
638 _mm_storel_pi((__m64*)&v[nx*2], _mm_sub_ps(x_4, x_3s));
639 }
640 }
641 }
642 };
643
644 // optimized radix-4 transform
645 template<> struct DFT_VecR4<float>
646 {
operator ()cv::DFT_VecR4647 int operator()(Complex<float>* dst, int N, int n0, int& _dw0, const Complex<float>* wave) const
648 {
649 int n = 1, i, j, nx, dw, dw0 = _dw0;
650 __m128 z = _mm_setzero_ps(), x02=z, x13=z, w01=z, w23=z, y01, y23, t0, t1;
651 Cv32suf t; t.i = 0x80000000;
652 __m128 neg0_mask = _mm_load_ss(&t.f);
653 __m128 neg3_mask = _mm_shuffle_ps(neg0_mask, neg0_mask, _MM_SHUFFLE(0,1,2,3));
654
655 for( ; n*4 <= N; )
656 {
657 nx = n;
658 n *= 4;
659 dw0 /= 4;
660
661 for( i = 0; i < n0; i += n )
662 {
663 Complexf *v0, *v1;
664
665 v0 = dst + i;
666 v1 = v0 + nx*2;
667
668 x02 = _mm_loadl_pi(x02, (const __m64*)&v0[0]);
669 x13 = _mm_loadl_pi(x13, (const __m64*)&v0[nx]);
670 x02 = _mm_loadh_pi(x02, (const __m64*)&v1[0]);
671 x13 = _mm_loadh_pi(x13, (const __m64*)&v1[nx]);
672
673 y01 = _mm_add_ps(x02, x13);
674 y23 = _mm_sub_ps(x02, x13);
675 t1 = _mm_xor_ps(_mm_shuffle_ps(y01, y23, _MM_SHUFFLE(2,3,3,2)), neg3_mask);
676 t0 = _mm_movelh_ps(y01, y23);
677 y01 = _mm_add_ps(t0, t1);
678 y23 = _mm_sub_ps(t0, t1);
679
680 _mm_storel_pi((__m64*)&v0[0], y01);
681 _mm_storeh_pi((__m64*)&v0[nx], y01);
682 _mm_storel_pi((__m64*)&v1[0], y23);
683 _mm_storeh_pi((__m64*)&v1[nx], y23);
684
685 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
686 {
687 v0 = dst + i + j;
688 v1 = v0 + nx*2;
689
690 x13 = _mm_loadl_pi(x13, (const __m64*)&v0[nx]);
691 w23 = _mm_loadl_pi(w23, (const __m64*)&wave[dw*2]);
692 x13 = _mm_loadh_pi(x13, (const __m64*)&v1[nx]); // x1, x3 = r1 i1 r3 i3
693 w23 = _mm_loadh_pi(w23, (const __m64*)&wave[dw*3]); // w2, w3 = wr2 wi2 wr3 wi3
694
695 t0 = _mm_mul_ps(_mm_moveldup_ps(x13), w23);
696 t1 = _mm_mul_ps(_mm_movehdup_ps(x13), _mm_shuffle_ps(w23, w23, _MM_SHUFFLE(2,3,0,1)));
697 x13 = _mm_addsub_ps(t0, t1);
698 // re(x1*w2), im(x1*w2), re(x3*w3), im(x3*w3)
699 x02 = _mm_loadl_pi(x02, (const __m64*)&v1[0]); // x2 = r2 i2
700 w01 = _mm_loadl_pi(w01, (const __m64*)&wave[dw]); // w1 = wr1 wi1
701 x02 = _mm_shuffle_ps(x02, x02, _MM_SHUFFLE(0,0,1,1));
702 w01 = _mm_shuffle_ps(w01, w01, _MM_SHUFFLE(1,0,0,1));
703 x02 = _mm_mul_ps(x02, w01);
704 x02 = _mm_addsub_ps(x02, _mm_movelh_ps(x02, x02));
705 // re(x0) im(x0) re(x2*w1), im(x2*w1)
706 x02 = _mm_loadl_pi(x02, (const __m64*)&v0[0]);
707
708 y01 = _mm_add_ps(x02, x13);
709 y23 = _mm_sub_ps(x02, x13);
710 t1 = _mm_xor_ps(_mm_shuffle_ps(y01, y23, _MM_SHUFFLE(2,3,3,2)), neg3_mask);
711 t0 = _mm_movelh_ps(y01, y23);
712 y01 = _mm_add_ps(t0, t1);
713 y23 = _mm_sub_ps(t0, t1);
714
715 _mm_storel_pi((__m64*)&v0[0], y01);
716 _mm_storeh_pi((__m64*)&v0[nx], y01);
717 _mm_storel_pi((__m64*)&v1[0], y23);
718 _mm_storeh_pi((__m64*)&v1[nx], y23);
719 }
720 }
721 }
722
723 _dw0 = dw0;
724 return n;
725 }
726 };
727
728 #endif
729
730 #ifdef USE_IPP_DFT
ippsDFTFwd_CToC(const Complex<float> * src,Complex<float> * dst,const void * spec,uchar * buf)731 static IppStatus ippsDFTFwd_CToC( const Complex<float>* src, Complex<float>* dst,
732 const void* spec, uchar* buf)
733 {
734 return CV_INSTRUMENT_FUN_IPP(ippsDFTFwd_CToC_32fc, (const Ipp32fc*)src, (Ipp32fc*)dst,
735 (const IppsDFTSpec_C_32fc*)spec, buf);
736 }
737
ippsDFTFwd_CToC(const Complex<double> * src,Complex<double> * dst,const void * spec,uchar * buf)738 static IppStatus ippsDFTFwd_CToC( const Complex<double>* src, Complex<double>* dst,
739 const void* spec, uchar* buf)
740 {
741 return CV_INSTRUMENT_FUN_IPP(ippsDFTFwd_CToC_64fc, (const Ipp64fc*)src, (Ipp64fc*)dst,
742 (const IppsDFTSpec_C_64fc*)spec, buf);
743 }
744
ippsDFTInv_CToC(const Complex<float> * src,Complex<float> * dst,const void * spec,uchar * buf)745 static IppStatus ippsDFTInv_CToC( const Complex<float>* src, Complex<float>* dst,
746 const void* spec, uchar* buf)
747 {
748 return CV_INSTRUMENT_FUN_IPP(ippsDFTInv_CToC_32fc, (const Ipp32fc*)src, (Ipp32fc*)dst,
749 (const IppsDFTSpec_C_32fc*)spec, buf);
750 }
751
ippsDFTInv_CToC(const Complex<double> * src,Complex<double> * dst,const void * spec,uchar * buf)752 static IppStatus ippsDFTInv_CToC( const Complex<double>* src, Complex<double>* dst,
753 const void* spec, uchar* buf)
754 {
755 return CV_INSTRUMENT_FUN_IPP(ippsDFTInv_CToC_64fc, (const Ipp64fc*)src, (Ipp64fc*)dst,
756 (const IppsDFTSpec_C_64fc*)spec, buf);
757 }
758
ippsDFTFwd_RToPack(const float * src,float * dst,const void * spec,uchar * buf)759 static IppStatus ippsDFTFwd_RToPack( const float* src, float* dst,
760 const void* spec, uchar* buf)
761 {
762 return CV_INSTRUMENT_FUN_IPP(ippsDFTFwd_RToPack_32f, src, dst, (const IppsDFTSpec_R_32f*)spec, buf);
763 }
764
ippsDFTFwd_RToPack(const double * src,double * dst,const void * spec,uchar * buf)765 static IppStatus ippsDFTFwd_RToPack( const double* src, double* dst,
766 const void* spec, uchar* buf)
767 {
768 return CV_INSTRUMENT_FUN_IPP(ippsDFTFwd_RToPack_64f, src, dst, (const IppsDFTSpec_R_64f*)spec, buf);
769 }
770
ippsDFTInv_PackToR(const float * src,float * dst,const void * spec,uchar * buf)771 static IppStatus ippsDFTInv_PackToR( const float* src, float* dst,
772 const void* spec, uchar* buf)
773 {
774 return CV_INSTRUMENT_FUN_IPP(ippsDFTInv_PackToR_32f, src, dst, (const IppsDFTSpec_R_32f*)spec, buf);
775 }
776
ippsDFTInv_PackToR(const double * src,double * dst,const void * spec,uchar * buf)777 static IppStatus ippsDFTInv_PackToR( const double* src, double* dst,
778 const void* spec, uchar* buf)
779 {
780 return CV_INSTRUMENT_FUN_IPP(ippsDFTInv_PackToR_64f, src, dst, (const IppsDFTSpec_R_64f*)spec, buf);
781 }
782 #endif
783
784 struct OcvDftOptions;
785
786 typedef void (*DFTFunc)(const OcvDftOptions & c, const void* src, void* dst);
787
788 struct OcvDftOptions {
789 int nf;
790 int *factors;
791 double scale;
792
793 int* itab;
794 void* wave;
795 int tab_size;
796 int n;
797
798 bool isInverse;
799 bool noPermute;
800 bool isComplex;
801
802 bool haveSSE3;
803
804 DFTFunc dft_func;
805 bool useIpp;
806
807 #ifdef USE_IPP_DFT
808 uchar* ipp_spec;
809 uchar* ipp_work;
810 #endif
811
OcvDftOptionscv::OcvDftOptions812 OcvDftOptions()
813 {
814 nf = 0;
815 factors = 0;
816 scale = 0;
817 itab = 0;
818 wave = 0;
819 tab_size = 0;
820 n = 0;
821 isInverse = false;
822 noPermute = false;
823 isComplex = false;
824 useIpp = false;
825 #ifdef USE_IPP_DFT
826 ipp_spec = 0;
827 ipp_work = 0;
828 #endif
829 dft_func = 0;
830 haveSSE3 = checkHardwareSupport(CV_CPU_SSE3);
831 }
832 };
833
834 // mixed-radix complex discrete Fourier transform: double-precision version
835 template<typename T> static void
DFT(const OcvDftOptions & c,const Complex<T> * src,Complex<T> * dst)836 DFT(const OcvDftOptions & c, const Complex<T>* src, Complex<T>* dst)
837 {
838 const Complex<T>* wave = (Complex<T>*)c.wave;
839 const int * itab = c.itab;
840
841 int n = c.n;
842 int f_idx, nx;
843 int inv = c.isInverse;
844 int dw0 = c.tab_size, dw;
845 int i, j, k;
846 Complex<T> t;
847 T scale = (T)c.scale;
848
849 if( c.useIpp )
850 {
851 #ifdef USE_IPP_DFT
852 if( !inv )
853 {
854 if (ippsDFTFwd_CToC( src, dst, c.ipp_spec, c.ipp_work ) >= 0)
855 {
856 CV_IMPL_ADD(CV_IMPL_IPP);
857 return;
858 }
859 }
860 else
861 {
862 if (ippsDFTInv_CToC( src, dst, c.ipp_spec, c.ipp_work ) >= 0)
863 {
864 CV_IMPL_ADD(CV_IMPL_IPP);
865 return;
866 }
867 }
868 setIppErrorStatus();
869 #endif
870 }
871
872 int tab_step = c.tab_size == n ? 1 : c.tab_size == n*2 ? 2 : c.tab_size/n;
873
874 // 0. shuffle data
875 if( dst != src )
876 {
877 assert( !c.noPermute );
878 if( !inv )
879 {
880 for( i = 0; i <= n - 2; i += 2, itab += 2*tab_step )
881 {
882 int k0 = itab[0], k1 = itab[tab_step];
883 assert( (unsigned)k0 < (unsigned)n && (unsigned)k1 < (unsigned)n );
884 dst[i] = src[k0]; dst[i+1] = src[k1];
885 }
886
887 if( i < n )
888 dst[n-1] = src[n-1];
889 }
890 else
891 {
892 for( i = 0; i <= n - 2; i += 2, itab += 2*tab_step )
893 {
894 int k0 = itab[0], k1 = itab[tab_step];
895 assert( (unsigned)k0 < (unsigned)n && (unsigned)k1 < (unsigned)n );
896 t.re = src[k0].re; t.im = -src[k0].im;
897 dst[i] = t;
898 t.re = src[k1].re; t.im = -src[k1].im;
899 dst[i+1] = t;
900 }
901
902 if( i < n )
903 {
904 t.re = src[n-1].re; t.im = -src[n-1].im;
905 dst[i] = t;
906 }
907 }
908 }
909 else
910 {
911 if( !c.noPermute )
912 {
913 CV_Assert( c.factors[0] == c.factors[c.nf-1] );
914 if( c.nf == 1 )
915 {
916 if( (n & 3) == 0 )
917 {
918 int n2 = n/2;
919 Complex<T>* dsth = dst + n2;
920
921 for( i = 0; i < n2; i += 2, itab += tab_step*2 )
922 {
923 j = itab[0];
924 assert( (unsigned)j < (unsigned)n2 );
925
926 CV_SWAP(dst[i+1], dsth[j], t);
927 if( j > i )
928 {
929 CV_SWAP(dst[i], dst[j], t);
930 CV_SWAP(dsth[i+1], dsth[j+1], t);
931 }
932 }
933 }
934 // else do nothing
935 }
936 else
937 {
938 for( i = 0; i < n; i++, itab += tab_step )
939 {
940 j = itab[0];
941 assert( (unsigned)j < (unsigned)n );
942 if( j > i )
943 CV_SWAP(dst[i], dst[j], t);
944 }
945 }
946 }
947
948 if( inv )
949 {
950 for( i = 0; i <= n - 2; i += 2 )
951 {
952 T t0 = -dst[i].im;
953 T t1 = -dst[i+1].im;
954 dst[i].im = t0; dst[i+1].im = t1;
955 }
956
957 if( i < n )
958 dst[n-1].im = -dst[n-1].im;
959 }
960 }
961
962 n = 1;
963 // 1. power-2 transforms
964 if( (c.factors[0] & 1) == 0 )
965 {
966 if( c.factors[0] >= 4 && c.haveSSE3)
967 {
968 DFT_VecR4<T> vr4;
969 n = vr4(dst, c.factors[0], c.n, dw0, wave);
970 }
971
972 // radix-4 transform
973 for( ; n*4 <= c.factors[0]; )
974 {
975 nx = n;
976 n *= 4;
977 dw0 /= 4;
978
979 for( i = 0; i < c.n; i += n )
980 {
981 Complex<T> *v0, *v1;
982 T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4;
983
984 v0 = dst + i;
985 v1 = v0 + nx*2;
986
987 r0 = v1[0].re; i0 = v1[0].im;
988 r4 = v1[nx].re; i4 = v1[nx].im;
989
990 r1 = r0 + r4; i1 = i0 + i4;
991 r3 = i0 - i4; i3 = r4 - r0;
992
993 r2 = v0[0].re; i2 = v0[0].im;
994 r4 = v0[nx].re; i4 = v0[nx].im;
995
996 r0 = r2 + r4; i0 = i2 + i4;
997 r2 -= r4; i2 -= i4;
998
999 v0[0].re = r0 + r1; v0[0].im = i0 + i1;
1000 v1[0].re = r0 - r1; v1[0].im = i0 - i1;
1001 v0[nx].re = r2 + r3; v0[nx].im = i2 + i3;
1002 v1[nx].re = r2 - r3; v1[nx].im = i2 - i3;
1003
1004 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
1005 {
1006 v0 = dst + i + j;
1007 v1 = v0 + nx*2;
1008
1009 r2 = v0[nx].re*wave[dw*2].re - v0[nx].im*wave[dw*2].im;
1010 i2 = v0[nx].re*wave[dw*2].im + v0[nx].im*wave[dw*2].re;
1011 r0 = v1[0].re*wave[dw].im + v1[0].im*wave[dw].re;
1012 i0 = v1[0].re*wave[dw].re - v1[0].im*wave[dw].im;
1013 r3 = v1[nx].re*wave[dw*3].im + v1[nx].im*wave[dw*3].re;
1014 i3 = v1[nx].re*wave[dw*3].re - v1[nx].im*wave[dw*3].im;
1015
1016 r1 = i0 + i3; i1 = r0 + r3;
1017 r3 = r0 - r3; i3 = i3 - i0;
1018 r4 = v0[0].re; i4 = v0[0].im;
1019
1020 r0 = r4 + r2; i0 = i4 + i2;
1021 r2 = r4 - r2; i2 = i4 - i2;
1022
1023 v0[0].re = r0 + r1; v0[0].im = i0 + i1;
1024 v1[0].re = r0 - r1; v1[0].im = i0 - i1;
1025 v0[nx].re = r2 + r3; v0[nx].im = i2 + i3;
1026 v1[nx].re = r2 - r3; v1[nx].im = i2 - i3;
1027 }
1028 }
1029 }
1030
1031 for( ; n < c.factors[0]; )
1032 {
1033 // do the remaining radix-2 transform
1034 n *= 2;
1035 dw0 /= 2;
1036
1037 if(c.haveSSE3)
1038 {
1039 DFT_VecR2<T> vr2;
1040 vr2(dst, c.n, n, dw0, wave);
1041 }
1042 else
1043 {
1044 DFT_R2<T> vr2;
1045 vr2(dst, c.n, n, dw0, wave);
1046 }
1047 }
1048 }
1049
1050 // 2. all the other transforms
1051 for( f_idx = (c.factors[0]&1) ? 0 : 1; f_idx < c.nf; f_idx++ )
1052 {
1053 int factor = c.factors[f_idx];
1054 nx = n;
1055 n *= factor;
1056 dw0 /= factor;
1057
1058 if( factor == 3 )
1059 {
1060 if(c.haveSSE3)
1061 {
1062 DFT_VecR3<T> vr3;
1063 vr3(dst, c.n, n, dw0, wave);
1064 }
1065 else
1066 {
1067 DFT_R3<T> vr3;
1068 vr3(dst, c.n, n, dw0, wave);
1069 }
1070 }
1071 else if( factor == 5 )
1072 {
1073 DFT_R5<T> vr5;
1074 vr5(dst, c.n, n, dw0, wave);
1075 }
1076 else
1077 {
1078 // radix-"factor" - an odd number
1079 int p, q, factor2 = (factor - 1)/2;
1080 int d, dd, dw_f = c.tab_size/factor;
1081 AutoBuffer<Complex<T> > buf(factor2 * 2);
1082 Complex<T>* a = buf.data();
1083 Complex<T>* b = a + factor2;
1084
1085 for( i = 0; i < c.n; i += n )
1086 {
1087 for( j = 0, dw = 0; j < nx; j++, dw += dw0 )
1088 {
1089 Complex<T>* v = dst + i + j;
1090 Complex<T> v_0 = v[0];
1091 Complex<T> vn_0 = v_0;
1092
1093 if( j == 0 )
1094 {
1095 for( p = 1, k = nx; p <= factor2; p++, k += nx )
1096 {
1097 T r0 = v[k].re + v[n-k].re;
1098 T i0 = v[k].im - v[n-k].im;
1099 T r1 = v[k].re - v[n-k].re;
1100 T i1 = v[k].im + v[n-k].im;
1101
1102 vn_0.re += r0; vn_0.im += i1;
1103 a[p-1].re = r0; a[p-1].im = i0;
1104 b[p-1].re = r1; b[p-1].im = i1;
1105 }
1106 }
1107 else
1108 {
1109 const Complex<T>* wave_ = wave + dw*factor;
1110 d = dw;
1111
1112 for( p = 1, k = nx; p <= factor2; p++, k += nx, d += dw )
1113 {
1114 T r2 = v[k].re*wave[d].re - v[k].im*wave[d].im;
1115 T i2 = v[k].re*wave[d].im + v[k].im*wave[d].re;
1116
1117 T r1 = v[n-k].re*wave_[-d].re - v[n-k].im*wave_[-d].im;
1118 T i1 = v[n-k].re*wave_[-d].im + v[n-k].im*wave_[-d].re;
1119
1120 T r0 = r2 + r1;
1121 T i0 = i2 - i1;
1122 r1 = r2 - r1;
1123 i1 = i2 + i1;
1124
1125 vn_0.re += r0; vn_0.im += i1;
1126 a[p-1].re = r0; a[p-1].im = i0;
1127 b[p-1].re = r1; b[p-1].im = i1;
1128 }
1129 }
1130
1131 v[0] = vn_0;
1132
1133 for( p = 1, k = nx; p <= factor2; p++, k += nx )
1134 {
1135 Complex<T> s0 = v_0, s1 = v_0;
1136 d = dd = dw_f*p;
1137
1138 for( q = 0; q < factor2; q++ )
1139 {
1140 T r0 = wave[d].re * a[q].re;
1141 T i0 = wave[d].im * a[q].im;
1142 T r1 = wave[d].re * b[q].im;
1143 T i1 = wave[d].im * b[q].re;
1144
1145 s1.re += r0 + i0; s0.re += r0 - i0;
1146 s1.im += r1 - i1; s0.im += r1 + i1;
1147
1148 d += dd;
1149 d -= -(d >= c.tab_size) & c.tab_size;
1150 }
1151
1152 v[k] = s0;
1153 v[n-k] = s1;
1154 }
1155 }
1156 }
1157 }
1158 }
1159
1160 if( scale != 1 )
1161 {
1162 T re_scale = scale, im_scale = scale;
1163 if( inv )
1164 im_scale = -im_scale;
1165
1166 for( i = 0; i < c.n; i++ )
1167 {
1168 T t0 = dst[i].re*re_scale;
1169 T t1 = dst[i].im*im_scale;
1170 dst[i].re = t0;
1171 dst[i].im = t1;
1172 }
1173 }
1174 else if( inv )
1175 {
1176 for( i = 0; i <= c.n - 2; i += 2 )
1177 {
1178 T t0 = -dst[i].im;
1179 T t1 = -dst[i+1].im;
1180 dst[i].im = t0;
1181 dst[i+1].im = t1;
1182 }
1183
1184 if( i < c.n )
1185 dst[c.n-1].im = -dst[c.n-1].im;
1186 }
1187 }
1188
1189
1190 /* FFT of real vector
1191 output vector format:
1192 re(0), re(1), im(1), ... , re(n/2-1), im((n+1)/2-1) [, re((n+1)/2)] OR ...
1193 re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] */
1194 template<typename T> static void
RealDFT(const OcvDftOptions & c,const T * src,T * dst)1195 RealDFT(const OcvDftOptions & c, const T* src, T* dst)
1196 {
1197 int n = c.n;
1198 int complex_output = c.isComplex;
1199 T scale = (T)c.scale;
1200 int j;
1201 dst += complex_output;
1202
1203 if( c.useIpp )
1204 {
1205 #ifdef USE_IPP_DFT
1206 if (ippsDFTFwd_RToPack( src, dst, c.ipp_spec, c.ipp_work ) >=0)
1207 {
1208 if( complex_output )
1209 {
1210 dst[-1] = dst[0];
1211 dst[0] = 0;
1212 if( (n & 1) == 0 )
1213 dst[n] = 0;
1214 }
1215 CV_IMPL_ADD(CV_IMPL_IPP);
1216 return;
1217 }
1218 setIppErrorStatus();
1219 #endif
1220 }
1221 assert( c.tab_size == n );
1222
1223 if( n == 1 )
1224 {
1225 dst[0] = src[0]*scale;
1226 }
1227 else if( n == 2 )
1228 {
1229 T t = (src[0] + src[1])*scale;
1230 dst[1] = (src[0] - src[1])*scale;
1231 dst[0] = t;
1232 }
1233 else if( n & 1 )
1234 {
1235 dst -= complex_output;
1236 Complex<T>* _dst = (Complex<T>*)dst;
1237 _dst[0].re = src[0]*scale;
1238 _dst[0].im = 0;
1239 for( j = 1; j < n; j += 2 )
1240 {
1241 T t0 = src[c.itab[j]]*scale;
1242 T t1 = src[c.itab[j+1]]*scale;
1243 _dst[j].re = t0;
1244 _dst[j].im = 0;
1245 _dst[j+1].re = t1;
1246 _dst[j+1].im = 0;
1247 }
1248 OcvDftOptions sub_c = c;
1249 sub_c.isComplex = false;
1250 sub_c.isInverse = false;
1251 sub_c.noPermute = true;
1252 sub_c.scale = 1.;
1253 DFT(sub_c, _dst, _dst);
1254 if( !complex_output )
1255 dst[1] = dst[0];
1256 }
1257 else
1258 {
1259 T t0, t;
1260 T h1_re, h1_im, h2_re, h2_im;
1261 T scale2 = scale*(T)0.5;
1262 int n2 = n >> 1;
1263
1264 c.factors[0] >>= 1;
1265
1266 OcvDftOptions sub_c = c;
1267 sub_c.factors += (c.factors[0] == 1);
1268 sub_c.nf -= (c.factors[0] == 1);
1269 sub_c.isComplex = false;
1270 sub_c.isInverse = false;
1271 sub_c.noPermute = false;
1272 sub_c.scale = 1.;
1273 sub_c.n = n2;
1274
1275 DFT(sub_c, (Complex<T>*)src, (Complex<T>*)dst);
1276
1277 c.factors[0] <<= 1;
1278
1279 t = dst[0] - dst[1];
1280 dst[0] = (dst[0] + dst[1])*scale;
1281 dst[1] = t*scale;
1282
1283 t0 = dst[n2];
1284 t = dst[n-1];
1285 dst[n-1] = dst[1];
1286
1287 const Complex<T> *wave = (const Complex<T>*)c.wave;
1288
1289 for( j = 2, wave++; j < n2; j += 2, wave++ )
1290 {
1291 /* calc odd */
1292 h2_re = scale2*(dst[j+1] + t);
1293 h2_im = scale2*(dst[n-j] - dst[j]);
1294
1295 /* calc even */
1296 h1_re = scale2*(dst[j] + dst[n-j]);
1297 h1_im = scale2*(dst[j+1] - t);
1298
1299 /* rotate */
1300 t = h2_re*wave->re - h2_im*wave->im;
1301 h2_im = h2_re*wave->im + h2_im*wave->re;
1302 h2_re = t;
1303 t = dst[n-j-1];
1304
1305 dst[j-1] = h1_re + h2_re;
1306 dst[n-j-1] = h1_re - h2_re;
1307 dst[j] = h1_im + h2_im;
1308 dst[n-j] = h2_im - h1_im;
1309 }
1310
1311 if( j <= n2 )
1312 {
1313 dst[n2-1] = t0*scale;
1314 dst[n2] = -t*scale;
1315 }
1316 }
1317
1318 if( complex_output && ((n & 1) == 0 || n == 1))
1319 {
1320 dst[-1] = dst[0];
1321 dst[0] = 0;
1322 if( n > 1 )
1323 dst[n] = 0;
1324 }
1325 }
1326
1327 /* Inverse FFT of complex conjugate-symmetric vector
1328 input vector format:
1329 re[0], re[1], im[1], ... , re[n/2-1], im[n/2-1], re[n/2] OR
1330 re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] */
1331 template<typename T> static void
CCSIDFT(const OcvDftOptions & c,const T * src,T * dst)1332 CCSIDFT(const OcvDftOptions & c, const T* src, T* dst)
1333 {
1334 int n = c.n;
1335 int complex_input = c.isComplex;
1336 int j, k;
1337 T scale = (T)c.scale;
1338 T save_s1 = 0.;
1339 T t0, t1, t2, t3, t;
1340
1341 assert( c.tab_size == n );
1342
1343 if( complex_input )
1344 {
1345 assert( src != dst );
1346 save_s1 = src[1];
1347 ((T*)src)[1] = src[0];
1348 src++;
1349 }
1350 if( c.useIpp )
1351 {
1352 #ifdef USE_IPP_DFT
1353 if (ippsDFTInv_PackToR( src, dst, c.ipp_spec, c.ipp_work ) >=0)
1354 {
1355 if( complex_input )
1356 ((T*)src)[0] = (T)save_s1;
1357 CV_IMPL_ADD(CV_IMPL_IPP);
1358 return;
1359 }
1360
1361 setIppErrorStatus();
1362 #endif
1363 }
1364 if( n == 1 )
1365 {
1366 dst[0] = (T)(src[0]*scale);
1367 }
1368 else if( n == 2 )
1369 {
1370 t = (src[0] + src[1])*scale;
1371 dst[1] = (src[0] - src[1])*scale;
1372 dst[0] = t;
1373 }
1374 else if( n & 1 )
1375 {
1376 Complex<T>* _src = (Complex<T>*)(src-1);
1377 Complex<T>* _dst = (Complex<T>*)dst;
1378
1379 _dst[0].re = src[0];
1380 _dst[0].im = 0;
1381
1382 int n2 = (n+1) >> 1;
1383
1384 for( j = 1; j < n2; j++ )
1385 {
1386 int k0 = c.itab[j], k1 = c.itab[n-j];
1387 t0 = _src[j].re; t1 = _src[j].im;
1388 _dst[k0].re = t0; _dst[k0].im = -t1;
1389 _dst[k1].re = t0; _dst[k1].im = t1;
1390 }
1391
1392 OcvDftOptions sub_c = c;
1393 sub_c.isComplex = false;
1394 sub_c.isInverse = false;
1395 sub_c.noPermute = true;
1396 sub_c.scale = 1.;
1397 sub_c.n = n;
1398
1399 DFT(sub_c, _dst, _dst);
1400 dst[0] *= scale;
1401 for( j = 1; j < n; j += 2 )
1402 {
1403 t0 = dst[j*2]*scale;
1404 t1 = dst[j*2+2]*scale;
1405 dst[j] = t0;
1406 dst[j+1] = t1;
1407 }
1408 }
1409 else
1410 {
1411 int inplace = src == dst;
1412 const Complex<T>* w = (const Complex<T>*)c.wave;
1413
1414 t = src[1];
1415 t0 = (src[0] + src[n-1]);
1416 t1 = (src[n-1] - src[0]);
1417 dst[0] = t0;
1418 dst[1] = t1;
1419
1420 int n2 = (n+1) >> 1;
1421
1422 for( j = 2, w++; j < n2; j += 2, w++ )
1423 {
1424 T h1_re, h1_im, h2_re, h2_im;
1425
1426 h1_re = (t + src[n-j-1]);
1427 h1_im = (src[j] - src[n-j]);
1428
1429 h2_re = (t - src[n-j-1]);
1430 h2_im = (src[j] + src[n-j]);
1431
1432 t = h2_re*w->re + h2_im*w->im;
1433 h2_im = h2_im*w->re - h2_re*w->im;
1434 h2_re = t;
1435
1436 t = src[j+1];
1437 t0 = h1_re - h2_im;
1438 t1 = -h1_im - h2_re;
1439 t2 = h1_re + h2_im;
1440 t3 = h1_im - h2_re;
1441
1442 if( inplace )
1443 {
1444 dst[j] = t0;
1445 dst[j+1] = t1;
1446 dst[n-j] = t2;
1447 dst[n-j+1]= t3;
1448 }
1449 else
1450 {
1451 int j2 = j >> 1;
1452 k = c.itab[j2];
1453 dst[k] = t0;
1454 dst[k+1] = t1;
1455 k = c.itab[n2-j2];
1456 dst[k] = t2;
1457 dst[k+1]= t3;
1458 }
1459 }
1460
1461 if( j <= n2 )
1462 {
1463 t0 = t*2;
1464 t1 = src[n2]*2;
1465
1466 if( inplace )
1467 {
1468 dst[n2] = t0;
1469 dst[n2+1] = t1;
1470 }
1471 else
1472 {
1473 k = c.itab[n2];
1474 dst[k*2] = t0;
1475 dst[k*2+1] = t1;
1476 }
1477 }
1478
1479 c.factors[0] >>= 1;
1480
1481 OcvDftOptions sub_c = c;
1482 sub_c.factors += (c.factors[0] == 1);
1483 sub_c.nf -= (c.factors[0] == 1);
1484 sub_c.isComplex = false;
1485 sub_c.isInverse = false;
1486 sub_c.noPermute = !inplace;
1487 sub_c.scale = 1.;
1488 sub_c.n = n2;
1489
1490 DFT(sub_c, (Complex<T>*)dst, (Complex<T>*)dst);
1491
1492 c.factors[0] <<= 1;
1493
1494 for( j = 0; j < n; j += 2 )
1495 {
1496 t0 = dst[j]*scale;
1497 t1 = dst[j+1]*(-scale);
1498 dst[j] = t0;
1499 dst[j+1] = t1;
1500 }
1501 }
1502 if( complex_input )
1503 ((T*)src)[0] = (T)save_s1;
1504 }
1505
1506 static void
CopyColumn(const uchar * _src,size_t src_step,uchar * _dst,size_t dst_step,int len,size_t elem_size)1507 CopyColumn( const uchar* _src, size_t src_step,
1508 uchar* _dst, size_t dst_step,
1509 int len, size_t elem_size )
1510 {
1511 int i, t0, t1;
1512 const int* src = (const int*)_src;
1513 int* dst = (int*)_dst;
1514 src_step /= sizeof(src[0]);
1515 dst_step /= sizeof(dst[0]);
1516
1517 if( elem_size == sizeof(int) )
1518 {
1519 for( i = 0; i < len; i++, src += src_step, dst += dst_step )
1520 dst[0] = src[0];
1521 }
1522 else if( elem_size == sizeof(int)*2 )
1523 {
1524 for( i = 0; i < len; i++, src += src_step, dst += dst_step )
1525 {
1526 t0 = src[0]; t1 = src[1];
1527 dst[0] = t0; dst[1] = t1;
1528 }
1529 }
1530 else if( elem_size == sizeof(int)*4 )
1531 {
1532 for( i = 0; i < len; i++, src += src_step, dst += dst_step )
1533 {
1534 t0 = src[0]; t1 = src[1];
1535 dst[0] = t0; dst[1] = t1;
1536 t0 = src[2]; t1 = src[3];
1537 dst[2] = t0; dst[3] = t1;
1538 }
1539 }
1540 }
1541
1542
1543 static void
CopyFrom2Columns(const uchar * _src,size_t src_step,uchar * _dst0,uchar * _dst1,int len,size_t elem_size)1544 CopyFrom2Columns( const uchar* _src, size_t src_step,
1545 uchar* _dst0, uchar* _dst1,
1546 int len, size_t elem_size )
1547 {
1548 int i, t0, t1;
1549 const int* src = (const int*)_src;
1550 int* dst0 = (int*)_dst0;
1551 int* dst1 = (int*)_dst1;
1552 src_step /= sizeof(src[0]);
1553
1554 if( elem_size == sizeof(int) )
1555 {
1556 for( i = 0; i < len; i++, src += src_step )
1557 {
1558 t0 = src[0]; t1 = src[1];
1559 dst0[i] = t0; dst1[i] = t1;
1560 }
1561 }
1562 else if( elem_size == sizeof(int)*2 )
1563 {
1564 for( i = 0; i < len*2; i += 2, src += src_step )
1565 {
1566 t0 = src[0]; t1 = src[1];
1567 dst0[i] = t0; dst0[i+1] = t1;
1568 t0 = src[2]; t1 = src[3];
1569 dst1[i] = t0; dst1[i+1] = t1;
1570 }
1571 }
1572 else if( elem_size == sizeof(int)*4 )
1573 {
1574 for( i = 0; i < len*4; i += 4, src += src_step )
1575 {
1576 t0 = src[0]; t1 = src[1];
1577 dst0[i] = t0; dst0[i+1] = t1;
1578 t0 = src[2]; t1 = src[3];
1579 dst0[i+2] = t0; dst0[i+3] = t1;
1580 t0 = src[4]; t1 = src[5];
1581 dst1[i] = t0; dst1[i+1] = t1;
1582 t0 = src[6]; t1 = src[7];
1583 dst1[i+2] = t0; dst1[i+3] = t1;
1584 }
1585 }
1586 }
1587
1588
1589 static void
CopyTo2Columns(const uchar * _src0,const uchar * _src1,uchar * _dst,size_t dst_step,int len,size_t elem_size)1590 CopyTo2Columns( const uchar* _src0, const uchar* _src1,
1591 uchar* _dst, size_t dst_step,
1592 int len, size_t elem_size )
1593 {
1594 int i, t0, t1;
1595 const int* src0 = (const int*)_src0;
1596 const int* src1 = (const int*)_src1;
1597 int* dst = (int*)_dst;
1598 dst_step /= sizeof(dst[0]);
1599
1600 if( elem_size == sizeof(int) )
1601 {
1602 for( i = 0; i < len; i++, dst += dst_step )
1603 {
1604 t0 = src0[i]; t1 = src1[i];
1605 dst[0] = t0; dst[1] = t1;
1606 }
1607 }
1608 else if( elem_size == sizeof(int)*2 )
1609 {
1610 for( i = 0; i < len*2; i += 2, dst += dst_step )
1611 {
1612 t0 = src0[i]; t1 = src0[i+1];
1613 dst[0] = t0; dst[1] = t1;
1614 t0 = src1[i]; t1 = src1[i+1];
1615 dst[2] = t0; dst[3] = t1;
1616 }
1617 }
1618 else if( elem_size == sizeof(int)*4 )
1619 {
1620 for( i = 0; i < len*4; i += 4, dst += dst_step )
1621 {
1622 t0 = src0[i]; t1 = src0[i+1];
1623 dst[0] = t0; dst[1] = t1;
1624 t0 = src0[i+2]; t1 = src0[i+3];
1625 dst[2] = t0; dst[3] = t1;
1626 t0 = src1[i]; t1 = src1[i+1];
1627 dst[4] = t0; dst[5] = t1;
1628 t0 = src1[i+2]; t1 = src1[i+3];
1629 dst[6] = t0; dst[7] = t1;
1630 }
1631 }
1632 }
1633
1634
1635 static void
ExpandCCS(uchar * _ptr,int n,int elem_size)1636 ExpandCCS( uchar* _ptr, int n, int elem_size )
1637 {
1638 int i;
1639 if( elem_size == (int)sizeof(float) )
1640 {
1641 float* p = (float*)_ptr;
1642 for( i = 1; i < (n+1)/2; i++ )
1643 {
1644 p[(n-i)*2] = p[i*2-1];
1645 p[(n-i)*2+1] = -p[i*2];
1646 }
1647 if( (n & 1) == 0 )
1648 {
1649 p[n] = p[n-1];
1650 p[n+1] = 0.f;
1651 n--;
1652 }
1653 for( i = n-1; i > 0; i-- )
1654 p[i+1] = p[i];
1655 p[1] = 0.f;
1656 }
1657 else
1658 {
1659 double* p = (double*)_ptr;
1660 for( i = 1; i < (n+1)/2; i++ )
1661 {
1662 p[(n-i)*2] = p[i*2-1];
1663 p[(n-i)*2+1] = -p[i*2];
1664 }
1665 if( (n & 1) == 0 )
1666 {
1667 p[n] = p[n-1];
1668 p[n+1] = 0.f;
1669 n--;
1670 }
1671 for( i = n-1; i > 0; i-- )
1672 p[i+1] = p[i];
1673 p[1] = 0.f;
1674 }
1675 }
1676
DFT_32f(const OcvDftOptions & c,const Complexf * src,Complexf * dst)1677 static void DFT_32f(const OcvDftOptions & c, const Complexf* src, Complexf* dst)
1678 {
1679 DFT(c, src, dst);
1680 }
1681
DFT_64f(const OcvDftOptions & c,const Complexd * src,Complexd * dst)1682 static void DFT_64f(const OcvDftOptions & c, const Complexd* src, Complexd* dst)
1683 {
1684 DFT(c, src, dst);
1685 }
1686
1687
RealDFT_32f(const OcvDftOptions & c,const float * src,float * dst)1688 static void RealDFT_32f(const OcvDftOptions & c, const float* src, float* dst)
1689 {
1690 RealDFT(c, src, dst);
1691 }
1692
RealDFT_64f(const OcvDftOptions & c,const double * src,double * dst)1693 static void RealDFT_64f(const OcvDftOptions & c, const double* src, double* dst)
1694 {
1695 RealDFT(c, src, dst);
1696 }
1697
CCSIDFT_32f(const OcvDftOptions & c,const float * src,float * dst)1698 static void CCSIDFT_32f(const OcvDftOptions & c, const float* src, float* dst)
1699 {
1700 CCSIDFT(c, src, dst);
1701 }
1702
CCSIDFT_64f(const OcvDftOptions & c,const double * src,double * dst)1703 static void CCSIDFT_64f(const OcvDftOptions & c, const double* src, double* dst)
1704 {
1705 CCSIDFT(c, src, dst);
1706 }
1707
1708 }
1709
1710 #ifdef USE_IPP_DFT
1711 typedef IppStatus (CV_STDCALL* IppDFTGetSizeFunc)(int, int, IppHintAlgorithm, int*, int*, int*);
1712 typedef IppStatus (CV_STDCALL* IppDFTInitFunc)(int, int, IppHintAlgorithm, void*, uchar*);
1713 #endif
1714
1715 namespace cv
1716 {
1717 #if defined USE_IPP_DFT
1718
1719 typedef IppStatus (CV_STDCALL* ippiDFT_C_Func)(const Ipp32fc*, int, Ipp32fc*, int, const IppiDFTSpec_C_32fc*, Ipp8u*);
1720 typedef IppStatus (CV_STDCALL* ippiDFT_R_Func)(const Ipp32f* , int, Ipp32f* , int, const IppiDFTSpec_R_32f* , Ipp8u*);
1721
1722 template <typename Dft>
1723 class Dft_C_IPPLoop_Invoker : public ParallelLoopBody
1724 {
1725 public:
1726
Dft_C_IPPLoop_Invoker(const uchar * _src,size_t _src_step,uchar * _dst,size_t _dst_step,int _width,const Dft & _ippidft,int _norm_flag,bool * _ok)1727 Dft_C_IPPLoop_Invoker(const uchar * _src, size_t _src_step, uchar * _dst, size_t _dst_step, int _width,
1728 const Dft& _ippidft, int _norm_flag, bool *_ok) :
1729 ParallelLoopBody(),
1730 src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width),
1731 ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok)
1732 {
1733 *ok = true;
1734 }
1735
operator ()(const Range & range) const1736 virtual void operator()(const Range& range) const CV_OVERRIDE
1737 {
1738 IppStatus status;
1739 Ipp8u* pBuffer = 0;
1740 Ipp8u* pMemInit= 0;
1741 int sizeBuffer=0;
1742 int sizeSpec=0;
1743 int sizeInit=0;
1744
1745 IppiSize srcRoiSize = {width, 1};
1746
1747 status = ippiDFTGetSize_C_32fc(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
1748 if ( status < 0 )
1749 {
1750 *ok = false;
1751 return;
1752 }
1753
1754 IppiDFTSpec_C_32fc* pDFTSpec = (IppiDFTSpec_C_32fc*)CV_IPP_MALLOC( sizeSpec );
1755
1756 if ( sizeInit > 0 )
1757 pMemInit = (Ipp8u*)CV_IPP_MALLOC( sizeInit );
1758
1759 if ( sizeBuffer > 0 )
1760 pBuffer = (Ipp8u*)CV_IPP_MALLOC( sizeBuffer );
1761
1762 status = ippiDFTInit_C_32fc( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit );
1763
1764 if ( sizeInit > 0 )
1765 ippFree( pMemInit );
1766
1767 if ( status < 0 )
1768 {
1769 ippFree( pDFTSpec );
1770 if ( sizeBuffer > 0 )
1771 ippFree( pBuffer );
1772 *ok = false;
1773 return;
1774 }
1775
1776 for( int i = range.start; i < range.end; ++i)
1777 if(!ippidft((Ipp32fc*)(src + src_step * i), src_step, (Ipp32fc*)(dst + dst_step * i), dst_step,
1778 pDFTSpec, (Ipp8u*)pBuffer))
1779 {
1780 *ok = false;
1781 }
1782
1783 if ( sizeBuffer > 0 )
1784 ippFree( pBuffer );
1785
1786 ippFree( pDFTSpec );
1787 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
1788 }
1789
1790 private:
1791 const uchar * src;
1792 size_t src_step;
1793 uchar * dst;
1794 size_t dst_step;
1795 int width;
1796 const Dft& ippidft;
1797 int norm_flag;
1798 bool *ok;
1799
1800 const Dft_C_IPPLoop_Invoker& operator= (const Dft_C_IPPLoop_Invoker&);
1801 };
1802
1803 template <typename Dft>
1804 class Dft_R_IPPLoop_Invoker : public ParallelLoopBody
1805 {
1806 public:
1807
Dft_R_IPPLoop_Invoker(const uchar * _src,size_t _src_step,uchar * _dst,size_t _dst_step,int _width,const Dft & _ippidft,int _norm_flag,bool * _ok)1808 Dft_R_IPPLoop_Invoker(const uchar * _src, size_t _src_step, uchar * _dst, size_t _dst_step, int _width,
1809 const Dft& _ippidft, int _norm_flag, bool *_ok) :
1810 ParallelLoopBody(),
1811 src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width),
1812 ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok)
1813 {
1814 *ok = true;
1815 }
1816
operator ()(const Range & range) const1817 virtual void operator()(const Range& range) const CV_OVERRIDE
1818 {
1819 IppStatus status;
1820 Ipp8u* pBuffer = 0;
1821 Ipp8u* pMemInit= 0;
1822 int sizeBuffer=0;
1823 int sizeSpec=0;
1824 int sizeInit=0;
1825
1826 IppiSize srcRoiSize = {width, 1};
1827
1828 status = ippiDFTGetSize_R_32f(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
1829 if ( status < 0 )
1830 {
1831 *ok = false;
1832 return;
1833 }
1834
1835 IppiDFTSpec_R_32f* pDFTSpec = (IppiDFTSpec_R_32f*)CV_IPP_MALLOC( sizeSpec );
1836
1837 if ( sizeInit > 0 )
1838 pMemInit = (Ipp8u*)CV_IPP_MALLOC( sizeInit );
1839
1840 if ( sizeBuffer > 0 )
1841 pBuffer = (Ipp8u*)CV_IPP_MALLOC( sizeBuffer );
1842
1843 status = ippiDFTInit_R_32f( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit );
1844
1845 if ( sizeInit > 0 )
1846 ippFree( pMemInit );
1847
1848 if ( status < 0 )
1849 {
1850 ippFree( pDFTSpec );
1851 if ( sizeBuffer > 0 )
1852 ippFree( pBuffer );
1853 *ok = false;
1854 return;
1855 }
1856
1857 for( int i = range.start; i < range.end; ++i)
1858 if(!ippidft((float*)(src + src_step * i), src_step, (float*)(dst + dst_step * i), dst_step,
1859 pDFTSpec, (Ipp8u*)pBuffer))
1860 {
1861 *ok = false;
1862 }
1863
1864 if ( sizeBuffer > 0 )
1865 ippFree( pBuffer );
1866
1867 ippFree( pDFTSpec );
1868 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
1869 }
1870
1871 private:
1872 const uchar * src;
1873 size_t src_step;
1874 uchar * dst;
1875 size_t dst_step;
1876 int width;
1877 const Dft& ippidft;
1878 int norm_flag;
1879 bool *ok;
1880
1881 const Dft_R_IPPLoop_Invoker& operator= (const Dft_R_IPPLoop_Invoker&);
1882 };
1883
1884 template <typename Dft>
Dft_C_IPPLoop(const uchar * src,size_t src_step,uchar * dst,size_t dst_step,int width,int height,const Dft & ippidft,int norm_flag)1885 bool Dft_C_IPPLoop(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, const Dft& ippidft, int norm_flag)
1886 {
1887 bool ok;
1888 parallel_for_(Range(0, height), Dft_C_IPPLoop_Invoker<Dft>(src, src_step, dst, dst_step, width, ippidft, norm_flag, &ok), (width * height)/(double)(1<<16) );
1889 return ok;
1890 }
1891
1892 template <typename Dft>
Dft_R_IPPLoop(const uchar * src,size_t src_step,uchar * dst,size_t dst_step,int width,int height,const Dft & ippidft,int norm_flag)1893 bool Dft_R_IPPLoop(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, const Dft& ippidft, int norm_flag)
1894 {
1895 bool ok;
1896 parallel_for_(Range(0, height), Dft_R_IPPLoop_Invoker<Dft>(src, src_step, dst, dst_step, width, ippidft, norm_flag, &ok), (width * height)/(double)(1<<16) );
1897 return ok;
1898 }
1899
1900 struct IPPDFT_C_Functor
1901 {
IPPDFT_C_Functorcv::IPPDFT_C_Functor1902 IPPDFT_C_Functor(ippiDFT_C_Func _func) : ippiDFT_CToC_32fc_C1R(_func){}
1903
operator ()cv::IPPDFT_C_Functor1904 bool operator()(const Ipp32fc* src, size_t srcStep, Ipp32fc* dst, size_t dstStep, const IppiDFTSpec_C_32fc* pDFTSpec, Ipp8u* pBuffer) const
1905 {
1906 return ippiDFT_CToC_32fc_C1R ? CV_INSTRUMENT_FUN_IPP(ippiDFT_CToC_32fc_C1R, src, static_cast<int>(srcStep), dst, static_cast<int>(dstStep), pDFTSpec, pBuffer) >= 0 : false;
1907 }
1908 private:
1909 ippiDFT_C_Func ippiDFT_CToC_32fc_C1R;
1910 };
1911
1912 struct IPPDFT_R_Functor
1913 {
IPPDFT_R_Functorcv::IPPDFT_R_Functor1914 IPPDFT_R_Functor(ippiDFT_R_Func _func) : ippiDFT_PackToR_32f_C1R(_func){}
1915
operator ()cv::IPPDFT_R_Functor1916 bool operator()(const Ipp32f* src, size_t srcStep, Ipp32f* dst, size_t dstStep, const IppiDFTSpec_R_32f* pDFTSpec, Ipp8u* pBuffer) const
1917 {
1918 return ippiDFT_PackToR_32f_C1R ? CV_INSTRUMENT_FUN_IPP(ippiDFT_PackToR_32f_C1R, src, static_cast<int>(srcStep), dst, static_cast<int>(dstStep), pDFTSpec, pBuffer) >= 0 : false;
1919 }
1920 private:
1921 ippiDFT_R_Func ippiDFT_PackToR_32f_C1R;
1922 };
1923
ippi_DFT_C_32F(const uchar * src,size_t src_step,uchar * dst,size_t dst_step,int width,int height,bool inv,int norm_flag)1924 static bool ippi_DFT_C_32F(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, bool inv, int norm_flag)
1925 {
1926 CV_INSTRUMENT_REGION_IPP();
1927
1928 IppStatus status;
1929 Ipp8u* pBuffer = 0;
1930 Ipp8u* pMemInit= 0;
1931 int sizeBuffer=0;
1932 int sizeSpec=0;
1933 int sizeInit=0;
1934
1935 IppiSize srcRoiSize = {width, height};
1936
1937 status = ippiDFTGetSize_C_32fc(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
1938 if ( status < 0 )
1939 return false;
1940
1941 IppiDFTSpec_C_32fc* pDFTSpec = (IppiDFTSpec_C_32fc*)CV_IPP_MALLOC( sizeSpec );
1942
1943 if ( sizeInit > 0 )
1944 pMemInit = (Ipp8u*)CV_IPP_MALLOC( sizeInit );
1945
1946 if ( sizeBuffer > 0 )
1947 pBuffer = (Ipp8u*)CV_IPP_MALLOC( sizeBuffer );
1948
1949 status = ippiDFTInit_C_32fc( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit );
1950
1951 if ( sizeInit > 0 )
1952 ippFree( pMemInit );
1953
1954 if ( status < 0 )
1955 {
1956 ippFree( pDFTSpec );
1957 if ( sizeBuffer > 0 )
1958 ippFree( pBuffer );
1959 return false;
1960 }
1961
1962 if (!inv)
1963 status = CV_INSTRUMENT_FUN_IPP(ippiDFTFwd_CToC_32fc_C1R, (Ipp32fc*)src, static_cast<int>(src_step), (Ipp32fc*)dst, static_cast<int>(dst_step), pDFTSpec, pBuffer);
1964 else
1965 status = CV_INSTRUMENT_FUN_IPP(ippiDFTInv_CToC_32fc_C1R, (Ipp32fc*)src, static_cast<int>(src_step), (Ipp32fc*)dst, static_cast<int>(dst_step), pDFTSpec, pBuffer);
1966
1967 if ( sizeBuffer > 0 )
1968 ippFree( pBuffer );
1969
1970 ippFree( pDFTSpec );
1971
1972 if(status >= 0)
1973 {
1974 CV_IMPL_ADD(CV_IMPL_IPP);
1975 return true;
1976 }
1977 return false;
1978 }
1979
ippi_DFT_R_32F(const uchar * src,size_t src_step,uchar * dst,size_t dst_step,int width,int height,bool inv,int norm_flag)1980 static bool ippi_DFT_R_32F(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, bool inv, int norm_flag)
1981 {
1982 CV_INSTRUMENT_REGION_IPP();
1983
1984 IppStatus status;
1985 Ipp8u* pBuffer = 0;
1986 Ipp8u* pMemInit= 0;
1987 int sizeBuffer=0;
1988 int sizeSpec=0;
1989 int sizeInit=0;
1990
1991 IppiSize srcRoiSize = {width, height};
1992
1993 status = ippiDFTGetSize_R_32f(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
1994 if ( status < 0 )
1995 return false;
1996
1997 IppiDFTSpec_R_32f* pDFTSpec = (IppiDFTSpec_R_32f*)CV_IPP_MALLOC( sizeSpec );
1998
1999 if ( sizeInit > 0 )
2000 pMemInit = (Ipp8u*)CV_IPP_MALLOC( sizeInit );
2001
2002 if ( sizeBuffer > 0 )
2003 pBuffer = (Ipp8u*)CV_IPP_MALLOC( sizeBuffer );
2004
2005 status = ippiDFTInit_R_32f( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit );
2006
2007 if ( sizeInit > 0 )
2008 ippFree( pMemInit );
2009
2010 if ( status < 0 )
2011 {
2012 ippFree( pDFTSpec );
2013 if ( sizeBuffer > 0 )
2014 ippFree( pBuffer );
2015 return false;
2016 }
2017
2018 if (!inv)
2019 status = CV_INSTRUMENT_FUN_IPP(ippiDFTFwd_RToPack_32f_C1R, (float*)src, static_cast<int>(src_step), (float*)dst, static_cast<int>(dst_step), pDFTSpec, pBuffer);
2020 else
2021 status = CV_INSTRUMENT_FUN_IPP(ippiDFTInv_PackToR_32f_C1R, (float*)src, static_cast<int>(src_step), (float*)dst, static_cast<int>(dst_step), pDFTSpec, pBuffer);
2022
2023 if ( sizeBuffer > 0 )
2024 ippFree( pBuffer );
2025
2026 ippFree( pDFTSpec );
2027
2028 if(status >= 0)
2029 {
2030 CV_IMPL_ADD(CV_IMPL_IPP);
2031 return true;
2032 }
2033 return false;
2034 }
2035
2036 #endif
2037 }
2038
2039 #ifdef HAVE_OPENCL
2040
2041 namespace cv
2042 {
2043
2044 enum FftType
2045 {
2046 R2R = 0, // real to CCS in case forward transform, CCS to real otherwise
2047 C2R = 1, // complex to real in case inverse transform
2048 R2C = 2, // real to complex in case forward transform
2049 C2C = 3 // complex to complex
2050 };
2051
2052 struct OCL_FftPlan
2053 {
2054 private:
2055 UMat twiddles;
2056 String buildOptions;
2057 int thread_count;
2058 int dft_size;
2059 int dft_depth;
2060 bool status;
2061
2062 public:
OCL_FftPlancv::OCL_FftPlan2063 OCL_FftPlan(int _size, int _depth) : dft_size(_size), dft_depth(_depth), status(true)
2064 {
2065 CV_Assert( dft_depth == CV_32F || dft_depth == CV_64F );
2066
2067 int min_radix;
2068 std::vector<int> radixes, blocks;
2069 ocl_getRadixes(dft_size, radixes, blocks, min_radix);
2070 thread_count = dft_size / min_radix;
2071
2072 if (thread_count > (int) ocl::Device::getDefault().maxWorkGroupSize())
2073 {
2074 status = false;
2075 return;
2076 }
2077
2078 // generate string with radix calls
2079 String radix_processing;
2080 int n = 1, twiddle_size = 0;
2081 for (size_t i=0; i<radixes.size(); i++)
2082 {
2083 int radix = radixes[i], block = blocks[i];
2084 if (block > 1)
2085 radix_processing += format("fft_radix%d_B%d(smem,twiddles+%d,ind,%d,%d);", radix, block, twiddle_size, n, dft_size/radix);
2086 else
2087 radix_processing += format("fft_radix%d(smem,twiddles+%d,ind,%d,%d);", radix, twiddle_size, n, dft_size/radix);
2088 twiddle_size += (radix-1)*n;
2089 n *= radix;
2090 }
2091
2092 twiddles.create(1, twiddle_size, CV_MAKE_TYPE(dft_depth, 2));
2093 if (dft_depth == CV_32F)
2094 fillRadixTable<float>(twiddles, radixes);
2095 else
2096 fillRadixTable<double>(twiddles, radixes);
2097
2098 buildOptions = format("-D LOCAL_SIZE=%d -D kercn=%d -D FT=%s -D CT=%s%s -D RADIX_PROCESS=%s",
2099 dft_size, min_radix, ocl::typeToStr(dft_depth), ocl::typeToStr(CV_MAKE_TYPE(dft_depth, 2)),
2100 dft_depth == CV_64F ? " -D DOUBLE_SUPPORT" : "", radix_processing.c_str());
2101 }
2102
enqueueTransformcv::OCL_FftPlan2103 bool enqueueTransform(InputArray _src, OutputArray _dst, int num_dfts, int flags, int fftType, bool rows = true) const
2104 {
2105 if (!status)
2106 return false;
2107
2108 UMat src = _src.getUMat();
2109 UMat dst = _dst.getUMat();
2110
2111 size_t globalsize[2];
2112 size_t localsize[2];
2113 String kernel_name;
2114
2115 bool is1d = (flags & DFT_ROWS) != 0 || num_dfts == 1;
2116 bool inv = (flags & DFT_INVERSE) != 0;
2117 String options = buildOptions;
2118
2119 if (rows)
2120 {
2121 globalsize[0] = thread_count; globalsize[1] = src.rows;
2122 localsize[0] = thread_count; localsize[1] = 1;
2123 kernel_name = !inv ? "fft_multi_radix_rows" : "ifft_multi_radix_rows";
2124 if ((is1d || inv) && (flags & DFT_SCALE))
2125 options += " -D DFT_SCALE";
2126 }
2127 else
2128 {
2129 globalsize[0] = num_dfts; globalsize[1] = thread_count;
2130 localsize[0] = 1; localsize[1] = thread_count;
2131 kernel_name = !inv ? "fft_multi_radix_cols" : "ifft_multi_radix_cols";
2132 if (flags & DFT_SCALE)
2133 options += " -D DFT_SCALE";
2134 }
2135
2136 options += src.channels() == 1 ? " -D REAL_INPUT" : " -D COMPLEX_INPUT";
2137 options += dst.channels() == 1 ? " -D REAL_OUTPUT" : " -D COMPLEX_OUTPUT";
2138 options += is1d ? " -D IS_1D" : "";
2139
2140 if (!inv)
2141 {
2142 if ((is1d && src.channels() == 1) || (rows && (fftType == R2R)))
2143 options += " -D NO_CONJUGATE";
2144 }
2145 else
2146 {
2147 if (rows && (fftType == C2R || fftType == R2R))
2148 options += " -D NO_CONJUGATE";
2149 if (dst.cols % 2 == 0)
2150 options += " -D EVEN";
2151 }
2152
2153 ocl::Kernel k(kernel_name.c_str(), ocl::core::fft_oclsrc, options);
2154 if (k.empty())
2155 return false;
2156
2157 k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::ReadOnlyNoSize(twiddles), thread_count, num_dfts);
2158 return k.run(2, globalsize, localsize, false);
2159 }
2160
2161 private:
ocl_getRadixescv::OCL_FftPlan2162 static void ocl_getRadixes(int cols, std::vector<int>& radixes, std::vector<int>& blocks, int& min_radix)
2163 {
2164 int factors[34];
2165 int nf = DFTFactorize(cols, factors);
2166
2167 int n = 1;
2168 int factor_index = 0;
2169 min_radix = INT_MAX;
2170
2171 // 2^n transforms
2172 if ((factors[factor_index] & 1) == 0)
2173 {
2174 for( ; n < factors[factor_index];)
2175 {
2176 int radix = 2, block = 1;
2177 if (8*n <= factors[0])
2178 radix = 8;
2179 else if (4*n <= factors[0])
2180 {
2181 radix = 4;
2182 if (cols % 12 == 0)
2183 block = 3;
2184 else if (cols % 8 == 0)
2185 block = 2;
2186 }
2187 else
2188 {
2189 if (cols % 10 == 0)
2190 block = 5;
2191 else if (cols % 8 == 0)
2192 block = 4;
2193 else if (cols % 6 == 0)
2194 block = 3;
2195 else if (cols % 4 == 0)
2196 block = 2;
2197 }
2198
2199 radixes.push_back(radix);
2200 blocks.push_back(block);
2201 min_radix = min(min_radix, block*radix);
2202 n *= radix;
2203 }
2204 factor_index++;
2205 }
2206
2207 // all the other transforms
2208 for( ; factor_index < nf; factor_index++)
2209 {
2210 int radix = factors[factor_index], block = 1;
2211 if (radix == 3)
2212 {
2213 if (cols % 12 == 0)
2214 block = 4;
2215 else if (cols % 9 == 0)
2216 block = 3;
2217 else if (cols % 6 == 0)
2218 block = 2;
2219 }
2220 else if (radix == 5)
2221 {
2222 if (cols % 10 == 0)
2223 block = 2;
2224 }
2225 radixes.push_back(radix);
2226 blocks.push_back(block);
2227 min_radix = min(min_radix, block*radix);
2228 }
2229 }
2230
2231 template <typename T>
fillRadixTablecv::OCL_FftPlan2232 static void fillRadixTable(UMat twiddles, const std::vector<int>& radixes)
2233 {
2234 Mat tw = twiddles.getMat(ACCESS_WRITE);
2235 T* ptr = tw.ptr<T>();
2236 int ptr_index = 0;
2237
2238 int n = 1;
2239 for (size_t i=0; i<radixes.size(); i++)
2240 {
2241 int radix = radixes[i];
2242 n *= radix;
2243
2244 for (int j=1; j<radix; j++)
2245 {
2246 double theta = -CV_2PI*j/n;
2247
2248 for (int k=0; k<(n/radix); k++)
2249 {
2250 ptr[ptr_index++] = (T) cos(k*theta);
2251 ptr[ptr_index++] = (T) sin(k*theta);
2252 }
2253 }
2254 }
2255 }
2256 };
2257
2258 class OCL_FftPlanCache
2259 {
2260 public:
getInstance()2261 static OCL_FftPlanCache & getInstance()
2262 {
2263 CV_SINGLETON_LAZY_INIT_REF(OCL_FftPlanCache, new OCL_FftPlanCache())
2264 }
2265
getFftPlan(int dft_size,int depth)2266 Ptr<OCL_FftPlan> getFftPlan(int dft_size, int depth)
2267 {
2268 int key = (dft_size << 16) | (depth & 0xFFFF);
2269 std::map<int, Ptr<OCL_FftPlan> >::iterator f = planStorage.find(key);
2270 if (f != planStorage.end())
2271 {
2272 return f->second;
2273 }
2274 else
2275 {
2276 Ptr<OCL_FftPlan> newPlan = Ptr<OCL_FftPlan>(new OCL_FftPlan(dft_size, depth));
2277 planStorage[key] = newPlan;
2278 return newPlan;
2279 }
2280 }
2281
~OCL_FftPlanCache()2282 ~OCL_FftPlanCache()
2283 {
2284 planStorage.clear();
2285 }
2286
2287 protected:
OCL_FftPlanCache()2288 OCL_FftPlanCache() :
2289 planStorage()
2290 {
2291 }
2292 std::map<int, Ptr<OCL_FftPlan> > planStorage;
2293 };
2294
ocl_dft_rows(InputArray _src,OutputArray _dst,int nonzero_rows,int flags,int fftType)2295 static bool ocl_dft_rows(InputArray _src, OutputArray _dst, int nonzero_rows, int flags, int fftType)
2296 {
2297 int type = _src.type(), depth = CV_MAT_DEPTH(type);
2298 Ptr<OCL_FftPlan> plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.cols(), depth);
2299 return plan->enqueueTransform(_src, _dst, nonzero_rows, flags, fftType, true);
2300 }
2301
ocl_dft_cols(InputArray _src,OutputArray _dst,int nonzero_cols,int flags,int fftType)2302 static bool ocl_dft_cols(InputArray _src, OutputArray _dst, int nonzero_cols, int flags, int fftType)
2303 {
2304 int type = _src.type(), depth = CV_MAT_DEPTH(type);
2305 Ptr<OCL_FftPlan> plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.rows(), depth);
2306 return plan->enqueueTransform(_src, _dst, nonzero_cols, flags, fftType, false);
2307 }
2308
determineFFTType(bool real_input,bool complex_input,bool real_output,bool complex_output,bool inv)2309 inline FftType determineFFTType(bool real_input, bool complex_input, bool real_output, bool complex_output, bool inv)
2310 {
2311 // output format is not specified
2312 if (!real_output && !complex_output)
2313 complex_output = true;
2314
2315 // input or output format is ambiguous
2316 if (real_input == complex_input || real_output == complex_output)
2317 CV_Error(Error::StsBadArg, "Invalid FFT input or output format");
2318
2319 FftType result = real_input ? (real_output ? R2R : R2C) : (real_output ? C2R : C2C);
2320
2321 // Forward Complex to CCS not supported
2322 if (result == C2R && !inv)
2323 result = C2C;
2324
2325 // Inverse CCS to Complex not supported
2326 if (result == R2C && inv)
2327 result = R2R;
2328
2329 return result;
2330 }
2331
ocl_dft(InputArray _src,OutputArray _dst,int flags,int nonzero_rows)2332 static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_rows)
2333 {
2334 int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
2335 Size ssize = _src.size();
2336 bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
2337
2338 if (!(cn == 1 || cn == 2)
2339 || !(depth == CV_32F || (depth == CV_64F && doubleSupport))
2340 || ((flags & DFT_REAL_OUTPUT) && (flags & DFT_COMPLEX_OUTPUT)))
2341 return false;
2342
2343 // if is not a multiplication of prime numbers { 2, 3, 5 }
2344 if (ssize.area() != getOptimalDFTSize(ssize.area()))
2345 return false;
2346
2347 UMat src = _src.getUMat();
2348 bool inv = (flags & DFT_INVERSE) != 0 ? 1 : 0;
2349
2350 if( nonzero_rows <= 0 || nonzero_rows > _src.rows() )
2351 nonzero_rows = _src.rows();
2352 bool is1d = (flags & DFT_ROWS) != 0 || nonzero_rows == 1;
2353
2354 FftType fftType = determineFFTType(cn == 1, cn == 2,
2355 (flags & DFT_REAL_OUTPUT) != 0, (flags & DFT_COMPLEX_OUTPUT) != 0, inv);
2356
2357 UMat output;
2358 if (fftType == C2C || fftType == R2C)
2359 {
2360 // complex output
2361 _dst.create(src.size(), CV_MAKETYPE(depth, 2));
2362 output = _dst.getUMat();
2363 }
2364 else
2365 {
2366 // real output
2367 if (is1d)
2368 {
2369 _dst.create(src.size(), CV_MAKETYPE(depth, 1));
2370 output = _dst.getUMat();
2371 }
2372 else
2373 {
2374 _dst.create(src.size(), CV_MAKETYPE(depth, 1));
2375 output.create(src.size(), CV_MAKETYPE(depth, 2));
2376 }
2377 }
2378
2379 bool result = false;
2380 if (!inv)
2381 {
2382 int nonzero_cols = fftType == R2R ? output.cols/2 + 1 : output.cols;
2383 result = ocl_dft_rows(src, output, nonzero_rows, flags, fftType);
2384 if (!is1d)
2385 result = result && ocl_dft_cols(output, _dst, nonzero_cols, flags, fftType);
2386 }
2387 else
2388 {
2389 if (fftType == C2C)
2390 {
2391 // complex output
2392 result = ocl_dft_rows(src, output, nonzero_rows, flags, fftType);
2393 if (!is1d)
2394 result = result && ocl_dft_cols(output, output, output.cols, flags, fftType);
2395 }
2396 else
2397 {
2398 if (is1d)
2399 {
2400 result = ocl_dft_rows(src, output, nonzero_rows, flags, fftType);
2401 }
2402 else
2403 {
2404 int nonzero_cols = src.cols/2 + 1;
2405 result = ocl_dft_cols(src, output, nonzero_cols, flags, fftType);
2406 result = result && ocl_dft_rows(output, _dst, nonzero_rows, flags, fftType);
2407 }
2408 }
2409 }
2410 return result;
2411 }
2412
2413 } // namespace cv;
2414
2415 #endif
2416
2417 #ifdef HAVE_CLAMDFFT
2418
2419 namespace cv {
2420
2421 #define CLAMDDFT_Assert(func) \
2422 { \
2423 clfftStatus s = (func); \
2424 CV_Assert(s == CLFFT_SUCCESS); \
2425 }
2426
2427 class PlanCache
2428 {
2429 struct FftPlan
2430 {
FftPlancv::PlanCache::FftPlan2431 FftPlan(const Size & _dft_size, int _src_step, int _dst_step, bool _doubleFP, bool _inplace, int _flags, FftType _fftType) :
2432 dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step),
2433 doubleFP(_doubleFP), inplace(_inplace), flags(_flags), fftType(_fftType),
2434 context((cl_context)ocl::Context::getDefault().ptr()), plHandle(0)
2435 {
2436 bool dft_inverse = (flags & DFT_INVERSE) != 0;
2437 bool dft_scale = (flags & DFT_SCALE) != 0;
2438 bool dft_rows = (flags & DFT_ROWS) != 0;
2439
2440 clfftLayout inLayout = CLFFT_REAL, outLayout = CLFFT_REAL;
2441 clfftDim dim = dft_size.height == 1 || dft_rows ? CLFFT_1D : CLFFT_2D;
2442
2443 size_t batchSize = dft_rows ? dft_size.height : 1;
2444 size_t clLengthsIn[3] = { (size_t)dft_size.width, dft_rows ? 1 : (size_t)dft_size.height, 1 };
2445 size_t clStridesIn[3] = { 1, 1, 1 };
2446 size_t clStridesOut[3] = { 1, 1, 1 };
2447 int elemSize = doubleFP ? sizeof(double) : sizeof(float);
2448
2449 switch (fftType)
2450 {
2451 case C2C:
2452 inLayout = CLFFT_COMPLEX_INTERLEAVED;
2453 outLayout = CLFFT_COMPLEX_INTERLEAVED;
2454 clStridesIn[1] = src_step / (elemSize << 1);
2455 clStridesOut[1] = dst_step / (elemSize << 1);
2456 break;
2457 case R2C:
2458 inLayout = CLFFT_REAL;
2459 outLayout = CLFFT_HERMITIAN_INTERLEAVED;
2460 clStridesIn[1] = src_step / elemSize;
2461 clStridesOut[1] = dst_step / (elemSize << 1);
2462 break;
2463 case C2R:
2464 inLayout = CLFFT_HERMITIAN_INTERLEAVED;
2465 outLayout = CLFFT_REAL;
2466 clStridesIn[1] = src_step / (elemSize << 1);
2467 clStridesOut[1] = dst_step / elemSize;
2468 break;
2469 case R2R:
2470 default:
2471 CV_Error(Error::StsNotImplemented, "AMD Fft does not support this type");
2472 break;
2473 }
2474
2475 clStridesIn[2] = dft_rows ? clStridesIn[1] : dft_size.width * clStridesIn[1];
2476 clStridesOut[2] = dft_rows ? clStridesOut[1] : dft_size.width * clStridesOut[1];
2477
2478 CLAMDDFT_Assert(clfftCreateDefaultPlan(&plHandle, (cl_context)ocl::Context::getDefault().ptr(), dim, clLengthsIn))
2479
2480 // setting plan properties
2481 CLAMDDFT_Assert(clfftSetPlanPrecision(plHandle, doubleFP ? CLFFT_DOUBLE : CLFFT_SINGLE));
2482 CLAMDDFT_Assert(clfftSetResultLocation(plHandle, inplace ? CLFFT_INPLACE : CLFFT_OUTOFPLACE))
2483 CLAMDDFT_Assert(clfftSetLayout(plHandle, inLayout, outLayout))
2484 CLAMDDFT_Assert(clfftSetPlanBatchSize(plHandle, batchSize))
2485 CLAMDDFT_Assert(clfftSetPlanInStride(plHandle, dim, clStridesIn))
2486 CLAMDDFT_Assert(clfftSetPlanOutStride(plHandle, dim, clStridesOut))
2487 CLAMDDFT_Assert(clfftSetPlanDistance(plHandle, clStridesIn[dim], clStridesOut[dim]))
2488
2489 float scale = dft_scale ? 1.0f / (dft_rows ? dft_size.width : dft_size.area()) : 1.0f;
2490 CLAMDDFT_Assert(clfftSetPlanScale(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale))
2491
2492 // ready to bake
2493 cl_command_queue queue = (cl_command_queue)ocl::Queue::getDefault().ptr();
2494 CLAMDDFT_Assert(clfftBakePlan(plHandle, 1, &queue, NULL, NULL))
2495 }
2496
~FftPlancv::PlanCache::FftPlan2497 ~FftPlan()
2498 {
2499 // Do not tear down clFFT.
2500 // The user application may still use clFFT even after OpenCV is unloaded.
2501 /*clfftDestroyPlan(&plHandle);*/
2502 }
2503
2504 friend class PlanCache;
2505
2506 private:
2507 Size dft_size;
2508 int src_step, dst_step;
2509 bool doubleFP;
2510 bool inplace;
2511 int flags;
2512 FftType fftType;
2513
2514 cl_context context;
2515 clfftPlanHandle plHandle;
2516 };
2517
2518 public:
getInstance()2519 static PlanCache & getInstance()
2520 {
2521 CV_SINGLETON_LAZY_INIT_REF(PlanCache, new PlanCache())
2522 }
2523
getPlanHandle(const Size & dft_size,int src_step,int dst_step,bool doubleFP,bool inplace,int flags,FftType fftType)2524 clfftPlanHandle getPlanHandle(const Size & dft_size, int src_step, int dst_step, bool doubleFP,
2525 bool inplace, int flags, FftType fftType)
2526 {
2527 cl_context currentContext = (cl_context)ocl::Context::getDefault().ptr();
2528
2529 for (size_t i = 0, size = planStorage.size(); i < size; ++i)
2530 {
2531 const FftPlan * const plan = planStorage[i];
2532
2533 if (plan->dft_size == dft_size &&
2534 plan->flags == flags &&
2535 plan->src_step == src_step &&
2536 plan->dst_step == dst_step &&
2537 plan->doubleFP == doubleFP &&
2538 plan->fftType == fftType &&
2539 plan->inplace == inplace)
2540 {
2541 if (plan->context != currentContext)
2542 {
2543 planStorage.erase(planStorage.begin() + i);
2544 break;
2545 }
2546
2547 return plan->plHandle;
2548 }
2549 }
2550
2551 // no baked plan is found, so let's create a new one
2552 Ptr<FftPlan> newPlan = Ptr<FftPlan>(new FftPlan(dft_size, src_step, dst_step, doubleFP, inplace, flags, fftType));
2553 planStorage.push_back(newPlan);
2554
2555 return newPlan->plHandle;
2556 }
2557
~PlanCache()2558 ~PlanCache()
2559 {
2560 planStorage.clear();
2561 }
2562
2563 protected:
PlanCache()2564 PlanCache() :
2565 planStorage()
2566 {
2567 }
2568
2569 std::vector<Ptr<FftPlan> > planStorage;
2570 };
2571
2572 extern "C" {
2573
oclCleanupCallback(cl_event e,cl_int,void * p)2574 static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p)
2575 {
2576 UMatData * u = (UMatData *)p;
2577
2578 if( u && CV_XADD(&u->urefcount, -1) == 1 )
2579 u->currAllocator->deallocate(u);
2580 u = 0;
2581
2582 clReleaseEvent(e), e = 0;
2583 }
2584
2585 }
2586
ocl_dft_amdfft(InputArray _src,OutputArray _dst,int flags)2587 static bool ocl_dft_amdfft(InputArray _src, OutputArray _dst, int flags)
2588 {
2589 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
2590 Size ssize = _src.size();
2591
2592 bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
2593 if ( (!doubleSupport && depth == CV_64F) ||
2594 !(type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2) ||
2595 _src.offset() != 0)
2596 return false;
2597
2598 // if is not a multiplication of prime numbers { 2, 3, 5 }
2599 if (ssize.area() != getOptimalDFTSize(ssize.area()))
2600 return false;
2601
2602 int dst_complex_input = cn == 2 ? 1 : 0;
2603 bool dft_inverse = (flags & DFT_INVERSE) != 0 ? 1 : 0;
2604 int dft_complex_output = (flags & DFT_COMPLEX_OUTPUT) != 0;
2605 bool dft_real_output = (flags & DFT_REAL_OUTPUT) != 0;
2606
2607 CV_Assert(dft_complex_output + dft_real_output < 2);
2608 FftType fftType = (FftType)(dst_complex_input << 0 | dft_complex_output << 1);
2609
2610 switch (fftType)
2611 {
2612 case C2C:
2613 _dst.create(ssize.height, ssize.width, CV_MAKE_TYPE(depth, 2));
2614 break;
2615 case R2C: // TODO implement it if possible
2616 case C2R: // TODO implement it if possible
2617 case R2R: // AMD Fft does not support this type
2618 default:
2619 return false;
2620 }
2621
2622 UMat src = _src.getUMat(), dst = _dst.getUMat();
2623 bool inplace = src.u == dst.u;
2624
2625 clfftPlanHandle plHandle = PlanCache::getInstance().
2626 getPlanHandle(ssize, (int)src.step, (int)dst.step,
2627 depth == CV_64F, inplace, flags, fftType);
2628
2629 // get the bufferSize
2630 size_t bufferSize = 0;
2631 CLAMDDFT_Assert(clfftGetTmpBufSize(plHandle, &bufferSize))
2632 UMat tmpBuffer(1, (int)bufferSize, CV_8UC1);
2633
2634 cl_mem srcarg = (cl_mem)src.handle(ACCESS_READ);
2635 cl_mem dstarg = (cl_mem)dst.handle(ACCESS_RW);
2636
2637 cl_command_queue queue = (cl_command_queue)ocl::Queue::getDefault().ptr();
2638 cl_event e = 0;
2639
2640 CLAMDDFT_Assert(clfftEnqueueTransform(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
2641 1, &queue, 0, NULL, &e,
2642 &srcarg, &dstarg, (cl_mem)tmpBuffer.handle(ACCESS_RW)))
2643
2644 tmpBuffer.addref();
2645 clSetEventCallback(e, CL_COMPLETE, oclCleanupCallback, tmpBuffer.u);
2646 return true;
2647 }
2648
2649 #undef DFT_ASSERT
2650
2651 }
2652
2653 #endif // HAVE_CLAMDFFT
2654
2655 namespace cv
2656 {
2657
2658 template <typename T>
complementComplex(T * ptr,size_t step,int n,int len,int dft_dims)2659 static void complementComplex(T * ptr, size_t step, int n, int len, int dft_dims)
2660 {
2661 T* p0 = (T*)ptr;
2662 size_t dstep = step/sizeof(p0[0]);
2663 for(int i = 0; i < len; i++ )
2664 {
2665 T* p = p0 + dstep*i;
2666 T* q = dft_dims == 1 || i == 0 || i*2 == len ? p : p0 + dstep*(len-i);
2667
2668 for( int j = 1; j < (n+1)/2; j++ )
2669 {
2670 p[(n-j)*2] = q[j*2];
2671 p[(n-j)*2+1] = -q[j*2+1];
2672 }
2673 }
2674 }
2675
complementComplexOutput(int depth,uchar * ptr,size_t step,int count,int len,int dft_dims)2676 static void complementComplexOutput(int depth, uchar * ptr, size_t step, int count, int len, int dft_dims)
2677 {
2678 if( depth == CV_32F )
2679 complementComplex((float*)ptr, step, count, len, dft_dims);
2680 else
2681 complementComplex((double*)ptr, step, count, len, dft_dims);
2682 }
2683
2684 enum DftMode {
2685 InvalidDft = 0,
2686 FwdRealToCCS,
2687 FwdRealToComplex,
2688 FwdComplexToComplex,
2689 InvCCSToReal,
2690 InvComplexToReal,
2691 InvComplexToComplex,
2692 };
2693
2694 enum DftDims {
2695 InvalidDim = 0,
2696 OneDim,
2697 OneDimColWise,
2698 TwoDims
2699 };
2700
modeName(DftMode m)2701 inline const char * modeName(DftMode m)
2702 {
2703 switch (m)
2704 {
2705 case InvalidDft: return "InvalidDft";
2706 case FwdRealToCCS: return "FwdRealToCCS";
2707 case FwdRealToComplex: return "FwdRealToComplex";
2708 case FwdComplexToComplex: return "FwdComplexToComplex";
2709 case InvCCSToReal: return "InvCCSToReal";
2710 case InvComplexToReal: return "InvComplexToReal";
2711 case InvComplexToComplex: return "InvComplexToComplex";
2712 }
2713 return 0;
2714 }
2715
dimsName(DftDims d)2716 inline const char * dimsName(DftDims d)
2717 {
2718 switch (d)
2719 {
2720 case InvalidDim: return "InvalidDim";
2721 case OneDim: return "OneDim";
2722 case OneDimColWise: return "OneDimColWise";
2723 case TwoDims: return "TwoDims";
2724 };
2725 return 0;
2726 }
2727
2728 template <typename T>
isInv(T mode)2729 inline bool isInv(T mode)
2730 {
2731 switch ((DftMode)mode)
2732 {
2733 case InvCCSToReal:
2734 case InvComplexToReal:
2735 case InvComplexToComplex: return true;
2736 default: return false;
2737 }
2738 }
2739
determineMode(bool inv,int cn1,int cn2)2740 inline DftMode determineMode(bool inv, int cn1, int cn2)
2741 {
2742 if (!inv)
2743 {
2744 if (cn1 == 1 && cn2 == 1)
2745 return FwdRealToCCS;
2746 else if (cn1 == 1 && cn2 == 2)
2747 return FwdRealToComplex;
2748 else if (cn1 == 2 && cn2 == 2)
2749 return FwdComplexToComplex;
2750 }
2751 else
2752 {
2753 if (cn1 == 1 && cn2 == 1)
2754 return InvCCSToReal;
2755 else if (cn1 == 2 && cn2 == 1)
2756 return InvComplexToReal;
2757 else if (cn1 == 2 && cn2 == 2)
2758 return InvComplexToComplex;
2759 }
2760 return InvalidDft;
2761 }
2762
2763
determineDims(int rows,int cols,bool isRowWise,bool isContinuous)2764 inline DftDims determineDims(int rows, int cols, bool isRowWise, bool isContinuous)
2765 {
2766 // printf("%d x %d (%d, %d)\n", rows, cols, isRowWise, isContinuous);
2767 if (isRowWise)
2768 return OneDim;
2769 if (cols == 1 && rows > 1) // one-column-shaped input
2770 {
2771 if (isContinuous)
2772 return OneDim;
2773 else
2774 return OneDimColWise;
2775 }
2776 if (rows == 1)
2777 return OneDim;
2778 if (cols > 1 && rows > 1)
2779 return TwoDims;
2780 return InvalidDim;
2781 }
2782
2783 class OcvDftImpl CV_FINAL : public hal::DFT2D
2784 {
2785 protected:
2786 Ptr<hal::DFT1D> contextA;
2787 Ptr<hal::DFT1D> contextB;
2788 bool needBufferA;
2789 bool needBufferB;
2790 bool inv;
2791 int width;
2792 int height;
2793 DftMode mode;
2794 int elem_size;
2795 int complex_elem_size;
2796 int depth;
2797 bool real_transform;
2798 int nonzero_rows;
2799 bool isRowTransform;
2800 bool isScaled;
2801 std::vector<int> stages;
2802 bool useIpp;
2803 int src_channels;
2804 int dst_channels;
2805
2806 AutoBuffer<uchar> tmp_bufA;
2807 AutoBuffer<uchar> tmp_bufB;
2808 AutoBuffer<uchar> buf0;
2809 AutoBuffer<uchar> buf1;
2810
2811 public:
OcvDftImpl()2812 OcvDftImpl()
2813 {
2814 needBufferA = false;
2815 needBufferB = false;
2816 inv = false;
2817 width = 0;
2818 height = 0;
2819 mode = InvalidDft;
2820 elem_size = 0;
2821 complex_elem_size = 0;
2822 depth = 0;
2823 real_transform = false;
2824 nonzero_rows = 0;
2825 isRowTransform = false;
2826 isScaled = false;
2827 useIpp = false;
2828 src_channels = 0;
2829 dst_channels = 0;
2830 }
2831
init(int _width,int _height,int _depth,int _src_channels,int _dst_channels,int flags,int _nonzero_rows)2832 void init(int _width, int _height, int _depth, int _src_channels, int _dst_channels, int flags, int _nonzero_rows)
2833 {
2834 bool isComplex = _src_channels != _dst_channels;
2835 nonzero_rows = _nonzero_rows;
2836 width = _width;
2837 height = _height;
2838 depth = _depth;
2839 src_channels = _src_channels;
2840 dst_channels = _dst_channels;
2841 bool isInverse = (flags & CV_HAL_DFT_INVERSE) != 0;
2842 bool isInplace = (flags & CV_HAL_DFT_IS_INPLACE) != 0;
2843 bool isContinuous = (flags & CV_HAL_DFT_IS_CONTINUOUS) != 0;
2844 mode = determineMode(isInverse, _src_channels, _dst_channels);
2845 inv = isInverse;
2846 isRowTransform = (flags & CV_HAL_DFT_ROWS) != 0;
2847 isScaled = (flags & CV_HAL_DFT_SCALE) != 0;
2848 needBufferA = false;
2849 needBufferB = false;
2850 real_transform = (mode != FwdComplexToComplex && mode != InvComplexToComplex);
2851
2852 elem_size = (depth == CV_32F) ? sizeof(float) : sizeof(double);
2853 complex_elem_size = elem_size * 2;
2854 if( !real_transform )
2855 elem_size = complex_elem_size;
2856
2857 #if defined USE_IPP_DFT
2858 CV_IPP_CHECK()
2859 {
2860 if (nonzero_rows == 0 && depth == CV_32F && ((width * height)>(int)(1<<6)))
2861 {
2862 if (mode == FwdComplexToComplex || mode == InvComplexToComplex || mode == FwdRealToCCS || mode == InvCCSToReal)
2863 {
2864 useIpp = true;
2865 return;
2866 }
2867 }
2868 }
2869 #endif
2870
2871 DftDims dims = determineDims(height, width, isRowTransform, isContinuous);
2872 if (dims == TwoDims)
2873 {
2874 stages.resize(2);
2875 if (mode == InvCCSToReal || mode == InvComplexToReal)
2876 {
2877 stages[0] = 1;
2878 stages[1] = 0;
2879 }
2880 else
2881 {
2882 stages[0] = 0;
2883 stages[1] = 1;
2884 }
2885 }
2886 else
2887 {
2888 stages.resize(1);
2889 if (dims == OneDimColWise)
2890 stages[0] = 1;
2891 else
2892 stages[0] = 0;
2893 }
2894
2895 for(uint stageIndex = 0; stageIndex < stages.size(); ++stageIndex)
2896 {
2897 if (stageIndex == 1)
2898 {
2899 isInplace = true;
2900 isComplex = false;
2901 }
2902
2903 int stage = stages[stageIndex];
2904 bool isLastStage = (stageIndex + 1 == stages.size());
2905
2906 int len, count;
2907
2908 int f = 0;
2909 if (inv)
2910 f |= CV_HAL_DFT_INVERSE;
2911 if (isScaled)
2912 f |= CV_HAL_DFT_SCALE;
2913 if (isRowTransform)
2914 f |= CV_HAL_DFT_ROWS;
2915 if (isComplex)
2916 f |= CV_HAL_DFT_COMPLEX_OUTPUT;
2917 if (real_transform)
2918 f |= CV_HAL_DFT_REAL_OUTPUT;
2919 if (!isLastStage)
2920 f |= CV_HAL_DFT_TWO_STAGE;
2921
2922 if( stage == 0 ) // row-wise transform
2923 {
2924 if (width == 1 && !isRowTransform )
2925 {
2926 len = height;
2927 count = width;
2928 }
2929 else
2930 {
2931 len = width;
2932 count = height;
2933 }
2934 needBufferA = isInplace;
2935 contextA = hal::DFT1D::create(len, count, depth, f, &needBufferA);
2936 if (needBufferA)
2937 tmp_bufA.allocate(len * complex_elem_size);
2938 }
2939 else
2940 {
2941 len = height;
2942 count = width;
2943 f |= CV_HAL_DFT_STAGE_COLS;
2944 needBufferB = isInplace;
2945 contextB = hal::DFT1D::create(len, count, depth, f, &needBufferB);
2946 if (needBufferB)
2947 tmp_bufB.allocate(len * complex_elem_size);
2948
2949 buf0.allocate(len * complex_elem_size);
2950 buf1.allocate(len * complex_elem_size);
2951 }
2952 }
2953 }
2954
apply(const uchar * src,size_t src_step,uchar * dst,size_t dst_step)2955 void apply(const uchar * src, size_t src_step, uchar * dst, size_t dst_step) CV_OVERRIDE
2956 {
2957 #if defined USE_IPP_DFT
2958 if (useIpp)
2959 {
2960 int ipp_norm_flag = !isScaled ? 8 : inv ? 2 : 1;
2961 if (!isRowTransform)
2962 {
2963 if (mode == FwdComplexToComplex || mode == InvComplexToComplex)
2964 {
2965 if (ippi_DFT_C_32F(src, src_step, dst, dst_step, width, height, inv, ipp_norm_flag))
2966 {
2967 CV_IMPL_ADD(CV_IMPL_IPP);
2968 return;
2969 }
2970 setIppErrorStatus();
2971 }
2972 else if (mode == FwdRealToCCS || mode == InvCCSToReal)
2973 {
2974 if (ippi_DFT_R_32F(src, src_step, dst, dst_step, width, height, inv, ipp_norm_flag))
2975 {
2976 CV_IMPL_ADD(CV_IMPL_IPP);
2977 return;
2978 }
2979 setIppErrorStatus();
2980 }
2981 }
2982 else
2983 {
2984 if (mode == FwdComplexToComplex || mode == InvComplexToComplex)
2985 {
2986 ippiDFT_C_Func ippiFunc = inv ? (ippiDFT_C_Func)ippiDFTInv_CToC_32fc_C1R : (ippiDFT_C_Func)ippiDFTFwd_CToC_32fc_C1R;
2987 if (Dft_C_IPPLoop(src, src_step, dst, dst_step, width, height, IPPDFT_C_Functor(ippiFunc),ipp_norm_flag))
2988 {
2989 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
2990 return;
2991 }
2992 setIppErrorStatus();
2993 }
2994 else if (mode == FwdRealToCCS || mode == InvCCSToReal)
2995 {
2996 ippiDFT_R_Func ippiFunc = inv ? (ippiDFT_R_Func)ippiDFTInv_PackToR_32f_C1R : (ippiDFT_R_Func)ippiDFTFwd_RToPack_32f_C1R;
2997 if (Dft_R_IPPLoop(src, src_step, dst, dst_step, width, height, IPPDFT_R_Functor(ippiFunc),ipp_norm_flag))
2998 {
2999 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
3000 return;
3001 }
3002 setIppErrorStatus();
3003 }
3004 }
3005 return;
3006 }
3007 #endif
3008
3009 for(uint stageIndex = 0; stageIndex < stages.size(); ++stageIndex)
3010 {
3011 int stage_src_channels = src_channels;
3012 int stage_dst_channels = dst_channels;
3013
3014 if (stageIndex == 1)
3015 {
3016 src = dst;
3017 src_step = dst_step;
3018 stage_src_channels = stage_dst_channels;
3019 }
3020
3021 int stage = stages[stageIndex];
3022 bool isLastStage = (stageIndex + 1 == stages.size());
3023 bool isComplex = stage_src_channels != stage_dst_channels;
3024
3025 if( stage == 0 )
3026 rowDft(src, src_step, dst, dst_step, isComplex, isLastStage);
3027 else
3028 colDft(src, src_step, dst, dst_step, stage_src_channels, stage_dst_channels, isLastStage);
3029 }
3030 }
3031
3032 protected:
3033
rowDft(const uchar * src_data,size_t src_step,uchar * dst_data,size_t dst_step,bool isComplex,bool isLastStage)3034 void rowDft(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, bool isComplex, bool isLastStage)
3035 {
3036 int len, count;
3037 if (width == 1 && !isRowTransform )
3038 {
3039 len = height;
3040 count = width;
3041 }
3042 else
3043 {
3044 len = width;
3045 count = height;
3046 }
3047 int dptr_offset = 0;
3048 int dst_full_len = len*elem_size;
3049
3050 if( needBufferA )
3051 {
3052 if (mode == FwdRealToCCS && (len & 1) && len > 1)
3053 dptr_offset = elem_size;
3054 }
3055
3056 if( !inv && isComplex )
3057 dst_full_len += (len & 1) ? elem_size : complex_elem_size;
3058
3059 int nz = nonzero_rows;
3060 if( nz <= 0 || nz > count )
3061 nz = count;
3062
3063 int i;
3064 for( i = 0; i < nz; i++ )
3065 {
3066 const uchar* sptr = src_data + src_step * i;
3067 uchar* dptr0 = dst_data + dst_step * i;
3068 uchar* dptr = dptr0;
3069
3070 if( needBufferA )
3071 dptr = tmp_bufA.data();
3072
3073 contextA->apply(sptr, dptr);
3074
3075 if( needBufferA )
3076 memcpy( dptr0, dptr + dptr_offset, dst_full_len );
3077 }
3078
3079 for( ; i < count; i++ )
3080 {
3081 uchar* dptr0 = dst_data + dst_step * i;
3082 memset( dptr0, 0, dst_full_len );
3083 }
3084 if(isLastStage && mode == FwdRealToComplex)
3085 complementComplexOutput(depth, dst_data, dst_step, len, nz, 1);
3086 }
3087
colDft(const uchar * src_data,size_t src_step,uchar * dst_data,size_t dst_step,int stage_src_channels,int stage_dst_channels,bool isLastStage)3088 void colDft(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int stage_src_channels, int stage_dst_channels, bool isLastStage)
3089 {
3090 int len = height;
3091 int count = width;
3092 int a = 0, b = count;
3093 uchar *dbuf0, *dbuf1;
3094 const uchar* sptr0 = src_data;
3095 uchar* dptr0 = dst_data;
3096
3097 dbuf0 = buf0.data(), dbuf1 = buf1.data();
3098
3099 if( needBufferB )
3100 {
3101 dbuf1 = tmp_bufB.data();
3102 dbuf0 = buf1.data();
3103 }
3104
3105 if( real_transform )
3106 {
3107 int even;
3108 a = 1;
3109 even = (count & 1) == 0;
3110 b = (count+1)/2;
3111 if( !inv )
3112 {
3113 memset( buf0.data(), 0, len*complex_elem_size );
3114 CopyColumn( sptr0, src_step, buf0.data(), complex_elem_size, len, elem_size );
3115 sptr0 += stage_dst_channels*elem_size;
3116 if( even )
3117 {
3118 memset( buf1.data(), 0, len*complex_elem_size );
3119 CopyColumn( sptr0 + (count-2)*elem_size, src_step,
3120 buf1.data(), complex_elem_size, len, elem_size );
3121 }
3122 }
3123 else if( stage_src_channels == 1 )
3124 {
3125 CopyColumn( sptr0, src_step, buf0.data(), elem_size, len, elem_size );
3126 ExpandCCS( buf0.data(), len, elem_size );
3127 if( even )
3128 {
3129 CopyColumn( sptr0 + (count-1)*elem_size, src_step,
3130 buf1.data(), elem_size, len, elem_size );
3131 ExpandCCS( buf1.data(), len, elem_size );
3132 }
3133 sptr0 += elem_size;
3134 }
3135 else
3136 {
3137 CopyColumn( sptr0, src_step, buf0.data(), complex_elem_size, len, complex_elem_size );
3138 if( even )
3139 {
3140 CopyColumn( sptr0 + b*complex_elem_size, src_step,
3141 buf1.data(), complex_elem_size, len, complex_elem_size );
3142 }
3143 sptr0 += complex_elem_size;
3144 }
3145
3146 if( even )
3147 contextB->apply(buf1.data(), dbuf1);
3148 contextB->apply(buf0.data(), dbuf0);
3149
3150 if( stage_dst_channels == 1 )
3151 {
3152 if( !inv )
3153 {
3154 // copy the half of output vector to the first/last column.
3155 // before doing that, defgragment the vector
3156 memcpy( dbuf0 + elem_size, dbuf0, elem_size );
3157 CopyColumn( dbuf0 + elem_size, elem_size, dptr0,
3158 dst_step, len, elem_size );
3159 if( even )
3160 {
3161 memcpy( dbuf1 + elem_size, dbuf1, elem_size );
3162 CopyColumn( dbuf1 + elem_size, elem_size,
3163 dptr0 + (count-1)*elem_size,
3164 dst_step, len, elem_size );
3165 }
3166 dptr0 += elem_size;
3167 }
3168 else
3169 {
3170 // copy the real part of the complex vector to the first/last column
3171 CopyColumn( dbuf0, complex_elem_size, dptr0, dst_step, len, elem_size );
3172 if( even )
3173 CopyColumn( dbuf1, complex_elem_size, dptr0 + (count-1)*elem_size,
3174 dst_step, len, elem_size );
3175 dptr0 += elem_size;
3176 }
3177 }
3178 else
3179 {
3180 assert( !inv );
3181 CopyColumn( dbuf0, complex_elem_size, dptr0,
3182 dst_step, len, complex_elem_size );
3183 if( even )
3184 CopyColumn( dbuf1, complex_elem_size,
3185 dptr0 + b*complex_elem_size,
3186 dst_step, len, complex_elem_size );
3187 dptr0 += complex_elem_size;
3188 }
3189 }
3190
3191 for(int i = a; i < b; i += 2 )
3192 {
3193 if( i+1 < b )
3194 {
3195 CopyFrom2Columns( sptr0, src_step, buf0.data(), buf1.data(), len, complex_elem_size );
3196 contextB->apply(buf1.data(), dbuf1);
3197 }
3198 else
3199 CopyColumn( sptr0, src_step, buf0.data(), complex_elem_size, len, complex_elem_size );
3200
3201 contextB->apply(buf0.data(), dbuf0);
3202
3203 if( i+1 < b )
3204 CopyTo2Columns( dbuf0, dbuf1, dptr0, dst_step, len, complex_elem_size );
3205 else
3206 CopyColumn( dbuf0, complex_elem_size, dptr0, dst_step, len, complex_elem_size );
3207 sptr0 += 2*complex_elem_size;
3208 dptr0 += 2*complex_elem_size;
3209 }
3210 if(isLastStage && mode == FwdRealToComplex)
3211 complementComplexOutput(depth, dst_data, dst_step, count, len, 2);
3212 }
3213 };
3214
3215 class OcvDftBasicImpl CV_FINAL : public hal::DFT1D
3216 {
3217 public:
3218 OcvDftOptions opt;
3219 int _factors[34];
3220 AutoBuffer<uchar> wave_buf;
3221 AutoBuffer<int> itab_buf;
3222 #ifdef USE_IPP_DFT
3223 AutoBuffer<uchar> ippbuf;
3224 AutoBuffer<uchar> ippworkbuf;
3225 #endif
3226
3227 public:
OcvDftBasicImpl()3228 OcvDftBasicImpl()
3229 {
3230 opt.factors = _factors;
3231 }
init(int len,int count,int depth,int flags,bool * needBuffer)3232 void init(int len, int count, int depth, int flags, bool *needBuffer)
3233 {
3234 int prev_len = opt.n;
3235
3236 int stage = (flags & CV_HAL_DFT_STAGE_COLS) != 0 ? 1 : 0;
3237 int complex_elem_size = depth == CV_32F ? sizeof(Complex<float>) : sizeof(Complex<double>);
3238 opt.isInverse = (flags & CV_HAL_DFT_INVERSE) != 0;
3239 bool real_transform = (flags & CV_HAL_DFT_REAL_OUTPUT) != 0;
3240 opt.isComplex = (stage == 0) && (flags & CV_HAL_DFT_COMPLEX_OUTPUT) != 0;
3241 bool needAnotherStage = (flags & CV_HAL_DFT_TWO_STAGE) != 0;
3242
3243 opt.scale = 1;
3244 opt.tab_size = len;
3245 opt.n = len;
3246
3247 opt.useIpp = false;
3248 #ifdef USE_IPP_DFT
3249 opt.ipp_spec = 0;
3250 opt.ipp_work = 0;
3251
3252 if( CV_IPP_CHECK_COND && (opt.n*count >= 64) ) // use IPP DFT if available
3253 {
3254 int ipp_norm_flag = (flags & CV_HAL_DFT_SCALE) == 0 ? 8 : opt.isInverse ? 2 : 1;
3255 int specsize=0, initsize=0, worksize=0;
3256 IppDFTGetSizeFunc getSizeFunc = 0;
3257 IppDFTInitFunc initFunc = 0;
3258
3259 if( real_transform && stage == 0 )
3260 {
3261 if( depth == CV_32F )
3262 {
3263 getSizeFunc = ippsDFTGetSize_R_32f;
3264 initFunc = (IppDFTInitFunc)ippsDFTInit_R_32f;
3265 }
3266 else
3267 {
3268 getSizeFunc = ippsDFTGetSize_R_64f;
3269 initFunc = (IppDFTInitFunc)ippsDFTInit_R_64f;
3270 }
3271 }
3272 else
3273 {
3274 if( depth == CV_32F )
3275 {
3276 getSizeFunc = ippsDFTGetSize_C_32fc;
3277 initFunc = (IppDFTInitFunc)ippsDFTInit_C_32fc;
3278 }
3279 else
3280 {
3281 getSizeFunc = ippsDFTGetSize_C_64fc;
3282 initFunc = (IppDFTInitFunc)ippsDFTInit_C_64fc;
3283 }
3284 }
3285 if( getSizeFunc(opt.n, ipp_norm_flag, ippAlgHintNone, &specsize, &initsize, &worksize) >= 0 )
3286 {
3287 ippbuf.allocate(specsize + initsize + 64);
3288 opt.ipp_spec = alignPtr(&ippbuf[0], 32);
3289 ippworkbuf.allocate(worksize + 32);
3290 opt.ipp_work = alignPtr(&ippworkbuf[0], 32);
3291 uchar* initbuf = alignPtr((uchar*)opt.ipp_spec + specsize, 32);
3292 if( initFunc(opt.n, ipp_norm_flag, ippAlgHintNone, opt.ipp_spec, initbuf) >= 0 )
3293 opt.useIpp = true;
3294 }
3295 else
3296 setIppErrorStatus();
3297 }
3298 #endif
3299
3300 if (!opt.useIpp)
3301 {
3302 if (len != prev_len)
3303 {
3304 opt.nf = DFTFactorize( opt.n, opt.factors );
3305 }
3306 bool inplace_transform = opt.factors[0] == opt.factors[opt.nf-1];
3307 if (len != prev_len || (!inplace_transform && opt.isInverse && real_transform))
3308 {
3309 wave_buf.allocate(opt.n*complex_elem_size);
3310 opt.wave = wave_buf.data();
3311 itab_buf.allocate(opt.n);
3312 opt.itab = itab_buf.data();
3313 DFTInit( opt.n, opt.nf, opt.factors, opt.itab, complex_elem_size,
3314 opt.wave, stage == 0 && opt.isInverse && real_transform );
3315 }
3316 // otherwise reuse the tables calculated on the previous stage
3317 if (needBuffer)
3318 {
3319 if( (stage == 0 && ((*needBuffer && !inplace_transform) || (real_transform && (len & 1)))) ||
3320 (stage == 1 && !inplace_transform) )
3321 {
3322 *needBuffer = true;
3323 }
3324 }
3325 }
3326 else
3327 {
3328 if (needBuffer)
3329 {
3330 *needBuffer = false;
3331 }
3332 }
3333
3334 {
3335 static DFTFunc dft_tbl[6] =
3336 {
3337 (DFTFunc)DFT_32f,
3338 (DFTFunc)RealDFT_32f,
3339 (DFTFunc)CCSIDFT_32f,
3340 (DFTFunc)DFT_64f,
3341 (DFTFunc)RealDFT_64f,
3342 (DFTFunc)CCSIDFT_64f
3343 };
3344 int idx = 0;
3345 if (stage == 0)
3346 {
3347 if (real_transform)
3348 {
3349 if (!opt.isInverse)
3350 idx = 1;
3351 else
3352 idx = 2;
3353 }
3354 }
3355 if (depth == CV_64F)
3356 idx += 3;
3357
3358 opt.dft_func = dft_tbl[idx];
3359 }
3360
3361 if(!needAnotherStage && (flags & CV_HAL_DFT_SCALE) != 0)
3362 {
3363 int rowCount = count;
3364 if (stage == 0 && (flags & CV_HAL_DFT_ROWS) != 0)
3365 rowCount = 1;
3366 opt.scale = 1./(len * rowCount);
3367 }
3368 }
3369
apply(const uchar * src,uchar * dst)3370 void apply(const uchar *src, uchar *dst) CV_OVERRIDE
3371 {
3372 opt.dft_func(opt, src, dst);
3373 }
3374
free()3375 void free() {}
3376 };
3377
3378 struct ReplacementDFT1D : public hal::DFT1D
3379 {
3380 cvhalDFT *context;
3381 bool isInitialized;
3382
ReplacementDFT1Dcv::ReplacementDFT1D3383 ReplacementDFT1D() : context(0), isInitialized(false) {}
initcv::ReplacementDFT1D3384 bool init(int len, int count, int depth, int flags, bool *needBuffer)
3385 {
3386 int res = cv_hal_dftInit1D(&context, len, count, depth, flags, needBuffer);
3387 isInitialized = (res == CV_HAL_ERROR_OK);
3388 return isInitialized;
3389 }
applycv::ReplacementDFT1D3390 void apply(const uchar *src, uchar *dst) CV_OVERRIDE
3391 {
3392 if (isInitialized)
3393 {
3394 CALL_HAL(dft1D, cv_hal_dft1D, context, src, dst);
3395 }
3396 }
~ReplacementDFT1Dcv::ReplacementDFT1D3397 ~ReplacementDFT1D()
3398 {
3399 if (isInitialized)
3400 {
3401 CALL_HAL(dftFree1D, cv_hal_dftFree1D, context);
3402 }
3403 }
3404 };
3405
3406 struct ReplacementDFT2D : public hal::DFT2D
3407 {
3408 cvhalDFT *context;
3409 bool isInitialized;
3410
ReplacementDFT2Dcv::ReplacementDFT2D3411 ReplacementDFT2D() : context(0), isInitialized(false) {}
initcv::ReplacementDFT2D3412 bool init(int width, int height, int depth,
3413 int src_channels, int dst_channels,
3414 int flags, int nonzero_rows)
3415 {
3416 int res = cv_hal_dftInit2D(&context, width, height, depth, src_channels, dst_channels, flags, nonzero_rows);
3417 isInitialized = (res == CV_HAL_ERROR_OK);
3418 return isInitialized;
3419 }
applycv::ReplacementDFT2D3420 void apply(const uchar *src, size_t src_step, uchar *dst, size_t dst_step) CV_OVERRIDE
3421 {
3422 if (isInitialized)
3423 {
3424 CALL_HAL(dft2D, cv_hal_dft2D, context, src, src_step, dst, dst_step);
3425 }
3426 }
~ReplacementDFT2Dcv::ReplacementDFT2D3427 ~ReplacementDFT2D()
3428 {
3429 if (isInitialized)
3430 {
3431 CALL_HAL(dftFree2D, cv_hal_dftFree1D, context);
3432 }
3433 }
3434 };
3435
3436 namespace hal {
3437
3438 //================== 1D ======================
3439
create(int len,int count,int depth,int flags,bool * needBuffer)3440 Ptr<DFT1D> DFT1D::create(int len, int count, int depth, int flags, bool *needBuffer)
3441 {
3442 {
3443 ReplacementDFT1D *impl = new ReplacementDFT1D();
3444 if (impl->init(len, count, depth, flags, needBuffer))
3445 {
3446 return Ptr<DFT1D>(impl);
3447 }
3448 delete impl;
3449 }
3450 {
3451 OcvDftBasicImpl *impl = new OcvDftBasicImpl();
3452 impl->init(len, count, depth, flags, needBuffer);
3453 return Ptr<DFT1D>(impl);
3454 }
3455 }
3456
3457 //================== 2D ======================
3458
create(int width,int height,int depth,int src_channels,int dst_channels,int flags,int nonzero_rows)3459 Ptr<DFT2D> DFT2D::create(int width, int height, int depth,
3460 int src_channels, int dst_channels,
3461 int flags, int nonzero_rows)
3462 {
3463 {
3464 ReplacementDFT2D *impl = new ReplacementDFT2D();
3465 if (impl->init(width, height, depth, src_channels, dst_channels, flags, nonzero_rows))
3466 {
3467 return Ptr<DFT2D>(impl);
3468 }
3469 delete impl;
3470 }
3471 {
3472 if(width == 1 && nonzero_rows > 0 )
3473 {
3474 CV_Error( CV_StsNotImplemented,
3475 "This mode (using nonzero_rows with a single-column matrix) breaks the function's logic, so it is prohibited.\n"
3476 "For fast convolution/correlation use 2-column matrix or single-row matrix instead" );
3477 }
3478 OcvDftImpl *impl = new OcvDftImpl();
3479 impl->init(width, height, depth, src_channels, dst_channels, flags, nonzero_rows);
3480 return Ptr<DFT2D>(impl);
3481 }
3482 }
3483
3484 } // cv::hal::
3485 } // cv::
3486
3487
dft(InputArray _src0,OutputArray _dst,int flags,int nonzero_rows)3488 void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
3489 {
3490 CV_INSTRUMENT_REGION();
3491
3492 #ifdef HAVE_CLAMDFFT
3493 CV_OCL_RUN(ocl::haveAmdFft() && ocl::Device::getDefault().type() != ocl::Device::TYPE_CPU &&
3494 _dst.isUMat() && _src0.dims() <= 2 && nonzero_rows == 0,
3495 ocl_dft_amdfft(_src0, _dst, flags))
3496 #endif
3497
3498 #ifdef HAVE_OPENCL
3499 CV_OCL_RUN(_dst.isUMat() && _src0.dims() <= 2,
3500 ocl_dft(_src0, _dst, flags, nonzero_rows))
3501 #endif
3502
3503 Mat src0 = _src0.getMat(), src = src0;
3504 bool inv = (flags & DFT_INVERSE) != 0;
3505 int type = src.type();
3506 int depth = src.depth();
3507
3508 CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
3509
3510 // Fail if DFT_COMPLEX_INPUT is specified, but src is not 2 channels.
3511 CV_Assert( !((flags & DFT_COMPLEX_INPUT) && src.channels() != 2) );
3512
3513 if( !inv && src.channels() == 1 && (flags & DFT_COMPLEX_OUTPUT) )
3514 _dst.create( src.size(), CV_MAKETYPE(depth, 2) );
3515 else if( inv && src.channels() == 2 && (flags & DFT_REAL_OUTPUT) )
3516 _dst.create( src.size(), depth );
3517 else
3518 _dst.create( src.size(), type );
3519
3520 Mat dst = _dst.getMat();
3521
3522 int f = 0;
3523 if (src.isContinuous() && dst.isContinuous())
3524 f |= CV_HAL_DFT_IS_CONTINUOUS;
3525 if (inv)
3526 f |= CV_HAL_DFT_INVERSE;
3527 if (flags & DFT_ROWS)
3528 f |= CV_HAL_DFT_ROWS;
3529 if (flags & DFT_SCALE)
3530 f |= CV_HAL_DFT_SCALE;
3531 if (src.data == dst.data)
3532 f |= CV_HAL_DFT_IS_INPLACE;
3533 Ptr<hal::DFT2D> c = hal::DFT2D::create(src.cols, src.rows, depth, src.channels(), dst.channels(), f, nonzero_rows);
3534 c->apply(src.data, src.step, dst.data, dst.step);
3535 }
3536
3537
idft(InputArray src,OutputArray dst,int flags,int nonzero_rows)3538 void cv::idft( InputArray src, OutputArray dst, int flags, int nonzero_rows )
3539 {
3540 CV_INSTRUMENT_REGION();
3541
3542 dft( src, dst, flags | DFT_INVERSE, nonzero_rows );
3543 }
3544
3545 #ifdef HAVE_OPENCL
3546
3547 namespace cv {
3548
ocl_mulSpectrums(InputArray _srcA,InputArray _srcB,OutputArray _dst,int flags,bool conjB)3549 static bool ocl_mulSpectrums( InputArray _srcA, InputArray _srcB,
3550 OutputArray _dst, int flags, bool conjB )
3551 {
3552 int atype = _srcA.type(), btype = _srcB.type(),
3553 rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
3554 Size asize = _srcA.size(), bsize = _srcB.size();
3555 CV_Assert(asize == bsize);
3556
3557 if ( !(atype == CV_32FC2 && btype == CV_32FC2) || flags != 0 )
3558 return false;
3559
3560 UMat A = _srcA.getUMat(), B = _srcB.getUMat();
3561 CV_Assert(A.size() == B.size());
3562
3563 _dst.create(A.size(), atype);
3564 UMat dst = _dst.getUMat();
3565
3566 ocl::Kernel k("mulAndScaleSpectrums",
3567 ocl::core::mulspectrums_oclsrc,
3568 format("%s", conjB ? "-D CONJ" : ""));
3569 if (k.empty())
3570 return false;
3571
3572 k.args(ocl::KernelArg::ReadOnlyNoSize(A), ocl::KernelArg::ReadOnlyNoSize(B),
3573 ocl::KernelArg::WriteOnly(dst), rowsPerWI);
3574
3575 size_t globalsize[2] = { (size_t)asize.width, ((size_t)asize.height + rowsPerWI - 1) / rowsPerWI };
3576 return k.run(2, globalsize, NULL, false);
3577 }
3578
3579 }
3580
3581 #endif
3582
3583 namespace {
3584
3585 #define VAL(buf, elem) (((T*)((char*)data ## buf + (step ## buf * (elem))))[0])
3586 #define MUL_SPECTRUMS_COL(A, B, C) \
3587 VAL(C, 0) = VAL(A, 0) * VAL(B, 0); \
3588 for (size_t j = 1; j <= rows - 2; j += 2) \
3589 { \
3590 double a_re = VAL(A, j), a_im = VAL(A, j + 1); \
3591 double b_re = VAL(B, j), b_im = VAL(B, j + 1); \
3592 if (conjB) b_im = -b_im; \
3593 double c_re = a_re * b_re - a_im * b_im; \
3594 double c_im = a_re * b_im + a_im * b_re; \
3595 VAL(C, j) = (T)c_re; VAL(C, j + 1) = (T)c_im; \
3596 } \
3597 if ((rows & 1) == 0) \
3598 VAL(C, rows-1) = VAL(A, rows-1) * VAL(B, rows-1)
3599
3600 template <typename T, bool conjB> static inline
mulSpectrums_processCol_noinplace(const T * dataA,const T * dataB,T * dataC,size_t stepA,size_t stepB,size_t stepC,size_t rows)3601 void mulSpectrums_processCol_noinplace(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows)
3602 {
3603 MUL_SPECTRUMS_COL(A, B, C);
3604 }
3605
3606 template <typename T, bool conjB> static inline
mulSpectrums_processCol_inplaceA(const T * dataB,T * dataAC,size_t stepB,size_t stepAC,size_t rows)3607 void mulSpectrums_processCol_inplaceA(const T* dataB, T* dataAC, size_t stepB, size_t stepAC, size_t rows)
3608 {
3609 MUL_SPECTRUMS_COL(AC, B, AC);
3610 }
3611 template <typename T, bool conjB, bool inplaceA> static inline
mulSpectrums_processCol(const T * dataA,const T * dataB,T * dataC,size_t stepA,size_t stepB,size_t stepC,size_t rows)3612 void mulSpectrums_processCol(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows)
3613 {
3614 if (inplaceA)
3615 mulSpectrums_processCol_inplaceA<T, conjB>(dataB, dataC, stepB, stepC, rows);
3616 else
3617 mulSpectrums_processCol_noinplace<T, conjB>(dataA, dataB, dataC, stepA, stepB, stepC, rows);
3618 }
3619 #undef MUL_SPECTRUMS_COL
3620 #undef VAL
3621
3622 template <typename T, bool conjB, bool inplaceA> static inline
mulSpectrums_processCols(const T * dataA,const T * dataB,T * dataC,size_t stepA,size_t stepB,size_t stepC,size_t rows,size_t cols)3623 void mulSpectrums_processCols(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols)
3624 {
3625 mulSpectrums_processCol<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows);
3626 if ((cols & 1) == 0)
3627 {
3628 mulSpectrums_processCol<T, conjB, inplaceA>(dataA + cols - 1, dataB + cols - 1, dataC + cols - 1, stepA, stepB, stepC, rows);
3629 }
3630 }
3631
3632 #define VAL(buf, elem) (data ## buf[(elem)])
3633 #define MUL_SPECTRUMS_ROW(A, B, C) \
3634 for (size_t j = j0; j < j1; j += 2) \
3635 { \
3636 double a_re = VAL(A, j), a_im = VAL(A, j + 1); \
3637 double b_re = VAL(B, j), b_im = VAL(B, j + 1); \
3638 if (conjB) b_im = -b_im; \
3639 double c_re = a_re * b_re - a_im * b_im; \
3640 double c_im = a_re * b_im + a_im * b_re; \
3641 VAL(C, j) = (T)c_re; VAL(C, j + 1) = (T)c_im; \
3642 }
3643 template <typename T, bool conjB> static inline
mulSpectrums_processRow_noinplace(const T * dataA,const T * dataB,T * dataC,size_t j0,size_t j1)3644 void mulSpectrums_processRow_noinplace(const T* dataA, const T* dataB, T* dataC, size_t j0, size_t j1)
3645 {
3646 MUL_SPECTRUMS_ROW(A, B, C);
3647 }
3648 template <typename T, bool conjB> static inline
mulSpectrums_processRow_inplaceA(const T * dataB,T * dataAC,size_t j0,size_t j1)3649 void mulSpectrums_processRow_inplaceA(const T* dataB, T* dataAC, size_t j0, size_t j1)
3650 {
3651 MUL_SPECTRUMS_ROW(AC, B, AC);
3652 }
3653 template <typename T, bool conjB, bool inplaceA> static inline
mulSpectrums_processRow(const T * dataA,const T * dataB,T * dataC,size_t j0,size_t j1)3654 void mulSpectrums_processRow(const T* dataA, const T* dataB, T* dataC, size_t j0, size_t j1)
3655 {
3656 if (inplaceA)
3657 mulSpectrums_processRow_inplaceA<T, conjB>(dataB, dataC, j0, j1);
3658 else
3659 mulSpectrums_processRow_noinplace<T, conjB>(dataA, dataB, dataC, j0, j1);
3660 }
3661 #undef MUL_SPECTRUMS_ROW
3662 #undef VAL
3663
3664 template <typename T, bool conjB, bool inplaceA> static inline
mulSpectrums_processRows(const T * dataA,const T * dataB,T * dataC,size_t stepA,size_t stepB,size_t stepC,size_t rows,size_t cols,size_t j0,size_t j1,bool is_1d_CN1)3665 void mulSpectrums_processRows(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d_CN1)
3666 {
3667 while (rows-- > 0)
3668 {
3669 if (is_1d_CN1)
3670 dataC[0] = dataA[0]*dataB[0];
3671 mulSpectrums_processRow<T, conjB, inplaceA>(dataA, dataB, dataC, j0, j1);
3672 if (is_1d_CN1 && (cols & 1) == 0)
3673 dataC[j1] = dataA[j1]*dataB[j1];
3674
3675 dataA = (const T*)(((char*)dataA) + stepA);
3676 dataB = (const T*)(((char*)dataB) + stepB);
3677 dataC = (T*)(((char*)dataC) + stepC);
3678 }
3679 }
3680
3681
3682 template <typename T, bool conjB, bool inplaceA> static inline
mulSpectrums_Impl_(const T * dataA,const T * dataB,T * dataC,size_t stepA,size_t stepB,size_t stepC,size_t rows,size_t cols,size_t j0,size_t j1,bool is_1d,bool isCN1)3683 void mulSpectrums_Impl_(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d, bool isCN1)
3684 {
3685 if (!is_1d && isCN1)
3686 {
3687 mulSpectrums_processCols<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols);
3688 }
3689 mulSpectrums_processRows<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d && isCN1);
3690 }
3691 template <typename T, bool conjB> static inline
mulSpectrums_Impl(const T * dataA,const T * dataB,T * dataC,size_t stepA,size_t stepB,size_t stepC,size_t rows,size_t cols,size_t j0,size_t j1,bool is_1d,bool isCN1)3692 void mulSpectrums_Impl(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d, bool isCN1)
3693 {
3694 if (dataA == dataC)
3695 mulSpectrums_Impl_<T, conjB, true>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d, isCN1);
3696 else
3697 mulSpectrums_Impl_<T, conjB, false>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d, isCN1);
3698 }
3699
3700 } // namespace
3701
mulSpectrums(InputArray _srcA,InputArray _srcB,OutputArray _dst,int flags,bool conjB)3702 void cv::mulSpectrums( InputArray _srcA, InputArray _srcB,
3703 OutputArray _dst, int flags, bool conjB )
3704 {
3705 CV_INSTRUMENT_REGION();
3706
3707 CV_OCL_RUN(_dst.isUMat() && _srcA.dims() <= 2 && _srcB.dims() <= 2,
3708 ocl_mulSpectrums(_srcA, _srcB, _dst, flags, conjB))
3709
3710 Mat srcA = _srcA.getMat(), srcB = _srcB.getMat();
3711 int depth = srcA.depth(), cn = srcA.channels(), type = srcA.type();
3712 size_t rows = srcA.rows, cols = srcA.cols;
3713
3714 CV_Assert( type == srcB.type() && srcA.size() == srcB.size() );
3715 CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
3716
3717 _dst.create( srcA.rows, srcA.cols, type );
3718 Mat dst = _dst.getMat();
3719
3720 // correct inplace support
3721 // Case 'dst.data == srcA.data' is handled by implementation,
3722 // because it is used frequently (filter2D, matchTemplate)
3723 if (dst.data == srcB.data)
3724 srcB = srcB.clone(); // workaround for B only
3725
3726 bool is_1d = (flags & DFT_ROWS)
3727 || (rows == 1)
3728 || (cols == 1 && srcA.isContinuous() && srcB.isContinuous() && dst.isContinuous());
3729
3730 if( is_1d && !(flags & DFT_ROWS) )
3731 cols = cols + rows - 1, rows = 1;
3732
3733 bool isCN1 = cn == 1;
3734 size_t j0 = isCN1 ? 1 : 0;
3735 size_t j1 = cols*cn - (((cols & 1) == 0 && cn == 1) ? 1 : 0);
3736
3737 if (depth == CV_32F)
3738 {
3739 const float* dataA = srcA.ptr<float>();
3740 const float* dataB = srcB.ptr<float>();
3741 float* dataC = dst.ptr<float>();
3742 if (!conjB)
3743 mulSpectrums_Impl<float, false>(dataA, dataB, dataC, srcA.step, srcB.step, dst.step, rows, cols, j0, j1, is_1d, isCN1);
3744 else
3745 mulSpectrums_Impl<float, true>(dataA, dataB, dataC, srcA.step, srcB.step, dst.step, rows, cols, j0, j1, is_1d, isCN1);
3746 }
3747 else
3748 {
3749 const double* dataA = srcA.ptr<double>();
3750 const double* dataB = srcB.ptr<double>();
3751 double* dataC = dst.ptr<double>();
3752 if (!conjB)
3753 mulSpectrums_Impl<double, false>(dataA, dataB, dataC, srcA.step, srcB.step, dst.step, rows, cols, j0, j1, is_1d, isCN1);
3754 else
3755 mulSpectrums_Impl<double, true>(dataA, dataB, dataC, srcA.step, srcB.step, dst.step, rows, cols, j0, j1, is_1d, isCN1);
3756 }
3757 }
3758
3759
3760 /****************************************************************************************\
3761 Discrete Cosine Transform
3762 \****************************************************************************************/
3763
3764 namespace cv
3765 {
3766
3767 /* DCT is calculated using DFT, as described here:
3768 http://www.ece.utexas.edu/~bevans/courses/ee381k/lectures/09_DCT/lecture9/:
3769 */
3770 template<typename T> static void
DCT(const OcvDftOptions & c,const T * src,size_t src_step,T * dft_src,T * dft_dst,T * dst,size_t dst_step,const Complex<T> * dct_wave)3771 DCT( const OcvDftOptions & c, const T* src, size_t src_step, T* dft_src, T* dft_dst, T* dst, size_t dst_step,
3772 const Complex<T>* dct_wave )
3773 {
3774 static const T sin_45 = (T)0.70710678118654752440084436210485;
3775
3776 int n = c.n;
3777 int j, n2 = n >> 1;
3778
3779 src_step /= sizeof(src[0]);
3780 dst_step /= sizeof(dst[0]);
3781 T* dst1 = dst + (n-1)*dst_step;
3782
3783 if( n == 1 )
3784 {
3785 dst[0] = src[0];
3786 return;
3787 }
3788
3789 for( j = 0; j < n2; j++, src += src_step*2 )
3790 {
3791 dft_src[j] = src[0];
3792 dft_src[n-j-1] = src[src_step];
3793 }
3794
3795 RealDFT(c, dft_src, dft_dst);
3796 src = dft_dst;
3797
3798 dst[0] = (T)(src[0]*dct_wave->re*sin_45);
3799 dst += dst_step;
3800 for( j = 1, dct_wave++; j < n2; j++, dct_wave++,
3801 dst += dst_step, dst1 -= dst_step )
3802 {
3803 T t0 = dct_wave->re*src[j*2-1] - dct_wave->im*src[j*2];
3804 T t1 = -dct_wave->im*src[j*2-1] - dct_wave->re*src[j*2];
3805 dst[0] = t0;
3806 dst1[0] = t1;
3807 }
3808
3809 dst[0] = src[n-1]*dct_wave->re;
3810 }
3811
3812
3813 template<typename T> static void
IDCT(const OcvDftOptions & c,const T * src,size_t src_step,T * dft_src,T * dft_dst,T * dst,size_t dst_step,const Complex<T> * dct_wave)3814 IDCT( const OcvDftOptions & c, const T* src, size_t src_step, T* dft_src, T* dft_dst, T* dst, size_t dst_step,
3815 const Complex<T>* dct_wave)
3816 {
3817 static const T sin_45 = (T)0.70710678118654752440084436210485;
3818 int n = c.n;
3819 int j, n2 = n >> 1;
3820
3821 src_step /= sizeof(src[0]);
3822 dst_step /= sizeof(dst[0]);
3823 const T* src1 = src + (n-1)*src_step;
3824
3825 if( n == 1 )
3826 {
3827 dst[0] = src[0];
3828 return;
3829 }
3830
3831 dft_src[0] = (T)(src[0]*2*dct_wave->re*sin_45);
3832 src += src_step;
3833 for( j = 1, dct_wave++; j < n2; j++, dct_wave++,
3834 src += src_step, src1 -= src_step )
3835 {
3836 T t0 = dct_wave->re*src[0] - dct_wave->im*src1[0];
3837 T t1 = -dct_wave->im*src[0] - dct_wave->re*src1[0];
3838 dft_src[j*2-1] = t0;
3839 dft_src[j*2] = t1;
3840 }
3841
3842 dft_src[n-1] = (T)(src[0]*2*dct_wave->re);
3843 CCSIDFT(c, dft_src, dft_dst);
3844
3845 for( j = 0; j < n2; j++, dst += dst_step*2 )
3846 {
3847 dst[0] = dft_dst[j];
3848 dst[dst_step] = dft_dst[n-j-1];
3849 }
3850 }
3851
3852
3853 static void
DCTInit(int n,int elem_size,void * _wave,int inv)3854 DCTInit( int n, int elem_size, void* _wave, int inv )
3855 {
3856 static const double DctScale[] =
3857 {
3858 0.707106781186547570, 0.500000000000000000, 0.353553390593273790,
3859 0.250000000000000000, 0.176776695296636890, 0.125000000000000000,
3860 0.088388347648318447, 0.062500000000000000, 0.044194173824159223,
3861 0.031250000000000000, 0.022097086912079612, 0.015625000000000000,
3862 0.011048543456039806, 0.007812500000000000, 0.005524271728019903,
3863 0.003906250000000000, 0.002762135864009952, 0.001953125000000000,
3864 0.001381067932004976, 0.000976562500000000, 0.000690533966002488,
3865 0.000488281250000000, 0.000345266983001244, 0.000244140625000000,
3866 0.000172633491500622, 0.000122070312500000, 0.000086316745750311,
3867 0.000061035156250000, 0.000043158372875155, 0.000030517578125000
3868 };
3869
3870 int i;
3871 Complex<double> w, w1;
3872 double t, scale;
3873
3874 if( n == 1 )
3875 return;
3876
3877 assert( (n&1) == 0 );
3878
3879 if( (n & (n - 1)) == 0 )
3880 {
3881 int m;
3882 for( m = 0; (unsigned)(1 << m) < (unsigned)n; m++ )
3883 ;
3884 scale = (!inv ? 2 : 1)*DctScale[m];
3885 w1.re = DFTTab[m+2][0];
3886 w1.im = -DFTTab[m+2][1];
3887 }
3888 else
3889 {
3890 t = 1./(2*n);
3891 scale = (!inv ? 2 : 1)*std::sqrt(t);
3892 w1.im = sin(-CV_PI*t);
3893 w1.re = std::sqrt(1. - w1.im*w1.im);
3894 }
3895 n >>= 1;
3896
3897 if( elem_size == sizeof(Complex<double>) )
3898 {
3899 Complex<double>* wave = (Complex<double>*)_wave;
3900
3901 w.re = scale;
3902 w.im = 0.;
3903
3904 for( i = 0; i <= n; i++ )
3905 {
3906 wave[i] = w;
3907 t = w.re*w1.re - w.im*w1.im;
3908 w.im = w.re*w1.im + w.im*w1.re;
3909 w.re = t;
3910 }
3911 }
3912 else
3913 {
3914 Complex<float>* wave = (Complex<float>*)_wave;
3915 assert( elem_size == sizeof(Complex<float>) );
3916
3917 w.re = (float)scale;
3918 w.im = 0.f;
3919
3920 for( i = 0; i <= n; i++ )
3921 {
3922 wave[i].re = (float)w.re;
3923 wave[i].im = (float)w.im;
3924 t = w.re*w1.re - w.im*w1.im;
3925 w.im = w.re*w1.im + w.im*w1.re;
3926 w.re = t;
3927 }
3928 }
3929 }
3930
3931
3932 typedef void (*DCTFunc)(const OcvDftOptions & c, const void* src, size_t src_step, void* dft_src,
3933 void* dft_dst, void* dst, size_t dst_step, const void* dct_wave);
3934
DCT_32f(const OcvDftOptions & c,const float * src,size_t src_step,float * dft_src,float * dft_dst,float * dst,size_t dst_step,const Complexf * dct_wave)3935 static void DCT_32f(const OcvDftOptions & c, const float* src, size_t src_step, float* dft_src, float* dft_dst,
3936 float* dst, size_t dst_step, const Complexf* dct_wave)
3937 {
3938 DCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
3939 }
3940
IDCT_32f(const OcvDftOptions & c,const float * src,size_t src_step,float * dft_src,float * dft_dst,float * dst,size_t dst_step,const Complexf * dct_wave)3941 static void IDCT_32f(const OcvDftOptions & c, const float* src, size_t src_step, float* dft_src, float* dft_dst,
3942 float* dst, size_t dst_step, const Complexf* dct_wave)
3943 {
3944 IDCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
3945 }
3946
DCT_64f(const OcvDftOptions & c,const double * src,size_t src_step,double * dft_src,double * dft_dst,double * dst,size_t dst_step,const Complexd * dct_wave)3947 static void DCT_64f(const OcvDftOptions & c, const double* src, size_t src_step, double* dft_src, double* dft_dst,
3948 double* dst, size_t dst_step, const Complexd* dct_wave)
3949 {
3950 DCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
3951 }
3952
IDCT_64f(const OcvDftOptions & c,const double * src,size_t src_step,double * dft_src,double * dft_dst,double * dst,size_t dst_step,const Complexd * dct_wave)3953 static void IDCT_64f(const OcvDftOptions & c, const double* src, size_t src_step, double* dft_src, double* dft_dst,
3954 double* dst, size_t dst_step, const Complexd* dct_wave)
3955 {
3956 IDCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
3957 }
3958
3959 }
3960
3961 #ifdef HAVE_IPP
3962 namespace cv
3963 {
3964
3965 #if IPP_VERSION_X100 >= 900
3966 typedef IppStatus (CV_STDCALL * ippiDCTFunc)(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep, const void* pDCTSpec, Ipp8u* pBuffer);
3967 typedef IppStatus (CV_STDCALL * ippiDCTInit)(void* pDCTSpec, IppiSize roiSize, Ipp8u* pMemInit );
3968 typedef IppStatus (CV_STDCALL * ippiDCTGetSize)(IppiSize roiSize, int* pSizeSpec, int* pSizeInit, int* pSizeBuf);
3969 #elif IPP_VERSION_X100 >= 700
3970 typedef IppStatus (CV_STDCALL * ippiDCTFunc)(const Ipp32f*, int, Ipp32f*, int, const void*, Ipp8u*);
3971 typedef IppStatus (CV_STDCALL * ippiDCTInitAlloc)(void**, IppiSize, IppHintAlgorithm);
3972 typedef IppStatus (CV_STDCALL * ippiDCTFree)(void* pDCTSpec);
3973 typedef IppStatus (CV_STDCALL * ippiDCTGetBufSize)(const void*, int*);
3974 #endif
3975
3976 class DctIPPLoop_Invoker : public ParallelLoopBody
3977 {
3978 public:
DctIPPLoop_Invoker(const uchar * _src,size_t _src_step,uchar * _dst,size_t _dst_step,int _width,bool _inv,bool * _ok)3979 DctIPPLoop_Invoker(const uchar * _src, size_t _src_step, uchar * _dst, size_t _dst_step, int _width, bool _inv, bool *_ok) :
3980 ParallelLoopBody(), src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width), inv(_inv), ok(_ok)
3981 {
3982 *ok = true;
3983 }
3984
operator ()(const Range & range) const3985 virtual void operator()(const Range& range) const CV_OVERRIDE
3986 {
3987 if(*ok == false)
3988 return;
3989
3990 #if IPP_VERSION_X100 >= 900
3991 IppiSize srcRoiSize = {width, 1};
3992
3993 int specSize = 0;
3994 int initSize = 0;
3995 int bufferSize = 0;
3996
3997 Ipp8u* pDCTSpec = NULL;
3998 Ipp8u* pBuffer = NULL;
3999 Ipp8u* pInitBuf = NULL;
4000
4001 #define IPP_RETURN \
4002 if(pDCTSpec) \
4003 ippFree(pDCTSpec); \
4004 if(pBuffer) \
4005 ippFree(pBuffer); \
4006 if(pInitBuf) \
4007 ippFree(pInitBuf); \
4008 return;
4009
4010 ippiDCTFunc ippiDCT_32f_C1R = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
4011 ippiDCTInit ippDctInit = inv ? (ippiDCTInit)ippiDCTInvInit_32f : (ippiDCTInit)ippiDCTFwdInit_32f;
4012 ippiDCTGetSize ippDctGetSize = inv ? (ippiDCTGetSize)ippiDCTInvGetSize_32f : (ippiDCTGetSize)ippiDCTFwdGetSize_32f;
4013
4014 if(ippDctGetSize(srcRoiSize, &specSize, &initSize, &bufferSize) < 0)
4015 {
4016 *ok = false;
4017 return;
4018 }
4019
4020 pDCTSpec = (Ipp8u*)CV_IPP_MALLOC(specSize);
4021 if(!pDCTSpec && specSize)
4022 {
4023 *ok = false;
4024 return;
4025 }
4026
4027 pBuffer = (Ipp8u*)CV_IPP_MALLOC(bufferSize);
4028 if(!pBuffer && bufferSize)
4029 {
4030 *ok = false;
4031 IPP_RETURN
4032 }
4033 pInitBuf = (Ipp8u*)CV_IPP_MALLOC(initSize);
4034 if(!pInitBuf && initSize)
4035 {
4036 *ok = false;
4037 IPP_RETURN
4038 }
4039
4040 if(ippDctInit(pDCTSpec, srcRoiSize, pInitBuf) < 0)
4041 {
4042 *ok = false;
4043 IPP_RETURN
4044 }
4045
4046 for(int i = range.start; i < range.end; ++i)
4047 {
4048 if(CV_INSTRUMENT_FUN_IPP(ippiDCT_32f_C1R, (float*)(src + src_step * i), static_cast<int>(src_step), (float*)(dst + dst_step * i), static_cast<int>(dst_step), pDCTSpec, pBuffer) < 0)
4049 {
4050 *ok = false;
4051 IPP_RETURN
4052 }
4053 }
4054 IPP_RETURN
4055 #undef IPP_RETURN
4056 #elif IPP_VERSION_X100 >= 700
4057 void* pDCTSpec;
4058 AutoBuffer<uchar> buf;
4059 uchar* pBuffer = 0;
4060 int bufSize=0;
4061
4062 IppiSize srcRoiSize = {width, 1};
4063
4064 CV_SUPPRESS_DEPRECATED_START
4065
4066 ippiDCTFunc ippDctFun = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
4067 ippiDCTInitAlloc ippInitAlloc = inv ? (ippiDCTInitAlloc)ippiDCTInvInitAlloc_32f : (ippiDCTInitAlloc)ippiDCTFwdInitAlloc_32f;
4068 ippiDCTFree ippFree = inv ? (ippiDCTFree)ippiDCTInvFree_32f : (ippiDCTFree)ippiDCTFwdFree_32f;
4069 ippiDCTGetBufSize ippGetBufSize = inv ? (ippiDCTGetBufSize)ippiDCTInvGetBufSize_32f : (ippiDCTGetBufSize)ippiDCTFwdGetBufSize_32f;
4070
4071 if (ippInitAlloc(&pDCTSpec, srcRoiSize, ippAlgHintNone)>=0 && ippGetBufSize(pDCTSpec, &bufSize)>=0)
4072 {
4073 buf.allocate( bufSize );
4074 pBuffer = (uchar*)buf;
4075
4076 for( int i = range.start; i < range.end; ++i)
4077 {
4078 if(ippDctFun((float*)(src + src_step * i), static_cast<int>(src_step), (float*)(dst + dst_step * i), static_cast<int>(dst_step), pDCTSpec, (Ipp8u*)pBuffer) < 0)
4079 {
4080 *ok = false;
4081 break;
4082 }
4083 }
4084 }
4085 else
4086 *ok = false;
4087
4088 if (pDCTSpec)
4089 ippFree(pDCTSpec);
4090
4091 CV_SUPPRESS_DEPRECATED_END
4092 #else
4093 CV_UNUSED(range);
4094 *ok = false;
4095 #endif
4096 }
4097
4098 private:
4099 const uchar * src;
4100 size_t src_step;
4101 uchar * dst;
4102 size_t dst_step;
4103 int width;
4104 bool inv;
4105 bool *ok;
4106 };
4107
DctIPPLoop(const uchar * src,size_t src_step,uchar * dst,size_t dst_step,int width,int height,bool inv)4108 static bool DctIPPLoop(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, bool inv)
4109 {
4110 bool ok;
4111 parallel_for_(Range(0, height), DctIPPLoop_Invoker(src, src_step, dst, dst_step, width, inv, &ok), height/(double)(1<<4) );
4112 return ok;
4113 }
4114
ippi_DCT_32f(const uchar * src,size_t src_step,uchar * dst,size_t dst_step,int width,int height,bool inv,bool row)4115 static bool ippi_DCT_32f(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, bool inv, bool row)
4116 {
4117 CV_INSTRUMENT_REGION_IPP();
4118
4119 if(row)
4120 return DctIPPLoop(src, src_step, dst, dst_step, width, height, inv);
4121 else
4122 {
4123 #if IPP_VERSION_X100 >= 900
4124 IppiSize srcRoiSize = {width, height};
4125
4126 int specSize = 0;
4127 int initSize = 0;
4128 int bufferSize = 0;
4129
4130 Ipp8u* pDCTSpec = NULL;
4131 Ipp8u* pBuffer = NULL;
4132 Ipp8u* pInitBuf = NULL;
4133
4134 #define IPP_RELEASE \
4135 if(pDCTSpec) \
4136 ippFree(pDCTSpec); \
4137 if(pBuffer) \
4138 ippFree(pBuffer); \
4139 if(pInitBuf) \
4140 ippFree(pInitBuf); \
4141
4142 ippiDCTFunc ippiDCT_32f_C1R = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
4143 ippiDCTInit ippDctInit = inv ? (ippiDCTInit)ippiDCTInvInit_32f : (ippiDCTInit)ippiDCTFwdInit_32f;
4144 ippiDCTGetSize ippDctGetSize = inv ? (ippiDCTGetSize)ippiDCTInvGetSize_32f : (ippiDCTGetSize)ippiDCTFwdGetSize_32f;
4145
4146 if(ippDctGetSize(srcRoiSize, &specSize, &initSize, &bufferSize) < 0)
4147 return false;
4148
4149 pDCTSpec = (Ipp8u*)CV_IPP_MALLOC(specSize);
4150 if(!pDCTSpec && specSize)
4151 return false;
4152
4153 pBuffer = (Ipp8u*)CV_IPP_MALLOC(bufferSize);
4154 if(!pBuffer && bufferSize)
4155 {
4156 IPP_RELEASE
4157 return false;
4158 }
4159 pInitBuf = (Ipp8u*)CV_IPP_MALLOC(initSize);
4160 if(!pInitBuf && initSize)
4161 {
4162 IPP_RELEASE
4163 return false;
4164 }
4165
4166 if(ippDctInit(pDCTSpec, srcRoiSize, pInitBuf) < 0)
4167 {
4168 IPP_RELEASE
4169 return false;
4170 }
4171
4172 if(CV_INSTRUMENT_FUN_IPP(ippiDCT_32f_C1R, (float*)src, static_cast<int>(src_step), (float*)dst, static_cast<int>(dst_step), pDCTSpec, pBuffer) < 0)
4173 {
4174 IPP_RELEASE
4175 return false;
4176 }
4177
4178 IPP_RELEASE
4179 return true;
4180 #undef IPP_RELEASE
4181 #elif IPP_VERSION_X100 >= 700
4182 IppStatus status;
4183 void* pDCTSpec;
4184 AutoBuffer<uchar> buf;
4185 uchar* pBuffer = 0;
4186 int bufSize=0;
4187
4188 IppiSize srcRoiSize = {width, height};
4189
4190 CV_SUPPRESS_DEPRECATED_START
4191
4192 ippiDCTFunc ippDctFun = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
4193 ippiDCTInitAlloc ippInitAlloc = inv ? (ippiDCTInitAlloc)ippiDCTInvInitAlloc_32f : (ippiDCTInitAlloc)ippiDCTFwdInitAlloc_32f;
4194 ippiDCTFree ippFree = inv ? (ippiDCTFree)ippiDCTInvFree_32f : (ippiDCTFree)ippiDCTFwdFree_32f;
4195 ippiDCTGetBufSize ippGetBufSize = inv ? (ippiDCTGetBufSize)ippiDCTInvGetBufSize_32f : (ippiDCTGetBufSize)ippiDCTFwdGetBufSize_32f;
4196
4197 status = ippStsErr;
4198
4199 if (ippInitAlloc(&pDCTSpec, srcRoiSize, ippAlgHintNone)>=0 && ippGetBufSize(pDCTSpec, &bufSize)>=0)
4200 {
4201 buf.allocate( bufSize );
4202 pBuffer = (uchar*)buf;
4203
4204 status = ippDctFun((float*)src, static_cast<int>(src_step), (float*)dst, static_cast<int>(dst_step), pDCTSpec, (Ipp8u*)pBuffer);
4205 }
4206
4207 if (pDCTSpec)
4208 ippFree(pDCTSpec);
4209
4210 CV_SUPPRESS_DEPRECATED_END
4211
4212 return status >= 0;
4213 #else
4214 CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(inv); CV_UNUSED(row);
4215 return false;
4216 #endif
4217 }
4218 }
4219 }
4220 #endif
4221
4222 namespace cv {
4223
4224 class OcvDctImpl CV_FINAL : public hal::DCT2D
4225 {
4226 public:
4227 OcvDftOptions opt;
4228
4229 int _factors[34];
4230 AutoBuffer<uint> wave_buf;
4231 AutoBuffer<int> itab_buf;
4232
4233 DCTFunc dct_func;
4234 bool isRowTransform;
4235 bool isInverse;
4236 bool isContinuous;
4237 int start_stage;
4238 int end_stage;
4239 int width;
4240 int height;
4241 int depth;
4242
init(int _width,int _height,int _depth,int flags)4243 void init(int _width, int _height, int _depth, int flags)
4244 {
4245 width = _width;
4246 height = _height;
4247 depth = _depth;
4248 isInverse = (flags & CV_HAL_DFT_INVERSE) != 0;
4249 isRowTransform = (flags & CV_HAL_DFT_ROWS) != 0;
4250 isContinuous = (flags & CV_HAL_DFT_IS_CONTINUOUS) != 0;
4251 static DCTFunc dct_tbl[4] =
4252 {
4253 (DCTFunc)DCT_32f,
4254 (DCTFunc)IDCT_32f,
4255 (DCTFunc)DCT_64f,
4256 (DCTFunc)IDCT_64f
4257 };
4258 dct_func = dct_tbl[(int)isInverse + (depth == CV_64F)*2];
4259 opt.nf = 0;
4260 opt.isComplex = false;
4261 opt.isInverse = false;
4262 opt.noPermute = false;
4263 opt.scale = 1.;
4264 opt.factors = _factors;
4265
4266 if (isRowTransform || height == 1 || (width == 1 && isContinuous))
4267 {
4268 start_stage = end_stage = 0;
4269 }
4270 else
4271 {
4272 start_stage = (width == 1);
4273 end_stage = 1;
4274 }
4275 }
apply(const uchar * src,size_t src_step,uchar * dst,size_t dst_step)4276 void apply(const uchar *src, size_t src_step, uchar *dst, size_t dst_step) CV_OVERRIDE
4277 {
4278 CV_IPP_RUN(IPP_VERSION_X100 >= 700 && depth == CV_32F, ippi_DCT_32f(src, src_step, dst, dst_step, width, height, isInverse, isRowTransform))
4279
4280 AutoBuffer<uchar> dct_wave;
4281 AutoBuffer<uchar> src_buf, dst_buf;
4282 uchar *src_dft_buf = 0, *dst_dft_buf = 0;
4283 int prev_len = 0;
4284 int elem_size = (depth == CV_32F) ? sizeof(float) : sizeof(double);
4285 int complex_elem_size = elem_size*2;
4286
4287 for(int stage = start_stage ; stage <= end_stage; stage++ )
4288 {
4289 const uchar* sptr = src;
4290 uchar* dptr = dst;
4291 size_t sstep0, sstep1, dstep0, dstep1;
4292 int len, count;
4293
4294 if( stage == 0 )
4295 {
4296 len = width;
4297 count = height;
4298 if( len == 1 && !isRowTransform )
4299 {
4300 len = height;
4301 count = 1;
4302 }
4303 sstep0 = src_step;
4304 dstep0 = dst_step;
4305 sstep1 = dstep1 = elem_size;
4306 }
4307 else
4308 {
4309 len = height;
4310 count = width;
4311 sstep1 = src_step;
4312 dstep1 = dst_step;
4313 sstep0 = dstep0 = elem_size;
4314 }
4315
4316 opt.n = len;
4317 opt.tab_size = len;
4318
4319 if( len != prev_len )
4320 {
4321 if( len > 1 && (len & 1) )
4322 CV_Error( CV_StsNotImplemented, "Odd-size DCT\'s are not implemented" );
4323
4324 opt.nf = DFTFactorize( len, opt.factors );
4325 bool inplace_transform = opt.factors[0] == opt.factors[opt.nf-1];
4326
4327 wave_buf.allocate(len*complex_elem_size);
4328 opt.wave = wave_buf.data();
4329 itab_buf.allocate(len);
4330 opt.itab = itab_buf.data();
4331 DFTInit( len, opt.nf, opt.factors, opt.itab, complex_elem_size, opt.wave, isInverse );
4332
4333 dct_wave.allocate((len/2 + 1)*complex_elem_size);
4334 src_buf.allocate(len*elem_size);
4335 src_dft_buf = src_buf.data();
4336 if(!inplace_transform)
4337 {
4338 dst_buf.allocate(len*elem_size);
4339 dst_dft_buf = dst_buf.data();
4340 }
4341 else
4342 {
4343 dst_dft_buf = src_buf.data();
4344 }
4345 DCTInit( len, complex_elem_size, dct_wave.data(), isInverse);
4346 prev_len = len;
4347 }
4348 // otherwise reuse the tables calculated on the previous stage
4349 for(unsigned i = 0; i < static_cast<unsigned>(count); i++ )
4350 {
4351 dct_func( opt, sptr + i*sstep0, sstep1, src_dft_buf, dst_dft_buf,
4352 dptr + i*dstep0, dstep1, dct_wave.data());
4353 }
4354 src = dst;
4355 src_step = dst_step;
4356 }
4357 }
4358 };
4359
4360 struct ReplacementDCT2D : public hal::DCT2D
4361 {
4362 cvhalDFT *context;
4363 bool isInitialized;
4364
ReplacementDCT2Dcv::ReplacementDCT2D4365 ReplacementDCT2D() : context(0), isInitialized(false) {}
initcv::ReplacementDCT2D4366 bool init(int width, int height, int depth, int flags)
4367 {
4368 int res = hal_ni_dctInit2D(&context, width, height, depth, flags);
4369 isInitialized = (res == CV_HAL_ERROR_OK);
4370 return isInitialized;
4371 }
applycv::ReplacementDCT2D4372 void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) CV_OVERRIDE
4373 {
4374 if (isInitialized)
4375 {
4376 CALL_HAL(dct2D, cv_hal_dct2D, context, src_data, src_step, dst_data, dst_step);
4377 }
4378 }
~ReplacementDCT2Dcv::ReplacementDCT2D4379 ~ReplacementDCT2D()
4380 {
4381 if (isInitialized)
4382 {
4383 CALL_HAL(dctFree2D, cv_hal_dctFree2D, context);
4384 }
4385 }
4386 };
4387
4388 namespace hal {
4389
create(int width,int height,int depth,int flags)4390 Ptr<DCT2D> DCT2D::create(int width, int height, int depth, int flags)
4391 {
4392 {
4393 ReplacementDCT2D *impl = new ReplacementDCT2D();
4394 if (impl->init(width, height, depth, flags))
4395 {
4396 return Ptr<DCT2D>(impl);
4397 }
4398 delete impl;
4399 }
4400 {
4401 OcvDctImpl *impl = new OcvDctImpl();
4402 impl->init(width, height, depth, flags);
4403 return Ptr<DCT2D>(impl);
4404 }
4405 }
4406
4407 } // cv::hal::
4408 } // cv::
4409
dct(InputArray _src0,OutputArray _dst,int flags)4410 void cv::dct( InputArray _src0, OutputArray _dst, int flags )
4411 {
4412 CV_INSTRUMENT_REGION();
4413
4414 Mat src0 = _src0.getMat(), src = src0;
4415 int type = src.type(), depth = src.depth();
4416
4417 CV_Assert( type == CV_32FC1 || type == CV_64FC1 );
4418 _dst.create( src.rows, src.cols, type );
4419 Mat dst = _dst.getMat();
4420
4421 int f = 0;
4422 if ((flags & DFT_ROWS) != 0)
4423 f |= CV_HAL_DFT_ROWS;
4424 if ((flags & DCT_INVERSE) != 0)
4425 f |= CV_HAL_DFT_INVERSE;
4426 if (src.isContinuous() && dst.isContinuous())
4427 f |= CV_HAL_DFT_IS_CONTINUOUS;
4428
4429 Ptr<hal::DCT2D> c = hal::DCT2D::create(src.cols, src.rows, depth, f);
4430 c->apply(src.data, src.step, dst.data, dst.step);
4431 }
4432
4433
idct(InputArray src,OutputArray dst,int flags)4434 void cv::idct( InputArray src, OutputArray dst, int flags )
4435 {
4436 CV_INSTRUMENT_REGION();
4437
4438 dct( src, dst, flags | DCT_INVERSE );
4439 }
4440
4441 namespace cv
4442 {
4443
4444 static const int optimalDFTSizeTab[] = {
4445 1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 15, 16, 18, 20, 24, 25, 27, 30, 32, 36, 40, 45, 48,
4446 50, 54, 60, 64, 72, 75, 80, 81, 90, 96, 100, 108, 120, 125, 128, 135, 144, 150, 160,
4447 162, 180, 192, 200, 216, 225, 240, 243, 250, 256, 270, 288, 300, 320, 324, 360, 375,
4448 384, 400, 405, 432, 450, 480, 486, 500, 512, 540, 576, 600, 625, 640, 648, 675, 720,
4449 729, 750, 768, 800, 810, 864, 900, 960, 972, 1000, 1024, 1080, 1125, 1152, 1200,
4450 1215, 1250, 1280, 1296, 1350, 1440, 1458, 1500, 1536, 1600, 1620, 1728, 1800, 1875,
4451 1920, 1944, 2000, 2025, 2048, 2160, 2187, 2250, 2304, 2400, 2430, 2500, 2560, 2592,
4452 2700, 2880, 2916, 3000, 3072, 3125, 3200, 3240, 3375, 3456, 3600, 3645, 3750, 3840,
4453 3888, 4000, 4050, 4096, 4320, 4374, 4500, 4608, 4800, 4860, 5000, 5120, 5184, 5400,
4454 5625, 5760, 5832, 6000, 6075, 6144, 6250, 6400, 6480, 6561, 6750, 6912, 7200, 7290,
4455 7500, 7680, 7776, 8000, 8100, 8192, 8640, 8748, 9000, 9216, 9375, 9600, 9720, 10000,
4456 10125, 10240, 10368, 10800, 10935, 11250, 11520, 11664, 12000, 12150, 12288, 12500,
4457 12800, 12960, 13122, 13500, 13824, 14400, 14580, 15000, 15360, 15552, 15625, 16000,
4458 16200, 16384, 16875, 17280, 17496, 18000, 18225, 18432, 18750, 19200, 19440, 19683,
4459 20000, 20250, 20480, 20736, 21600, 21870, 22500, 23040, 23328, 24000, 24300, 24576,
4460 25000, 25600, 25920, 26244, 27000, 27648, 28125, 28800, 29160, 30000, 30375, 30720,
4461 31104, 31250, 32000, 32400, 32768, 32805, 33750, 34560, 34992, 36000, 36450, 36864,
4462 37500, 38400, 38880, 39366, 40000, 40500, 40960, 41472, 43200, 43740, 45000, 46080,
4463 46656, 46875, 48000, 48600, 49152, 50000, 50625, 51200, 51840, 52488, 54000, 54675,
4464 55296, 56250, 57600, 58320, 59049, 60000, 60750, 61440, 62208, 62500, 64000, 64800,
4465 65536, 65610, 67500, 69120, 69984, 72000, 72900, 73728, 75000, 76800, 77760, 78125,
4466 78732, 80000, 81000, 81920, 82944, 84375, 86400, 87480, 90000, 91125, 92160, 93312,
4467 93750, 96000, 97200, 98304, 98415, 100000, 101250, 102400, 103680, 104976, 108000,
4468 109350, 110592, 112500, 115200, 116640, 118098, 120000, 121500, 122880, 124416, 125000,
4469 128000, 129600, 131072, 131220, 135000, 138240, 139968, 140625, 144000, 145800, 147456,
4470 150000, 151875, 153600, 155520, 156250, 157464, 160000, 162000, 163840, 164025, 165888,
4471 168750, 172800, 174960, 177147, 180000, 182250, 184320, 186624, 187500, 192000, 194400,
4472 196608, 196830, 200000, 202500, 204800, 207360, 209952, 216000, 218700, 221184, 225000,
4473 230400, 233280, 234375, 236196, 240000, 243000, 245760, 248832, 250000, 253125, 256000,
4474 259200, 262144, 262440, 270000, 273375, 276480, 279936, 281250, 288000, 291600, 294912,
4475 295245, 300000, 303750, 307200, 311040, 312500, 314928, 320000, 324000, 327680, 328050,
4476 331776, 337500, 345600, 349920, 354294, 360000, 364500, 368640, 373248, 375000, 384000,
4477 388800, 390625, 393216, 393660, 400000, 405000, 409600, 414720, 419904, 421875, 432000,
4478 437400, 442368, 450000, 455625, 460800, 466560, 468750, 472392, 480000, 486000, 491520,
4479 492075, 497664, 500000, 506250, 512000, 518400, 524288, 524880, 531441, 540000, 546750,
4480 552960, 559872, 562500, 576000, 583200, 589824, 590490, 600000, 607500, 614400, 622080,
4481 625000, 629856, 640000, 648000, 655360, 656100, 663552, 675000, 691200, 699840, 703125,
4482 708588, 720000, 729000, 737280, 746496, 750000, 759375, 768000, 777600, 781250, 786432,
4483 787320, 800000, 810000, 819200, 820125, 829440, 839808, 843750, 864000, 874800, 884736,
4484 885735, 900000, 911250, 921600, 933120, 937500, 944784, 960000, 972000, 983040, 984150,
4485 995328, 1000000, 1012500, 1024000, 1036800, 1048576, 1049760, 1062882, 1080000, 1093500,
4486 1105920, 1119744, 1125000, 1152000, 1166400, 1171875, 1179648, 1180980, 1200000,
4487 1215000, 1228800, 1244160, 1250000, 1259712, 1265625, 1280000, 1296000, 1310720,
4488 1312200, 1327104, 1350000, 1366875, 1382400, 1399680, 1406250, 1417176, 1440000,
4489 1458000, 1474560, 1476225, 1492992, 1500000, 1518750, 1536000, 1555200, 1562500,
4490 1572864, 1574640, 1594323, 1600000, 1620000, 1638400, 1640250, 1658880, 1679616,
4491 1687500, 1728000, 1749600, 1769472, 1771470, 1800000, 1822500, 1843200, 1866240,
4492 1875000, 1889568, 1920000, 1944000, 1953125, 1966080, 1968300, 1990656, 2000000,
4493 2025000, 2048000, 2073600, 2097152, 2099520, 2109375, 2125764, 2160000, 2187000,
4494 2211840, 2239488, 2250000, 2278125, 2304000, 2332800, 2343750, 2359296, 2361960,
4495 2400000, 2430000, 2457600, 2460375, 2488320, 2500000, 2519424, 2531250, 2560000,
4496 2592000, 2621440, 2624400, 2654208, 2657205, 2700000, 2733750, 2764800, 2799360,
4497 2812500, 2834352, 2880000, 2916000, 2949120, 2952450, 2985984, 3000000, 3037500,
4498 3072000, 3110400, 3125000, 3145728, 3149280, 3188646, 3200000, 3240000, 3276800,
4499 3280500, 3317760, 3359232, 3375000, 3456000, 3499200, 3515625, 3538944, 3542940,
4500 3600000, 3645000, 3686400, 3732480, 3750000, 3779136, 3796875, 3840000, 3888000,
4501 3906250, 3932160, 3936600, 3981312, 4000000, 4050000, 4096000, 4100625, 4147200,
4502 4194304, 4199040, 4218750, 4251528, 4320000, 4374000, 4423680, 4428675, 4478976,
4503 4500000, 4556250, 4608000, 4665600, 4687500, 4718592, 4723920, 4782969, 4800000,
4504 4860000, 4915200, 4920750, 4976640, 5000000, 5038848, 5062500, 5120000, 5184000,
4505 5242880, 5248800, 5308416, 5314410, 5400000, 5467500, 5529600, 5598720, 5625000,
4506 5668704, 5760000, 5832000, 5859375, 5898240, 5904900, 5971968, 6000000, 6075000,
4507 6144000, 6220800, 6250000, 6291456, 6298560, 6328125, 6377292, 6400000, 6480000,
4508 6553600, 6561000, 6635520, 6718464, 6750000, 6834375, 6912000, 6998400, 7031250,
4509 7077888, 7085880, 7200000, 7290000, 7372800, 7381125, 7464960, 7500000, 7558272,
4510 7593750, 7680000, 7776000, 7812500, 7864320, 7873200, 7962624, 7971615, 8000000,
4511 8100000, 8192000, 8201250, 8294400, 8388608, 8398080, 8437500, 8503056, 8640000,
4512 8748000, 8847360, 8857350, 8957952, 9000000, 9112500, 9216000, 9331200, 9375000,
4513 9437184, 9447840, 9565938, 9600000, 9720000, 9765625, 9830400, 9841500, 9953280,
4514 10000000, 10077696, 10125000, 10240000, 10368000, 10485760, 10497600, 10546875, 10616832,
4515 10628820, 10800000, 10935000, 11059200, 11197440, 11250000, 11337408, 11390625, 11520000,
4516 11664000, 11718750, 11796480, 11809800, 11943936, 12000000, 12150000, 12288000, 12301875,
4517 12441600, 12500000, 12582912, 12597120, 12656250, 12754584, 12800000, 12960000, 13107200,
4518 13122000, 13271040, 13286025, 13436928, 13500000, 13668750, 13824000, 13996800, 14062500,
4519 14155776, 14171760, 14400000, 14580000, 14745600, 14762250, 14929920, 15000000, 15116544,
4520 15187500, 15360000, 15552000, 15625000, 15728640, 15746400, 15925248, 15943230, 16000000,
4521 16200000, 16384000, 16402500, 16588800, 16777216, 16796160, 16875000, 17006112, 17280000,
4522 17496000, 17578125, 17694720, 17714700, 17915904, 18000000, 18225000, 18432000, 18662400,
4523 18750000, 18874368, 18895680, 18984375, 19131876, 19200000, 19440000, 19531250, 19660800,
4524 19683000, 19906560, 20000000, 20155392, 20250000, 20480000, 20503125, 20736000, 20971520,
4525 20995200, 21093750, 21233664, 21257640, 21600000, 21870000, 22118400, 22143375, 22394880,
4526 22500000, 22674816, 22781250, 23040000, 23328000, 23437500, 23592960, 23619600, 23887872,
4527 23914845, 24000000, 24300000, 24576000, 24603750, 24883200, 25000000, 25165824, 25194240,
4528 25312500, 25509168, 25600000, 25920000, 26214400, 26244000, 26542080, 26572050, 26873856,
4529 27000000, 27337500, 27648000, 27993600, 28125000, 28311552, 28343520, 28800000, 29160000,
4530 29296875, 29491200, 29524500, 29859840, 30000000, 30233088, 30375000, 30720000, 31104000,
4531 31250000, 31457280, 31492800, 31640625, 31850496, 31886460, 32000000, 32400000, 32768000,
4532 32805000, 33177600, 33554432, 33592320, 33750000, 34012224, 34171875, 34560000, 34992000,
4533 35156250, 35389440, 35429400, 35831808, 36000000, 36450000, 36864000, 36905625, 37324800,
4534 37500000, 37748736, 37791360, 37968750, 38263752, 38400000, 38880000, 39062500, 39321600,
4535 39366000, 39813120, 39858075, 40000000, 40310784, 40500000, 40960000, 41006250, 41472000,
4536 41943040, 41990400, 42187500, 42467328, 42515280, 43200000, 43740000, 44236800, 44286750,
4537 44789760, 45000000, 45349632, 45562500, 46080000, 46656000, 46875000, 47185920, 47239200,
4538 47775744, 47829690, 48000000, 48600000, 48828125, 49152000, 49207500, 49766400, 50000000,
4539 50331648, 50388480, 50625000, 51018336, 51200000, 51840000, 52428800, 52488000, 52734375,
4540 53084160, 53144100, 53747712, 54000000, 54675000, 55296000, 55987200, 56250000, 56623104,
4541 56687040, 56953125, 57600000, 58320000, 58593750, 58982400, 59049000, 59719680, 60000000,
4542 60466176, 60750000, 61440000, 61509375, 62208000, 62500000, 62914560, 62985600, 63281250,
4543 63700992, 63772920, 64000000, 64800000, 65536000, 65610000, 66355200, 66430125, 67108864,
4544 67184640, 67500000, 68024448, 68343750, 69120000, 69984000, 70312500, 70778880, 70858800,
4545 71663616, 72000000, 72900000, 73728000, 73811250, 74649600, 75000000, 75497472, 75582720,
4546 75937500, 76527504, 76800000, 77760000, 78125000, 78643200, 78732000, 79626240, 79716150,
4547 80000000, 80621568, 81000000, 81920000, 82012500, 82944000, 83886080, 83980800, 84375000,
4548 84934656, 85030560, 86400000, 87480000, 87890625, 88473600, 88573500, 89579520, 90000000,
4549 90699264, 91125000, 92160000, 93312000, 93750000, 94371840, 94478400, 94921875, 95551488,
4550 95659380, 96000000, 97200000, 97656250, 98304000, 98415000, 99532800, 100000000,
4551 100663296, 100776960, 101250000, 102036672, 102400000, 102515625, 103680000, 104857600,
4552 104976000, 105468750, 106168320, 106288200, 107495424, 108000000, 109350000, 110592000,
4553 110716875, 111974400, 112500000, 113246208, 113374080, 113906250, 115200000, 116640000,
4554 117187500, 117964800, 118098000, 119439360, 119574225, 120000000, 120932352, 121500000,
4555 122880000, 123018750, 124416000, 125000000, 125829120, 125971200, 126562500, 127401984,
4556 127545840, 128000000, 129600000, 131072000, 131220000, 132710400, 132860250, 134217728,
4557 134369280, 135000000, 136048896, 136687500, 138240000, 139968000, 140625000, 141557760,
4558 141717600, 143327232, 144000000, 145800000, 146484375, 147456000, 147622500, 149299200,
4559 150000000, 150994944, 151165440, 151875000, 153055008, 153600000, 155520000, 156250000,
4560 157286400, 157464000, 158203125, 159252480, 159432300, 160000000, 161243136, 162000000,
4561 163840000, 164025000, 165888000, 167772160, 167961600, 168750000, 169869312, 170061120,
4562 170859375, 172800000, 174960000, 175781250, 176947200, 177147000, 179159040, 180000000,
4563 181398528, 182250000, 184320000, 184528125, 186624000, 187500000, 188743680, 188956800,
4564 189843750, 191102976, 191318760, 192000000, 194400000, 195312500, 196608000, 196830000,
4565 199065600, 199290375, 200000000, 201326592, 201553920, 202500000, 204073344, 204800000,
4566 205031250, 207360000, 209715200, 209952000, 210937500, 212336640, 212576400, 214990848,
4567 216000000, 218700000, 221184000, 221433750, 223948800, 225000000, 226492416, 226748160,
4568 227812500, 230400000, 233280000, 234375000, 235929600, 236196000, 238878720, 239148450,
4569 240000000, 241864704, 243000000, 244140625, 245760000, 246037500, 248832000, 250000000,
4570 251658240, 251942400, 253125000, 254803968, 255091680, 256000000, 259200000, 262144000,
4571 262440000, 263671875, 265420800, 265720500, 268435456, 268738560, 270000000, 272097792,
4572 273375000, 276480000, 279936000, 281250000, 283115520, 283435200, 284765625, 286654464,
4573 288000000, 291600000, 292968750, 294912000, 295245000, 298598400, 300000000, 301989888,
4574 302330880, 303750000, 306110016, 307200000, 307546875, 311040000, 312500000, 314572800,
4575 314928000, 316406250, 318504960, 318864600, 320000000, 322486272, 324000000, 327680000,
4576 328050000, 331776000, 332150625, 335544320, 335923200, 337500000, 339738624, 340122240,
4577 341718750, 345600000, 349920000, 351562500, 353894400, 354294000, 358318080, 360000000,
4578 362797056, 364500000, 368640000, 369056250, 373248000, 375000000, 377487360, 377913600,
4579 379687500, 382205952, 382637520, 384000000, 388800000, 390625000, 393216000, 393660000,
4580 398131200, 398580750, 400000000, 402653184, 403107840, 405000000, 408146688, 409600000,
4581 410062500, 414720000, 419430400, 419904000, 421875000, 424673280, 425152800, 429981696,
4582 432000000, 437400000, 439453125, 442368000, 442867500, 447897600, 450000000, 452984832,
4583 453496320, 455625000, 460800000, 466560000, 468750000, 471859200, 472392000, 474609375,
4584 477757440, 478296900, 480000000, 483729408, 486000000, 488281250, 491520000, 492075000,
4585 497664000, 500000000, 503316480, 503884800, 506250000, 509607936, 510183360, 512000000,
4586 512578125, 518400000, 524288000, 524880000, 527343750, 530841600, 531441000, 536870912,
4587 537477120, 540000000, 544195584, 546750000, 552960000, 553584375, 559872000, 562500000,
4588 566231040, 566870400, 569531250, 573308928, 576000000, 583200000, 585937500, 589824000,
4589 590490000, 597196800, 597871125, 600000000, 603979776, 604661760, 607500000, 612220032,
4590 614400000, 615093750, 622080000, 625000000, 629145600, 629856000, 632812500, 637009920,
4591 637729200, 640000000, 644972544, 648000000, 655360000, 656100000, 663552000, 664301250,
4592 671088640, 671846400, 675000000, 679477248, 680244480, 683437500, 691200000, 699840000,
4593 703125000, 707788800, 708588000, 716636160, 720000000, 725594112, 729000000, 732421875,
4594 737280000, 738112500, 746496000, 750000000, 754974720, 755827200, 759375000, 764411904,
4595 765275040, 768000000, 777600000, 781250000, 786432000, 787320000, 791015625, 796262400,
4596 797161500, 800000000, 805306368, 806215680, 810000000, 816293376, 819200000, 820125000,
4597 829440000, 838860800, 839808000, 843750000, 849346560, 850305600, 854296875, 859963392,
4598 864000000, 874800000, 878906250, 884736000, 885735000, 895795200, 900000000, 905969664,
4599 906992640, 911250000, 921600000, 922640625, 933120000, 937500000, 943718400, 944784000,
4600 949218750, 955514880, 956593800, 960000000, 967458816, 972000000, 976562500, 983040000,
4601 984150000, 995328000, 996451875, 1000000000, 1006632960, 1007769600, 1012500000,
4602 1019215872, 1020366720, 1024000000, 1025156250, 1036800000, 1048576000, 1049760000,
4603 1054687500, 1061683200, 1062882000, 1073741824, 1074954240, 1080000000, 1088391168,
4604 1093500000, 1105920000, 1107168750, 1119744000, 1125000000, 1132462080, 1133740800,
4605 1139062500, 1146617856, 1152000000, 1166400000, 1171875000, 1179648000, 1180980000,
4606 1194393600, 1195742250, 1200000000, 1207959552, 1209323520, 1215000000, 1220703125,
4607 1224440064, 1228800000, 1230187500, 1244160000, 1250000000, 1258291200, 1259712000,
4608 1265625000, 1274019840, 1275458400, 1280000000, 1289945088, 1296000000, 1310720000,
4609 1312200000, 1318359375, 1327104000, 1328602500, 1342177280, 1343692800, 1350000000,
4610 1358954496, 1360488960, 1366875000, 1382400000, 1399680000, 1406250000, 1415577600,
4611 1417176000, 1423828125, 1433272320, 1440000000, 1451188224, 1458000000, 1464843750,
4612 1474560000, 1476225000, 1492992000, 1500000000, 1509949440, 1511654400, 1518750000,
4613 1528823808, 1530550080, 1536000000, 1537734375, 1555200000, 1562500000, 1572864000,
4614 1574640000, 1582031250, 1592524800, 1594323000, 1600000000, 1610612736, 1612431360,
4615 1620000000, 1632586752, 1638400000, 1640250000, 1658880000, 1660753125, 1677721600,
4616 1679616000, 1687500000, 1698693120, 1700611200, 1708593750, 1719926784, 1728000000,
4617 1749600000, 1757812500, 1769472000, 1771470000, 1791590400, 1800000000, 1811939328,
4618 1813985280, 1822500000, 1843200000, 1845281250, 1866240000, 1875000000, 1887436800,
4619 1889568000, 1898437500, 1911029760, 1913187600, 1920000000, 1934917632, 1944000000,
4620 1953125000, 1966080000, 1968300000, 1990656000, 1992903750, 2000000000, 2013265920,
4621 2015539200, 2025000000, 2038431744, 2040733440, 2048000000, 2050312500, 2073600000,
4622 2097152000, 2099520000, 2109375000, 2123366400, 2125764000
4623 };
4624
4625 }
4626
getOptimalDFTSize(int size0)4627 int cv::getOptimalDFTSize( int size0 )
4628 {
4629 int a = 0, b = sizeof(optimalDFTSizeTab)/sizeof(optimalDFTSizeTab[0]) - 1;
4630 if( (unsigned)size0 >= (unsigned)optimalDFTSizeTab[b] )
4631 return -1;
4632
4633 while( a < b )
4634 {
4635 int c = (a + b) >> 1;
4636 if( size0 <= optimalDFTSizeTab[c] )
4637 b = c;
4638 else
4639 a = c+1;
4640 }
4641
4642 return optimalDFTSizeTab[b];
4643 }
4644
4645
4646 #ifndef OPENCV_EXCLUDE_C_API
4647
4648 CV_IMPL void
cvDFT(const CvArr * srcarr,CvArr * dstarr,int flags,int nonzero_rows)4649 cvDFT( const CvArr* srcarr, CvArr* dstarr, int flags, int nonzero_rows )
4650 {
4651 cv::Mat src = cv::cvarrToMat(srcarr), dst0 = cv::cvarrToMat(dstarr), dst = dst0;
4652 int _flags = ((flags & CV_DXT_INVERSE) ? cv::DFT_INVERSE : 0) |
4653 ((flags & CV_DXT_SCALE) ? cv::DFT_SCALE : 0) |
4654 ((flags & CV_DXT_ROWS) ? cv::DFT_ROWS : 0);
4655
4656 CV_Assert( src.size == dst.size );
4657
4658 if( src.type() != dst.type() )
4659 {
4660 if( dst.channels() == 2 )
4661 _flags |= cv::DFT_COMPLEX_OUTPUT;
4662 else
4663 _flags |= cv::DFT_REAL_OUTPUT;
4664 }
4665
4666 cv::dft( src, dst, _flags, nonzero_rows );
4667 CV_Assert( dst.data == dst0.data ); // otherwise it means that the destination size or type was incorrect
4668 }
4669
4670
4671 CV_IMPL void
cvMulSpectrums(const CvArr * srcAarr,const CvArr * srcBarr,CvArr * dstarr,int flags)4672 cvMulSpectrums( const CvArr* srcAarr, const CvArr* srcBarr,
4673 CvArr* dstarr, int flags )
4674 {
4675 cv::Mat srcA = cv::cvarrToMat(srcAarr),
4676 srcB = cv::cvarrToMat(srcBarr),
4677 dst = cv::cvarrToMat(dstarr);
4678 CV_Assert( srcA.size == dst.size && srcA.type() == dst.type() );
4679
4680 cv::mulSpectrums(srcA, srcB, dst,
4681 (flags & CV_DXT_ROWS) ? cv::DFT_ROWS : 0,
4682 (flags & CV_DXT_MUL_CONJ) != 0 );
4683 }
4684
4685
4686 CV_IMPL void
cvDCT(const CvArr * srcarr,CvArr * dstarr,int flags)4687 cvDCT( const CvArr* srcarr, CvArr* dstarr, int flags )
4688 {
4689 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
4690 CV_Assert( src.size == dst.size && src.type() == dst.type() );
4691 int _flags = ((flags & CV_DXT_INVERSE) ? cv::DCT_INVERSE : 0) |
4692 ((flags & CV_DXT_ROWS) ? cv::DCT_ROWS : 0);
4693 cv::dct( src, dst, _flags );
4694 }
4695
4696
4697 CV_IMPL int
cvGetOptimalDFTSize(int size0)4698 cvGetOptimalDFTSize( int size0 )
4699 {
4700 return cv::getOptimalDFTSize(size0);
4701 }
4702
4703 #endif // OPENCV_EXCLUDE_C_API
4704 /* End of file. */
4705