1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                        Intel License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
15 //
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
18 //
19 //   * Redistribution's of source code must retain the above copyright notice,
20 //     this list of conditions and the following disclaimer.
21 //
22 //   * Redistribution's in binary form must reproduce the above copyright notice,
23 //     this list of conditions and the following disclaimer in the documentation
24 //     and/or other materials provided with the distribution.
25 //
26 //   * The name of Intel Corporation may not be used to endorse or promote products
27 //     derived from this software without specific prior written permission.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Intel Corporation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 //M*/
41 
42 #include "precomp.hpp"
43 #include "opencv2/core/opencl/runtime/opencl_clfft.hpp"
44 #include "opencv2/core/opencl/runtime/opencl_core.hpp"
45 #include "opencl_kernels_core.hpp"
46 #include <map>
47 
48 namespace cv
49 {
50 
51 // On Win64 optimized versions of DFT and DCT fail the tests (fixed in VS2010)
52 #if defined _MSC_VER && !defined CV_ICC && defined _M_X64 && _MSC_VER < 1600
53 # pragma optimize("", off)
54 # pragma warning(disable: 4748)
55 #endif
56 
57 #if IPP_VERSION_X100 >= 710
58 #define USE_IPP_DFT 1
59 #else
60 #undef USE_IPP_DFT
61 #endif
62 
63 /****************************************************************************************\
64                                Discrete Fourier Transform
65 \****************************************************************************************/
66 
67 #define CV_MAX_LOCAL_DFT_SIZE  (1 << 15)
68 
69 static unsigned char bitrevTab[] =
70 {
71   0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0,0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0,
72   0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8,0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8,
73   0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4,0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4,
74   0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec,0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc,
75   0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2,0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2,
76   0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea,0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa,
77   0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6,0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6,
78   0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee,0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe,
79   0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1,0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1,
80   0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9,0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9,
81   0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5,0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5,
82   0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed,0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd,
83   0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3,0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3,
84   0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb,0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb,
85   0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7,0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7,
86   0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef,0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
87 };
88 
89 static const double DFTTab[][2] =
90 {
91 { 1.00000000000000000, 0.00000000000000000 },
92 {-1.00000000000000000, 0.00000000000000000 },
93 { 0.00000000000000000, 1.00000000000000000 },
94 { 0.70710678118654757, 0.70710678118654746 },
95 { 0.92387953251128674, 0.38268343236508978 },
96 { 0.98078528040323043, 0.19509032201612825 },
97 { 0.99518472667219693, 0.09801714032956060 },
98 { 0.99879545620517241, 0.04906767432741802 },
99 { 0.99969881869620425, 0.02454122852291229 },
100 { 0.99992470183914450, 0.01227153828571993 },
101 { 0.99998117528260111, 0.00613588464915448 },
102 { 0.99999529380957619, 0.00306795676296598 },
103 { 0.99999882345170188, 0.00153398018628477 },
104 { 0.99999970586288223, 0.00076699031874270 },
105 { 0.99999992646571789, 0.00038349518757140 },
106 { 0.99999998161642933, 0.00019174759731070 },
107 { 0.99999999540410733, 0.00009587379909598 },
108 { 0.99999999885102686, 0.00004793689960307 },
109 { 0.99999999971275666, 0.00002396844980842 },
110 { 0.99999999992818922, 0.00001198422490507 },
111 { 0.99999999998204725, 0.00000599211245264 },
112 { 0.99999999999551181, 0.00000299605622633 },
113 { 0.99999999999887801, 0.00000149802811317 },
114 { 0.99999999999971945, 0.00000074901405658 },
115 { 0.99999999999992983, 0.00000037450702829 },
116 { 0.99999999999998246, 0.00000018725351415 },
117 { 0.99999999999999567, 0.00000009362675707 },
118 { 0.99999999999999889, 0.00000004681337854 },
119 { 0.99999999999999978, 0.00000002340668927 },
120 { 0.99999999999999989, 0.00000001170334463 },
121 { 1.00000000000000000, 0.00000000585167232 },
122 { 1.00000000000000000, 0.00000000292583616 }
123 };
124 
125 namespace {
126 template <typename T>
127 struct Constants {
128     static const T sin_120;
129     static const T fft5_2;
130     static const T fft5_3;
131     static const T fft5_4;
132     static const T fft5_5;
133 };
134 
135 template <typename T>
136 const T Constants<T>::sin_120 = (T)0.86602540378443864676372317075294;
137 
138 template <typename T>
139 const T Constants<T>::fft5_2 = (T)0.559016994374947424102293417182819;
140 
141 template <typename T>
142 const T Constants<T>::fft5_3 = (T)-0.951056516295153572116439333379382;
143 
144 template <typename T>
145 const T Constants<T>::fft5_4 = (T)-1.538841768587626701285145288018455;
146 
147 template <typename T>
148 const T Constants<T>::fft5_5 = (T)0.363271264002680442947733378740309;
149 
150 }  //namespace
151 
152 #define BitRev(i,shift) \
153    ((int)((((unsigned)bitrevTab[(i)&255] << 24)+ \
154            ((unsigned)bitrevTab[((i)>> 8)&255] << 16)+ \
155            ((unsigned)bitrevTab[((i)>>16)&255] <<  8)+ \
156            ((unsigned)bitrevTab[((i)>>24)])) >> (shift)))
157 
158 static int
DFTFactorize(int n,int * factors)159 DFTFactorize( int n, int* factors )
160 {
161     int nf = 0, f, i, j;
162 
163     if( n <= 5 )
164     {
165         factors[0] = n;
166         return 1;
167     }
168 
169     f = (((n - 1)^n)+1) >> 1;
170     if( f > 1 )
171     {
172         factors[nf++] = f;
173         n = f == n ? 1 : n/f;
174     }
175 
176     for( f = 3; n > 1; )
177     {
178         int d = n/f;
179         if( d*f == n )
180         {
181             factors[nf++] = f;
182             n = d;
183         }
184         else
185         {
186             f += 2;
187             if( f*f > n )
188                 break;
189         }
190     }
191 
192     if( n > 1 )
193         factors[nf++] = n;
194 
195     f = (factors[0] & 1) == 0;
196     for( i = f; i < (nf+f)/2; i++ )
197         CV_SWAP( factors[i], factors[nf-i-1+f], j );
198 
199     return nf;
200 }
201 
202 static void
DFTInit(int n0,int nf,const int * factors,int * itab,int elem_size,void * _wave,int inv_itab)203 DFTInit( int n0, int nf, const int* factors, int* itab, int elem_size, void* _wave, int inv_itab )
204 {
205     int digits[34], radix[34];
206     int n = factors[0], m = 0;
207     int* itab0 = itab;
208     int i, j, k;
209     Complex<double> w, w1;
210     double t;
211 
212     if( n0 <= 5 )
213     {
214         itab[0] = 0;
215         itab[n0-1] = n0-1;
216 
217         if( n0 != 4 )
218         {
219             for( i = 1; i < n0-1; i++ )
220                 itab[i] = i;
221         }
222         else
223         {
224             itab[1] = 2;
225             itab[2] = 1;
226         }
227         if( n0 == 5 )
228         {
229             if( elem_size == sizeof(Complex<double>) )
230                 ((Complex<double>*)_wave)[0] = Complex<double>(1.,0.);
231             else
232                 ((Complex<float>*)_wave)[0] = Complex<float>(1.f,0.f);
233         }
234         if( n0 != 4 )
235             return;
236         m = 2;
237     }
238     else
239     {
240         // radix[] is initialized from index 'nf' down to zero
241         assert (nf < 34);
242         radix[nf] = 1;
243         digits[nf] = 0;
244         for( i = 0; i < nf; i++ )
245         {
246             digits[i] = 0;
247             radix[nf-i-1] = radix[nf-i]*factors[nf-i-1];
248         }
249 
250         if( inv_itab && factors[0] != factors[nf-1] )
251             itab = (int*)_wave;
252 
253         if( (n & 1) == 0 )
254         {
255             int a = radix[1], na2 = n*a>>1, na4 = na2 >> 1;
256             for( m = 0; (unsigned)(1 << m) < (unsigned)n; m++ )
257                 ;
258             if( n <= 2  )
259             {
260                 itab[0] = 0;
261                 itab[1] = na2;
262             }
263             else if( n <= 256 )
264             {
265                 int shift = 10 - m;
266                 for( i = 0; i <= n - 4; i += 4 )
267                 {
268                     j = (bitrevTab[i>>2]>>shift)*a;
269                     itab[i] = j;
270                     itab[i+1] = j + na2;
271                     itab[i+2] = j + na4;
272                     itab[i+3] = j + na2 + na4;
273                 }
274             }
275             else
276             {
277                 int shift = 34 - m;
278                 for( i = 0; i < n; i += 4 )
279                 {
280                     int i4 = i >> 2;
281                     j = BitRev(i4,shift)*a;
282                     itab[i] = j;
283                     itab[i+1] = j + na2;
284                     itab[i+2] = j + na4;
285                     itab[i+3] = j + na2 + na4;
286                 }
287             }
288 
289             digits[1]++;
290 
291             if( nf >= 2 )
292             {
293                 for( i = n, j = radix[2]; i < n0; )
294                 {
295                     for( k = 0; k < n; k++ )
296                         itab[i+k] = itab[k] + j;
297                     if( (i += n) >= n0 )
298                         break;
299                     j += radix[2];
300                     for( k = 1; ++digits[k] >= factors[k]; k++ )
301                     {
302                         digits[k] = 0;
303                         j += radix[k+2] - radix[k];
304                     }
305                 }
306             }
307         }
308         else
309         {
310             for( i = 0, j = 0;; )
311             {
312                 itab[i] = j;
313                 if( ++i >= n0 )
314                     break;
315                 j += radix[1];
316                 for( k = 0; ++digits[k] >= factors[k]; k++ )
317                 {
318                     digits[k] = 0;
319                     j += radix[k+2] - radix[k];
320                 }
321             }
322         }
323 
324         if( itab != itab0 )
325         {
326             itab0[0] = 0;
327             for( i = n0 & 1; i < n0; i += 2 )
328             {
329                 int k0 = itab[i];
330                 int k1 = itab[i+1];
331                 itab0[k0] = i;
332                 itab0[k1] = i+1;
333             }
334         }
335     }
336 
337     if( (n0 & (n0-1)) == 0 )
338     {
339         w.re = w1.re = DFTTab[m][0];
340         w.im = w1.im = -DFTTab[m][1];
341     }
342     else
343     {
344         t = -CV_PI*2/n0;
345         w.im = w1.im = sin(t);
346         w.re = w1.re = std::sqrt(1. - w1.im*w1.im);
347     }
348     n = (n0+1)/2;
349 
350     if( elem_size == sizeof(Complex<double>) )
351     {
352         Complex<double>* wave = (Complex<double>*)_wave;
353 
354         wave[0].re = 1.;
355         wave[0].im = 0.;
356 
357         if( (n0 & 1) == 0 )
358         {
359             wave[n].re = -1.;
360             wave[n].im = 0;
361         }
362 
363         for( i = 1; i < n; i++ )
364         {
365             wave[i] = w;
366             wave[n0-i].re = w.re;
367             wave[n0-i].im = -w.im;
368 
369             t = w.re*w1.re - w.im*w1.im;
370             w.im = w.re*w1.im + w.im*w1.re;
371             w.re = t;
372         }
373     }
374     else
375     {
376         Complex<float>* wave = (Complex<float>*)_wave;
377         assert( elem_size == sizeof(Complex<float>) );
378 
379         wave[0].re = 1.f;
380         wave[0].im = 0.f;
381 
382         if( (n0 & 1) == 0 )
383         {
384             wave[n].re = -1.f;
385             wave[n].im = 0.f;
386         }
387 
388         for( i = 1; i < n; i++ )
389         {
390             wave[i].re = (float)w.re;
391             wave[i].im = (float)w.im;
392             wave[n0-i].re = (float)w.re;
393             wave[n0-i].im = (float)-w.im;
394 
395             t = w.re*w1.re - w.im*w1.im;
396             w.im = w.re*w1.im + w.im*w1.re;
397             w.re = t;
398         }
399     }
400 }
401 
402 // Reference radix-2 implementation.
403 template<typename T> struct DFT_R2
404 {
operator ()cv::DFT_R2405     void operator()(Complex<T>* dst, const int c_n, const int n, const int dw0, const Complex<T>* wave) const {
406         const int nx = n/2;
407         for(int i = 0 ; i < c_n; i += n)
408         {
409             Complex<T>* v = dst + i;
410             T r0 = v[0].re + v[nx].re;
411             T i0 = v[0].im + v[nx].im;
412             T r1 = v[0].re - v[nx].re;
413             T i1 = v[0].im - v[nx].im;
414             v[0].re = r0; v[0].im = i0;
415             v[nx].re = r1; v[nx].im = i1;
416 
417             for( int j = 1, dw = dw0; j < nx; j++, dw += dw0 )
418             {
419                 v = dst + i + j;
420                 r1 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im;
421                 i1 = v[nx].im*wave[dw].re + v[nx].re*wave[dw].im;
422                 r0 = v[0].re; i0 = v[0].im;
423 
424                 v[0].re = r0 + r1; v[0].im = i0 + i1;
425                 v[nx].re = r0 - r1; v[nx].im = i0 - i1;
426             }
427         }
428     }
429 };
430 
431 // Reference radix-3 implementation.
432 template<typename T> struct DFT_R3
433 {
operator ()cv::DFT_R3434     void operator()(Complex<T>* dst, const int c_n, const int n, const int dw0, const Complex<T>* wave) const {
435         const int nx = n / 3;
436         for(int i = 0; i < c_n; i += n )
437         {
438             {
439                 Complex<T>* v = dst + i;
440                 T r1 = v[nx].re + v[nx*2].re;
441                 T i1 = v[nx].im + v[nx*2].im;
442                 T r0 = v[0].re;
443                 T i0 = v[0].im;
444                 T r2 = Constants<T>::sin_120*(v[nx].im - v[nx*2].im);
445                 T i2 = Constants<T>::sin_120*(v[nx*2].re - v[nx].re);
446                 v[0].re = r0 + r1; v[0].im = i0 + i1;
447                 r0 -= (T)0.5*r1; i0 -= (T)0.5*i1;
448                 v[nx].re = r0 + r2; v[nx].im = i0 + i2;
449                 v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2;
450             }
451 
452             for(int j = 1, dw = dw0; j < nx; j++, dw += dw0 )
453             {
454                 Complex<T>* v = dst + i + j;
455                 T r0 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im;
456                 T i0 = v[nx].re*wave[dw].im + v[nx].im*wave[dw].re;
457                 T i2 = v[nx*2].re*wave[dw*2].re - v[nx*2].im*wave[dw*2].im;
458                 T r2 = v[nx*2].re*wave[dw*2].im + v[nx*2].im*wave[dw*2].re;
459                 T r1 = r0 + i2; T i1 = i0 + r2;
460 
461                 r2 = Constants<T>::sin_120*(i0 - r2); i2 = Constants<T>::sin_120*(i2 - r0);
462                 r0 = v[0].re; i0 = v[0].im;
463                 v[0].re = r0 + r1; v[0].im = i0 + i1;
464                 r0 -= (T)0.5*r1; i0 -= (T)0.5*i1;
465                 v[nx].re = r0 + r2; v[nx].im = i0 + i2;
466                 v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2;
467             }
468         }
469     }
470 };
471 
472 // Reference radix-5 implementation.
473 template<typename T> struct DFT_R5
474 {
operator ()cv::DFT_R5475     void operator()(Complex<T>* dst, const int c_n, const int n, const int dw0, const Complex<T>* wave) const {
476         const int nx = n / 5;
477         for(int i = 0; i < c_n; i += n )
478         {
479             for(int j = 0, dw = 0; j < nx; j++, dw += dw0 )
480             {
481                 Complex<T>* v0 = dst + i + j;
482                 Complex<T>* v1 = v0 + nx*2;
483                 Complex<T>* v2 = v1 + nx*2;
484 
485                 T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4, r5, i5;
486 
487                 r3 = v0[nx].re*wave[dw].re - v0[nx].im*wave[dw].im;
488                 i3 = v0[nx].re*wave[dw].im + v0[nx].im*wave[dw].re;
489                 r2 = v2[0].re*wave[dw*4].re - v2[0].im*wave[dw*4].im;
490                 i2 = v2[0].re*wave[dw*4].im + v2[0].im*wave[dw*4].re;
491 
492                 r1 = r3 + r2; i1 = i3 + i2;
493                 r3 -= r2; i3 -= i2;
494 
495                 r4 = v1[nx].re*wave[dw*3].re - v1[nx].im*wave[dw*3].im;
496                 i4 = v1[nx].re*wave[dw*3].im + v1[nx].im*wave[dw*3].re;
497                 r0 = v1[0].re*wave[dw*2].re - v1[0].im*wave[dw*2].im;
498                 i0 = v1[0].re*wave[dw*2].im + v1[0].im*wave[dw*2].re;
499 
500                 r2 = r4 + r0; i2 = i4 + i0;
501                 r4 -= r0; i4 -= i0;
502 
503                 r0 = v0[0].re; i0 = v0[0].im;
504                 r5 = r1 + r2; i5 = i1 + i2;
505 
506                 v0[0].re = r0 + r5; v0[0].im = i0 + i5;
507 
508                 r0 -= (T)0.25*r5; i0 -= (T)0.25*i5;
509                 r1 = Constants<T>::fft5_2*(r1 - r2); i1 = Constants<T>::fft5_2*(i1 - i2);
510                 r2 = -Constants<T>::fft5_3*(i3 + i4); i2 = Constants<T>::fft5_3*(r3 + r4);
511 
512                 i3 *= -Constants<T>::fft5_5; r3 *= Constants<T>::fft5_5;
513                 i4 *= -Constants<T>::fft5_4; r4 *= Constants<T>::fft5_4;
514 
515                 r5 = r2 + i3; i5 = i2 + r3;
516                 r2 -= i4; i2 -= r4;
517 
518                 r3 = r0 + r1; i3 = i0 + i1;
519                 r0 -= r1; i0 -= i1;
520 
521                 v0[nx].re = r3 + r2; v0[nx].im = i3 + i2;
522                 v2[0].re = r3 - r2; v2[0].im = i3 - i2;
523 
524                 v1[0].re = r0 + r5; v1[0].im = i0 + i5;
525                 v1[nx].re = r0 - r5; v1[nx].im = i0 - i5;
526             }
527         }
528     }
529 };
530 
531 template<typename T> struct DFT_VecR2
532 {
operator ()cv::DFT_VecR2533     void operator()(Complex<T>* dst, const int c_n, const int n, const int dw0, const Complex<T>* wave) const {
534         DFT_R2<T>()(dst, c_n, n, dw0, wave);
535     }
536 };
537 
538 template<typename T> struct DFT_VecR3
539 {
operator ()cv::DFT_VecR3540     void operator()(Complex<T>* dst, const int c_n, const int n, const int dw0, const Complex<T>* wave) const {
541         DFT_R3<T>()(dst, c_n, n, dw0, wave);
542     }
543 };
544 
545 template<typename T> struct DFT_VecR4
546 {
operator ()cv::DFT_VecR4547     int operator()(Complex<T>*, int, int, int&, const Complex<T>*) const { return 1; }
548 };
549 
550 #if CV_SSE3
551 
552 // multiplies *a and *b:
553 //  r_re + i*r_im = (a_re + i*a_im)*(b_re + i*b_im)
554 // r_re and r_im are placed respectively in bits 31:0 and 63:32 of the resulting
555 // vector register.
complexMul(const Complex<float> * const a,const Complex<float> * const b)556 inline __m128 complexMul(const Complex<float>* const a, const Complex<float>* const b) {
557     const __m128 z = _mm_setzero_ps();
558     const __m128 neg_elem0 = _mm_set_ps(0.0f,0.0f,0.0f,-0.0f);
559     // v_a[31:0] is a->re and v_a[63:32] is a->im.
560     const __m128 v_a = _mm_loadl_pi(z, (const __m64*)a);
561     const __m128 v_b = _mm_loadl_pi(z, (const __m64*)b);
562     // x_1 = v[nx] * wave[dw].
563     const __m128 v_a_riri = _mm_shuffle_ps(v_a, v_a, _MM_SHUFFLE(0, 1, 0, 1));
564     const __m128 v_b_irri = _mm_shuffle_ps(v_b, v_b, _MM_SHUFFLE(1, 0, 0, 1));
565     const __m128 mul = _mm_mul_ps(v_a_riri, v_b_irri);
566     const __m128 xored = _mm_xor_ps(mul, neg_elem0);
567     return _mm_hadd_ps(xored, z);
568 }
569 
570 // optimized radix-2 transform
571 template<> struct DFT_VecR2<float> {
operator ()cv::DFT_VecR2572     void operator()(Complex<float>* dst, const int c_n, const int n, const int dw0, const Complex<float>* wave) const {
573         const __m128 z = _mm_setzero_ps();
574         const int nx = n/2;
575         for(int i = 0 ; i < c_n; i += n)
576         {
577             {
578                 Complex<float>* v = dst + i;
579                 float r0 = v[0].re + v[nx].re;
580                 float i0 = v[0].im + v[nx].im;
581                 float r1 = v[0].re - v[nx].re;
582                 float i1 = v[0].im - v[nx].im;
583                 v[0].re = r0; v[0].im = i0;
584                 v[nx].re = r1; v[nx].im = i1;
585             }
586 
587             for( int j = 1, dw = dw0; j < nx; j++, dw += dw0 )
588             {
589                 Complex<float>* v = dst + i + j;
590                 const __m128 x_1 = complexMul(&v[nx], &wave[dw]);
591                 const __m128 v_0 = _mm_loadl_pi(z, (const __m64*)&v[0]);
592                 _mm_storel_pi((__m64*)&v[0], _mm_add_ps(v_0, x_1));
593                 _mm_storel_pi((__m64*)&v[nx], _mm_sub_ps(v_0, x_1));
594             }
595         }
596     }
597 };
598 
599 // Optimized radix-3 implementation.
600 template<> struct DFT_VecR3<float> {
operator ()cv::DFT_VecR3601     void operator()(Complex<float>* dst, const int c_n, const int n, const int dw0, const Complex<float>* wave) const {
602         const int nx = n / 3;
603         const __m128 z = _mm_setzero_ps();
604         const __m128 neg_elem1 = _mm_set_ps(0.0f,0.0f,-0.0f,0.0f);
605         const __m128 sin_120 = _mm_set1_ps(Constants<float>::sin_120);
606         const __m128 one_half = _mm_set1_ps(0.5f);
607         for(int i = 0; i < c_n; i += n )
608         {
609             {
610                 Complex<float>* v = dst + i;
611 
612                 float r1 = v[nx].re + v[nx*2].re;
613                 float i1 = v[nx].im + v[nx*2].im;
614                 float r0 = v[0].re;
615                 float i0 = v[0].im;
616                 float r2 = Constants<float>::sin_120*(v[nx].im - v[nx*2].im);
617                 float i2 = Constants<float>::sin_120*(v[nx*2].re - v[nx].re);
618                 v[0].re = r0 + r1; v[0].im = i0 + i1;
619                 r0 -= (float)0.5*r1; i0 -= (float)0.5*i1;
620                 v[nx].re = r0 + r2; v[nx].im = i0 + i2;
621                 v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2;
622             }
623 
624             for(int j = 1, dw = dw0; j < nx; j++, dw += dw0 )
625             {
626                 Complex<float>* v = dst + i + j;
627                 const __m128 x_0 = complexMul(&v[nx], &wave[dw]);
628                 const __m128 x_2 = complexMul(&v[nx*2], &wave[dw*2]);
629                 const __m128 x_1 = _mm_add_ps(x_0, x_2);
630 
631                 const __m128 v_0 = _mm_loadl_pi(z, (const __m64*)&v[0]);
632                 _mm_storel_pi((__m64*)&v[0], _mm_add_ps(v_0, x_1));
633 
634                 const __m128 x_3 = _mm_mul_ps(sin_120, _mm_xor_ps(_mm_sub_ps(x_2, x_0), neg_elem1));
635                 const __m128 x_3s = _mm_shuffle_ps(x_3, x_3, _MM_SHUFFLE(0, 1, 0, 1));
636                 const __m128 x_4 = _mm_sub_ps(v_0, _mm_mul_ps(one_half, x_1));
637                 _mm_storel_pi((__m64*)&v[nx], _mm_add_ps(x_4, x_3s));
638                 _mm_storel_pi((__m64*)&v[nx*2], _mm_sub_ps(x_4, x_3s));
639             }
640         }
641     }
642 };
643 
644 // optimized radix-4 transform
645 template<> struct DFT_VecR4<float>
646 {
operator ()cv::DFT_VecR4647     int operator()(Complex<float>* dst, int N, int n0, int& _dw0, const Complex<float>* wave) const
648     {
649         int n = 1, i, j, nx, dw, dw0 = _dw0;
650         __m128 z = _mm_setzero_ps(), x02=z, x13=z, w01=z, w23=z, y01, y23, t0, t1;
651         Cv32suf t; t.i = 0x80000000;
652         __m128 neg0_mask = _mm_load_ss(&t.f);
653         __m128 neg3_mask = _mm_shuffle_ps(neg0_mask, neg0_mask, _MM_SHUFFLE(0,1,2,3));
654 
655         for( ; n*4 <= N; )
656         {
657             nx = n;
658             n *= 4;
659             dw0 /= 4;
660 
661             for( i = 0; i < n0; i += n )
662             {
663                 Complexf *v0, *v1;
664 
665                 v0 = dst + i;
666                 v1 = v0 + nx*2;
667 
668                 x02 = _mm_loadl_pi(x02, (const __m64*)&v0[0]);
669                 x13 = _mm_loadl_pi(x13, (const __m64*)&v0[nx]);
670                 x02 = _mm_loadh_pi(x02, (const __m64*)&v1[0]);
671                 x13 = _mm_loadh_pi(x13, (const __m64*)&v1[nx]);
672 
673                 y01 = _mm_add_ps(x02, x13);
674                 y23 = _mm_sub_ps(x02, x13);
675                 t1 = _mm_xor_ps(_mm_shuffle_ps(y01, y23, _MM_SHUFFLE(2,3,3,2)), neg3_mask);
676                 t0 = _mm_movelh_ps(y01, y23);
677                 y01 = _mm_add_ps(t0, t1);
678                 y23 = _mm_sub_ps(t0, t1);
679 
680                 _mm_storel_pi((__m64*)&v0[0], y01);
681                 _mm_storeh_pi((__m64*)&v0[nx], y01);
682                 _mm_storel_pi((__m64*)&v1[0], y23);
683                 _mm_storeh_pi((__m64*)&v1[nx], y23);
684 
685                 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
686                 {
687                     v0 = dst + i + j;
688                     v1 = v0 + nx*2;
689 
690                     x13 = _mm_loadl_pi(x13, (const __m64*)&v0[nx]);
691                     w23 = _mm_loadl_pi(w23, (const __m64*)&wave[dw*2]);
692                     x13 = _mm_loadh_pi(x13, (const __m64*)&v1[nx]); // x1, x3 = r1 i1 r3 i3
693                     w23 = _mm_loadh_pi(w23, (const __m64*)&wave[dw*3]); // w2, w3 = wr2 wi2 wr3 wi3
694 
695                     t0 = _mm_mul_ps(_mm_moveldup_ps(x13), w23);
696                     t1 = _mm_mul_ps(_mm_movehdup_ps(x13), _mm_shuffle_ps(w23, w23, _MM_SHUFFLE(2,3,0,1)));
697                     x13 = _mm_addsub_ps(t0, t1);
698                     // re(x1*w2), im(x1*w2), re(x3*w3), im(x3*w3)
699                     x02 = _mm_loadl_pi(x02, (const __m64*)&v1[0]); // x2 = r2 i2
700                     w01 = _mm_loadl_pi(w01, (const __m64*)&wave[dw]); // w1 = wr1 wi1
701                     x02 = _mm_shuffle_ps(x02, x02, _MM_SHUFFLE(0,0,1,1));
702                     w01 = _mm_shuffle_ps(w01, w01, _MM_SHUFFLE(1,0,0,1));
703                     x02 = _mm_mul_ps(x02, w01);
704                     x02 = _mm_addsub_ps(x02, _mm_movelh_ps(x02, x02));
705                     // re(x0) im(x0) re(x2*w1), im(x2*w1)
706                     x02 = _mm_loadl_pi(x02, (const __m64*)&v0[0]);
707 
708                     y01 = _mm_add_ps(x02, x13);
709                     y23 = _mm_sub_ps(x02, x13);
710                     t1 = _mm_xor_ps(_mm_shuffle_ps(y01, y23, _MM_SHUFFLE(2,3,3,2)), neg3_mask);
711                     t0 = _mm_movelh_ps(y01, y23);
712                     y01 = _mm_add_ps(t0, t1);
713                     y23 = _mm_sub_ps(t0, t1);
714 
715                     _mm_storel_pi((__m64*)&v0[0], y01);
716                     _mm_storeh_pi((__m64*)&v0[nx], y01);
717                     _mm_storel_pi((__m64*)&v1[0], y23);
718                     _mm_storeh_pi((__m64*)&v1[nx], y23);
719                 }
720             }
721         }
722 
723         _dw0 = dw0;
724         return n;
725     }
726 };
727 
728 #endif
729 
730 #ifdef USE_IPP_DFT
ippsDFTFwd_CToC(const Complex<float> * src,Complex<float> * dst,const void * spec,uchar * buf)731 static IppStatus ippsDFTFwd_CToC( const Complex<float>* src, Complex<float>* dst,
732                              const void* spec, uchar* buf)
733 {
734     return CV_INSTRUMENT_FUN_IPP(ippsDFTFwd_CToC_32fc, (const Ipp32fc*)src, (Ipp32fc*)dst,
735                                  (const IppsDFTSpec_C_32fc*)spec, buf);
736 }
737 
ippsDFTFwd_CToC(const Complex<double> * src,Complex<double> * dst,const void * spec,uchar * buf)738 static IppStatus ippsDFTFwd_CToC( const Complex<double>* src, Complex<double>* dst,
739                              const void* spec, uchar* buf)
740 {
741     return CV_INSTRUMENT_FUN_IPP(ippsDFTFwd_CToC_64fc, (const Ipp64fc*)src, (Ipp64fc*)dst,
742                                  (const IppsDFTSpec_C_64fc*)spec, buf);
743 }
744 
ippsDFTInv_CToC(const Complex<float> * src,Complex<float> * dst,const void * spec,uchar * buf)745 static IppStatus ippsDFTInv_CToC( const Complex<float>* src, Complex<float>* dst,
746                              const void* spec, uchar* buf)
747 {
748     return CV_INSTRUMENT_FUN_IPP(ippsDFTInv_CToC_32fc, (const Ipp32fc*)src, (Ipp32fc*)dst,
749                                  (const IppsDFTSpec_C_32fc*)spec, buf);
750 }
751 
ippsDFTInv_CToC(const Complex<double> * src,Complex<double> * dst,const void * spec,uchar * buf)752 static IppStatus ippsDFTInv_CToC( const Complex<double>* src, Complex<double>* dst,
753                                   const void* spec, uchar* buf)
754 {
755     return CV_INSTRUMENT_FUN_IPP(ippsDFTInv_CToC_64fc, (const Ipp64fc*)src, (Ipp64fc*)dst,
756                                  (const IppsDFTSpec_C_64fc*)spec, buf);
757 }
758 
ippsDFTFwd_RToPack(const float * src,float * dst,const void * spec,uchar * buf)759 static IppStatus ippsDFTFwd_RToPack( const float* src, float* dst,
760                                      const void* spec, uchar* buf)
761 {
762     return CV_INSTRUMENT_FUN_IPP(ippsDFTFwd_RToPack_32f, src, dst, (const IppsDFTSpec_R_32f*)spec, buf);
763 }
764 
ippsDFTFwd_RToPack(const double * src,double * dst,const void * spec,uchar * buf)765 static IppStatus ippsDFTFwd_RToPack( const double* src, double* dst,
766                                      const void* spec, uchar* buf)
767 {
768     return CV_INSTRUMENT_FUN_IPP(ippsDFTFwd_RToPack_64f, src, dst, (const IppsDFTSpec_R_64f*)spec, buf);
769 }
770 
ippsDFTInv_PackToR(const float * src,float * dst,const void * spec,uchar * buf)771 static IppStatus ippsDFTInv_PackToR( const float* src, float* dst,
772                                      const void* spec, uchar* buf)
773 {
774     return CV_INSTRUMENT_FUN_IPP(ippsDFTInv_PackToR_32f, src, dst, (const IppsDFTSpec_R_32f*)spec, buf);
775 }
776 
ippsDFTInv_PackToR(const double * src,double * dst,const void * spec,uchar * buf)777 static IppStatus ippsDFTInv_PackToR( const double* src, double* dst,
778                                      const void* spec, uchar* buf)
779 {
780     return CV_INSTRUMENT_FUN_IPP(ippsDFTInv_PackToR_64f, src, dst, (const IppsDFTSpec_R_64f*)spec, buf);
781 }
782 #endif
783 
784 struct OcvDftOptions;
785 
786 typedef void (*DFTFunc)(const OcvDftOptions & c, const void* src, void* dst);
787 
788 struct OcvDftOptions {
789     int nf;
790     int *factors;
791     double scale;
792 
793     int* itab;
794     void* wave;
795     int tab_size;
796     int n;
797 
798     bool isInverse;
799     bool noPermute;
800     bool isComplex;
801 
802     bool haveSSE3;
803 
804     DFTFunc dft_func;
805     bool useIpp;
806 
807 #ifdef USE_IPP_DFT
808     uchar* ipp_spec;
809     uchar* ipp_work;
810 #endif
811 
OcvDftOptionscv::OcvDftOptions812     OcvDftOptions()
813     {
814         nf = 0;
815         factors = 0;
816         scale = 0;
817         itab = 0;
818         wave = 0;
819         tab_size = 0;
820         n = 0;
821         isInverse = false;
822         noPermute = false;
823         isComplex = false;
824         useIpp = false;
825 #ifdef USE_IPP_DFT
826         ipp_spec = 0;
827         ipp_work = 0;
828 #endif
829         dft_func = 0;
830         haveSSE3 = checkHardwareSupport(CV_CPU_SSE3);
831     }
832 };
833 
834 // mixed-radix complex discrete Fourier transform: double-precision version
835 template<typename T> static void
DFT(const OcvDftOptions & c,const Complex<T> * src,Complex<T> * dst)836 DFT(const OcvDftOptions & c, const Complex<T>* src, Complex<T>* dst)
837 {
838     const Complex<T>* wave = (Complex<T>*)c.wave;
839     const int * itab = c.itab;
840 
841     int n = c.n;
842     int f_idx, nx;
843     int inv = c.isInverse;
844     int dw0 = c.tab_size, dw;
845     int i, j, k;
846     Complex<T> t;
847     T scale = (T)c.scale;
848 
849     if( c.useIpp )
850     {
851 #ifdef USE_IPP_DFT
852         if( !inv )
853         {
854             if (ippsDFTFwd_CToC( src, dst, c.ipp_spec, c.ipp_work ) >= 0)
855             {
856                 CV_IMPL_ADD(CV_IMPL_IPP);
857                 return;
858             }
859         }
860         else
861         {
862             if (ippsDFTInv_CToC( src, dst, c.ipp_spec, c.ipp_work ) >= 0)
863             {
864                 CV_IMPL_ADD(CV_IMPL_IPP);
865                 return;
866             }
867         }
868         setIppErrorStatus();
869 #endif
870     }
871 
872     int tab_step = c.tab_size == n ? 1 : c.tab_size == n*2 ? 2 : c.tab_size/n;
873 
874     // 0. shuffle data
875     if( dst != src )
876     {
877         assert( !c.noPermute );
878         if( !inv )
879         {
880             for( i = 0; i <= n - 2; i += 2, itab += 2*tab_step )
881             {
882                 int k0 = itab[0], k1 = itab[tab_step];
883                 assert( (unsigned)k0 < (unsigned)n && (unsigned)k1 < (unsigned)n );
884                 dst[i] = src[k0]; dst[i+1] = src[k1];
885             }
886 
887             if( i < n )
888                 dst[n-1] = src[n-1];
889         }
890         else
891         {
892             for( i = 0; i <= n - 2; i += 2, itab += 2*tab_step )
893             {
894                 int k0 = itab[0], k1 = itab[tab_step];
895                 assert( (unsigned)k0 < (unsigned)n && (unsigned)k1 < (unsigned)n );
896                 t.re = src[k0].re; t.im = -src[k0].im;
897                 dst[i] = t;
898                 t.re = src[k1].re; t.im = -src[k1].im;
899                 dst[i+1] = t;
900             }
901 
902             if( i < n )
903             {
904                 t.re = src[n-1].re; t.im = -src[n-1].im;
905                 dst[i] = t;
906             }
907         }
908     }
909     else
910     {
911         if( !c.noPermute )
912         {
913             CV_Assert( c.factors[0] == c.factors[c.nf-1] );
914             if( c.nf == 1 )
915             {
916                 if( (n & 3) == 0 )
917                 {
918                     int n2 = n/2;
919                     Complex<T>* dsth = dst + n2;
920 
921                     for( i = 0; i < n2; i += 2, itab += tab_step*2 )
922                     {
923                         j = itab[0];
924                         assert( (unsigned)j < (unsigned)n2 );
925 
926                         CV_SWAP(dst[i+1], dsth[j], t);
927                         if( j > i )
928                         {
929                             CV_SWAP(dst[i], dst[j], t);
930                             CV_SWAP(dsth[i+1], dsth[j+1], t);
931                         }
932                     }
933                 }
934                 // else do nothing
935             }
936             else
937             {
938                 for( i = 0; i < n; i++, itab += tab_step )
939                 {
940                     j = itab[0];
941                     assert( (unsigned)j < (unsigned)n );
942                     if( j > i )
943                         CV_SWAP(dst[i], dst[j], t);
944                 }
945             }
946         }
947 
948         if( inv )
949         {
950             for( i = 0; i <= n - 2; i += 2 )
951             {
952                 T t0 = -dst[i].im;
953                 T t1 = -dst[i+1].im;
954                 dst[i].im = t0; dst[i+1].im = t1;
955             }
956 
957             if( i < n )
958                 dst[n-1].im = -dst[n-1].im;
959         }
960     }
961 
962     n = 1;
963     // 1. power-2 transforms
964     if( (c.factors[0] & 1) == 0 )
965     {
966         if( c.factors[0] >= 4 && c.haveSSE3)
967         {
968             DFT_VecR4<T> vr4;
969             n = vr4(dst, c.factors[0], c.n, dw0, wave);
970         }
971 
972         // radix-4 transform
973         for( ; n*4 <= c.factors[0]; )
974         {
975             nx = n;
976             n *= 4;
977             dw0 /= 4;
978 
979             for( i = 0; i < c.n; i += n )
980             {
981                 Complex<T> *v0, *v1;
982                 T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4;
983 
984                 v0 = dst + i;
985                 v1 = v0 + nx*2;
986 
987                 r0 = v1[0].re; i0 = v1[0].im;
988                 r4 = v1[nx].re; i4 = v1[nx].im;
989 
990                 r1 = r0 + r4; i1 = i0 + i4;
991                 r3 = i0 - i4; i3 = r4 - r0;
992 
993                 r2 = v0[0].re; i2 = v0[0].im;
994                 r4 = v0[nx].re; i4 = v0[nx].im;
995 
996                 r0 = r2 + r4; i0 = i2 + i4;
997                 r2 -= r4; i2 -= i4;
998 
999                 v0[0].re = r0 + r1; v0[0].im = i0 + i1;
1000                 v1[0].re = r0 - r1; v1[0].im = i0 - i1;
1001                 v0[nx].re = r2 + r3; v0[nx].im = i2 + i3;
1002                 v1[nx].re = r2 - r3; v1[nx].im = i2 - i3;
1003 
1004                 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
1005                 {
1006                     v0 = dst + i + j;
1007                     v1 = v0 + nx*2;
1008 
1009                     r2 = v0[nx].re*wave[dw*2].re - v0[nx].im*wave[dw*2].im;
1010                     i2 = v0[nx].re*wave[dw*2].im + v0[nx].im*wave[dw*2].re;
1011                     r0 = v1[0].re*wave[dw].im + v1[0].im*wave[dw].re;
1012                     i0 = v1[0].re*wave[dw].re - v1[0].im*wave[dw].im;
1013                     r3 = v1[nx].re*wave[dw*3].im + v1[nx].im*wave[dw*3].re;
1014                     i3 = v1[nx].re*wave[dw*3].re - v1[nx].im*wave[dw*3].im;
1015 
1016                     r1 = i0 + i3; i1 = r0 + r3;
1017                     r3 = r0 - r3; i3 = i3 - i0;
1018                     r4 = v0[0].re; i4 = v0[0].im;
1019 
1020                     r0 = r4 + r2; i0 = i4 + i2;
1021                     r2 = r4 - r2; i2 = i4 - i2;
1022 
1023                     v0[0].re = r0 + r1; v0[0].im = i0 + i1;
1024                     v1[0].re = r0 - r1; v1[0].im = i0 - i1;
1025                     v0[nx].re = r2 + r3; v0[nx].im = i2 + i3;
1026                     v1[nx].re = r2 - r3; v1[nx].im = i2 - i3;
1027                 }
1028             }
1029         }
1030 
1031         for( ; n < c.factors[0]; )
1032         {
1033             // do the remaining radix-2 transform
1034             n *= 2;
1035             dw0 /= 2;
1036 
1037             if(c.haveSSE3)
1038             {
1039                 DFT_VecR2<T> vr2;
1040                 vr2(dst, c.n, n, dw0, wave);
1041             }
1042             else
1043             {
1044                 DFT_R2<T> vr2;
1045                 vr2(dst, c.n, n, dw0, wave);
1046             }
1047         }
1048     }
1049 
1050     // 2. all the other transforms
1051     for( f_idx = (c.factors[0]&1) ? 0 : 1; f_idx < c.nf; f_idx++ )
1052     {
1053         int factor = c.factors[f_idx];
1054         nx = n;
1055         n *= factor;
1056         dw0 /= factor;
1057 
1058         if( factor == 3 )
1059         {
1060             if(c.haveSSE3)
1061             {
1062                 DFT_VecR3<T> vr3;
1063                 vr3(dst, c.n, n, dw0, wave);
1064             }
1065             else
1066             {
1067                 DFT_R3<T> vr3;
1068                 vr3(dst, c.n, n, dw0, wave);
1069             }
1070         }
1071         else if( factor == 5 )
1072         {
1073             DFT_R5<T> vr5;
1074             vr5(dst, c.n, n, dw0, wave);
1075         }
1076         else
1077         {
1078             // radix-"factor" - an odd number
1079             int p, q, factor2 = (factor - 1)/2;
1080             int d, dd, dw_f = c.tab_size/factor;
1081             AutoBuffer<Complex<T> > buf(factor2 * 2);
1082             Complex<T>* a = buf.data();
1083             Complex<T>* b = a + factor2;
1084 
1085             for( i = 0; i < c.n; i += n )
1086             {
1087                 for( j = 0, dw = 0; j < nx; j++, dw += dw0 )
1088                 {
1089                     Complex<T>* v = dst + i + j;
1090                     Complex<T> v_0 = v[0];
1091                     Complex<T> vn_0 = v_0;
1092 
1093                     if( j == 0 )
1094                     {
1095                         for( p = 1, k = nx; p <= factor2; p++, k += nx )
1096                         {
1097                             T r0 = v[k].re + v[n-k].re;
1098                             T i0 = v[k].im - v[n-k].im;
1099                             T r1 = v[k].re - v[n-k].re;
1100                             T i1 = v[k].im + v[n-k].im;
1101 
1102                             vn_0.re += r0; vn_0.im += i1;
1103                             a[p-1].re = r0; a[p-1].im = i0;
1104                             b[p-1].re = r1; b[p-1].im = i1;
1105                         }
1106                     }
1107                     else
1108                     {
1109                         const Complex<T>* wave_ = wave + dw*factor;
1110                         d = dw;
1111 
1112                         for( p = 1, k = nx; p <= factor2; p++, k += nx, d += dw )
1113                         {
1114                             T r2 = v[k].re*wave[d].re - v[k].im*wave[d].im;
1115                             T i2 = v[k].re*wave[d].im + v[k].im*wave[d].re;
1116 
1117                             T r1 = v[n-k].re*wave_[-d].re - v[n-k].im*wave_[-d].im;
1118                             T i1 = v[n-k].re*wave_[-d].im + v[n-k].im*wave_[-d].re;
1119 
1120                             T r0 = r2 + r1;
1121                             T i0 = i2 - i1;
1122                             r1 = r2 - r1;
1123                             i1 = i2 + i1;
1124 
1125                             vn_0.re += r0; vn_0.im += i1;
1126                             a[p-1].re = r0; a[p-1].im = i0;
1127                             b[p-1].re = r1; b[p-1].im = i1;
1128                         }
1129                     }
1130 
1131                     v[0] = vn_0;
1132 
1133                     for( p = 1, k = nx; p <= factor2; p++, k += nx )
1134                     {
1135                         Complex<T> s0 = v_0, s1 = v_0;
1136                         d = dd = dw_f*p;
1137 
1138                         for( q = 0; q < factor2; q++ )
1139                         {
1140                             T r0 = wave[d].re * a[q].re;
1141                             T i0 = wave[d].im * a[q].im;
1142                             T r1 = wave[d].re * b[q].im;
1143                             T i1 = wave[d].im * b[q].re;
1144 
1145                             s1.re += r0 + i0; s0.re += r0 - i0;
1146                             s1.im += r1 - i1; s0.im += r1 + i1;
1147 
1148                             d += dd;
1149                             d -= -(d >= c.tab_size) & c.tab_size;
1150                         }
1151 
1152                         v[k] = s0;
1153                         v[n-k] = s1;
1154                     }
1155                 }
1156             }
1157         }
1158     }
1159 
1160     if( scale != 1 )
1161     {
1162         T re_scale = scale, im_scale = scale;
1163         if( inv )
1164             im_scale = -im_scale;
1165 
1166         for( i = 0; i < c.n; i++ )
1167         {
1168             T t0 = dst[i].re*re_scale;
1169             T t1 = dst[i].im*im_scale;
1170             dst[i].re = t0;
1171             dst[i].im = t1;
1172         }
1173     }
1174     else if( inv )
1175     {
1176         for( i = 0; i <= c.n - 2; i += 2 )
1177         {
1178             T t0 = -dst[i].im;
1179             T t1 = -dst[i+1].im;
1180             dst[i].im = t0;
1181             dst[i+1].im = t1;
1182         }
1183 
1184         if( i < c.n )
1185             dst[c.n-1].im = -dst[c.n-1].im;
1186     }
1187 }
1188 
1189 
1190 /* FFT of real vector
1191    output vector format:
1192      re(0), re(1), im(1), ... , re(n/2-1), im((n+1)/2-1) [, re((n+1)/2)] OR ...
1193      re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] */
1194 template<typename T> static void
RealDFT(const OcvDftOptions & c,const T * src,T * dst)1195 RealDFT(const OcvDftOptions & c, const T* src, T* dst)
1196 {
1197     int n = c.n;
1198     int complex_output = c.isComplex;
1199     T scale = (T)c.scale;
1200     int j;
1201     dst += complex_output;
1202 
1203     if( c.useIpp )
1204     {
1205 #ifdef USE_IPP_DFT
1206         if (ippsDFTFwd_RToPack( src, dst, c.ipp_spec, c.ipp_work ) >=0)
1207         {
1208             if( complex_output )
1209             {
1210                 dst[-1] = dst[0];
1211                 dst[0] = 0;
1212                 if( (n & 1) == 0 )
1213                     dst[n] = 0;
1214             }
1215             CV_IMPL_ADD(CV_IMPL_IPP);
1216             return;
1217         }
1218         setIppErrorStatus();
1219 #endif
1220     }
1221     assert( c.tab_size == n );
1222 
1223     if( n == 1 )
1224     {
1225         dst[0] = src[0]*scale;
1226     }
1227     else if( n == 2 )
1228     {
1229         T t = (src[0] + src[1])*scale;
1230         dst[1] = (src[0] - src[1])*scale;
1231         dst[0] = t;
1232     }
1233     else if( n & 1 )
1234     {
1235         dst -= complex_output;
1236         Complex<T>* _dst = (Complex<T>*)dst;
1237         _dst[0].re = src[0]*scale;
1238         _dst[0].im = 0;
1239         for( j = 1; j < n; j += 2 )
1240         {
1241             T t0 = src[c.itab[j]]*scale;
1242             T t1 = src[c.itab[j+1]]*scale;
1243             _dst[j].re = t0;
1244             _dst[j].im = 0;
1245             _dst[j+1].re = t1;
1246             _dst[j+1].im = 0;
1247         }
1248         OcvDftOptions sub_c = c;
1249         sub_c.isComplex = false;
1250         sub_c.isInverse = false;
1251         sub_c.noPermute = true;
1252         sub_c.scale = 1.;
1253         DFT(sub_c, _dst, _dst);
1254         if( !complex_output )
1255             dst[1] = dst[0];
1256     }
1257     else
1258     {
1259         T t0, t;
1260         T h1_re, h1_im, h2_re, h2_im;
1261         T scale2 = scale*(T)0.5;
1262         int n2 = n >> 1;
1263 
1264         c.factors[0] >>= 1;
1265 
1266         OcvDftOptions sub_c = c;
1267         sub_c.factors += (c.factors[0] == 1);
1268         sub_c.nf -= (c.factors[0] == 1);
1269         sub_c.isComplex = false;
1270         sub_c.isInverse = false;
1271         sub_c.noPermute = false;
1272         sub_c.scale = 1.;
1273         sub_c.n = n2;
1274 
1275         DFT(sub_c, (Complex<T>*)src, (Complex<T>*)dst);
1276 
1277         c.factors[0] <<= 1;
1278 
1279         t = dst[0] - dst[1];
1280         dst[0] = (dst[0] + dst[1])*scale;
1281         dst[1] = t*scale;
1282 
1283         t0 = dst[n2];
1284         t = dst[n-1];
1285         dst[n-1] = dst[1];
1286 
1287         const Complex<T> *wave = (const Complex<T>*)c.wave;
1288 
1289         for( j = 2, wave++; j < n2; j += 2, wave++ )
1290         {
1291             /* calc odd */
1292             h2_re = scale2*(dst[j+1] + t);
1293             h2_im = scale2*(dst[n-j] - dst[j]);
1294 
1295             /* calc even */
1296             h1_re = scale2*(dst[j] + dst[n-j]);
1297             h1_im = scale2*(dst[j+1] - t);
1298 
1299             /* rotate */
1300             t = h2_re*wave->re - h2_im*wave->im;
1301             h2_im = h2_re*wave->im + h2_im*wave->re;
1302             h2_re = t;
1303             t = dst[n-j-1];
1304 
1305             dst[j-1] = h1_re + h2_re;
1306             dst[n-j-1] = h1_re - h2_re;
1307             dst[j] = h1_im + h2_im;
1308             dst[n-j] = h2_im - h1_im;
1309         }
1310 
1311         if( j <= n2 )
1312         {
1313             dst[n2-1] = t0*scale;
1314             dst[n2] = -t*scale;
1315         }
1316     }
1317 
1318     if( complex_output && ((n & 1) == 0 || n == 1))
1319     {
1320         dst[-1] = dst[0];
1321         dst[0] = 0;
1322         if( n > 1 )
1323             dst[n] = 0;
1324     }
1325 }
1326 
1327 /* Inverse FFT of complex conjugate-symmetric vector
1328    input vector format:
1329       re[0], re[1], im[1], ... , re[n/2-1], im[n/2-1], re[n/2] OR
1330       re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] */
1331 template<typename T> static void
CCSIDFT(const OcvDftOptions & c,const T * src,T * dst)1332 CCSIDFT(const OcvDftOptions & c, const T* src, T* dst)
1333 {
1334     int n = c.n;
1335     int complex_input = c.isComplex;
1336     int j, k;
1337     T scale = (T)c.scale;
1338     T save_s1 = 0.;
1339     T t0, t1, t2, t3, t;
1340 
1341     assert( c.tab_size == n );
1342 
1343     if( complex_input )
1344     {
1345         assert( src != dst );
1346         save_s1 = src[1];
1347         ((T*)src)[1] = src[0];
1348         src++;
1349     }
1350     if( c.useIpp )
1351     {
1352 #ifdef USE_IPP_DFT
1353         if (ippsDFTInv_PackToR( src, dst, c.ipp_spec, c.ipp_work ) >=0)
1354         {
1355             if( complex_input )
1356                 ((T*)src)[0] = (T)save_s1;
1357             CV_IMPL_ADD(CV_IMPL_IPP);
1358             return;
1359         }
1360 
1361         setIppErrorStatus();
1362 #endif
1363     }
1364     if( n == 1 )
1365     {
1366         dst[0] = (T)(src[0]*scale);
1367     }
1368     else if( n == 2 )
1369     {
1370         t = (src[0] + src[1])*scale;
1371         dst[1] = (src[0] - src[1])*scale;
1372         dst[0] = t;
1373     }
1374     else if( n & 1 )
1375     {
1376         Complex<T>* _src = (Complex<T>*)(src-1);
1377         Complex<T>* _dst = (Complex<T>*)dst;
1378 
1379         _dst[0].re = src[0];
1380         _dst[0].im = 0;
1381 
1382         int n2 = (n+1) >> 1;
1383 
1384         for( j = 1; j < n2; j++ )
1385         {
1386             int k0 = c.itab[j], k1 = c.itab[n-j];
1387             t0 = _src[j].re; t1 = _src[j].im;
1388             _dst[k0].re = t0; _dst[k0].im = -t1;
1389             _dst[k1].re = t0; _dst[k1].im = t1;
1390         }
1391 
1392         OcvDftOptions sub_c = c;
1393         sub_c.isComplex = false;
1394         sub_c.isInverse = false;
1395         sub_c.noPermute = true;
1396         sub_c.scale = 1.;
1397         sub_c.n = n;
1398 
1399         DFT(sub_c, _dst, _dst);
1400         dst[0] *= scale;
1401         for( j = 1; j < n; j += 2 )
1402         {
1403             t0 = dst[j*2]*scale;
1404             t1 = dst[j*2+2]*scale;
1405             dst[j] = t0;
1406             dst[j+1] = t1;
1407         }
1408     }
1409     else
1410     {
1411         int inplace = src == dst;
1412         const Complex<T>* w = (const Complex<T>*)c.wave;
1413 
1414         t = src[1];
1415         t0 = (src[0] + src[n-1]);
1416         t1 = (src[n-1] - src[0]);
1417         dst[0] = t0;
1418         dst[1] = t1;
1419 
1420         int n2 = (n+1) >> 1;
1421 
1422         for( j = 2, w++; j < n2; j += 2, w++ )
1423         {
1424             T h1_re, h1_im, h2_re, h2_im;
1425 
1426             h1_re = (t + src[n-j-1]);
1427             h1_im = (src[j] - src[n-j]);
1428 
1429             h2_re = (t - src[n-j-1]);
1430             h2_im = (src[j] + src[n-j]);
1431 
1432             t = h2_re*w->re + h2_im*w->im;
1433             h2_im = h2_im*w->re - h2_re*w->im;
1434             h2_re = t;
1435 
1436             t = src[j+1];
1437             t0 = h1_re - h2_im;
1438             t1 = -h1_im - h2_re;
1439             t2 = h1_re + h2_im;
1440             t3 = h1_im - h2_re;
1441 
1442             if( inplace )
1443             {
1444                 dst[j] = t0;
1445                 dst[j+1] = t1;
1446                 dst[n-j] = t2;
1447                 dst[n-j+1]= t3;
1448             }
1449             else
1450             {
1451                 int j2 = j >> 1;
1452                 k = c.itab[j2];
1453                 dst[k] = t0;
1454                 dst[k+1] = t1;
1455                 k = c.itab[n2-j2];
1456                 dst[k] = t2;
1457                 dst[k+1]= t3;
1458             }
1459         }
1460 
1461         if( j <= n2 )
1462         {
1463             t0 = t*2;
1464             t1 = src[n2]*2;
1465 
1466             if( inplace )
1467             {
1468                 dst[n2] = t0;
1469                 dst[n2+1] = t1;
1470             }
1471             else
1472             {
1473                 k = c.itab[n2];
1474                 dst[k*2] = t0;
1475                 dst[k*2+1] = t1;
1476             }
1477         }
1478 
1479         c.factors[0] >>= 1;
1480 
1481         OcvDftOptions sub_c = c;
1482         sub_c.factors += (c.factors[0] == 1);
1483         sub_c.nf -= (c.factors[0] == 1);
1484         sub_c.isComplex = false;
1485         sub_c.isInverse = false;
1486         sub_c.noPermute = !inplace;
1487         sub_c.scale = 1.;
1488         sub_c.n = n2;
1489 
1490         DFT(sub_c, (Complex<T>*)dst, (Complex<T>*)dst);
1491 
1492         c.factors[0] <<= 1;
1493 
1494         for( j = 0; j < n; j += 2 )
1495         {
1496             t0 = dst[j]*scale;
1497             t1 = dst[j+1]*(-scale);
1498             dst[j] = t0;
1499             dst[j+1] = t1;
1500         }
1501     }
1502     if( complex_input )
1503         ((T*)src)[0] = (T)save_s1;
1504 }
1505 
1506 static void
CopyColumn(const uchar * _src,size_t src_step,uchar * _dst,size_t dst_step,int len,size_t elem_size)1507 CopyColumn( const uchar* _src, size_t src_step,
1508             uchar* _dst, size_t dst_step,
1509             int len, size_t elem_size )
1510 {
1511     int i, t0, t1;
1512     const int* src = (const int*)_src;
1513     int* dst = (int*)_dst;
1514     src_step /= sizeof(src[0]);
1515     dst_step /= sizeof(dst[0]);
1516 
1517     if( elem_size == sizeof(int) )
1518     {
1519         for( i = 0; i < len; i++, src += src_step, dst += dst_step )
1520             dst[0] = src[0];
1521     }
1522     else if( elem_size == sizeof(int)*2 )
1523     {
1524         for( i = 0; i < len; i++, src += src_step, dst += dst_step )
1525         {
1526             t0 = src[0]; t1 = src[1];
1527             dst[0] = t0; dst[1] = t1;
1528         }
1529     }
1530     else if( elem_size == sizeof(int)*4 )
1531     {
1532         for( i = 0; i < len; i++, src += src_step, dst += dst_step )
1533         {
1534             t0 = src[0]; t1 = src[1];
1535             dst[0] = t0; dst[1] = t1;
1536             t0 = src[2]; t1 = src[3];
1537             dst[2] = t0; dst[3] = t1;
1538         }
1539     }
1540 }
1541 
1542 
1543 static void
CopyFrom2Columns(const uchar * _src,size_t src_step,uchar * _dst0,uchar * _dst1,int len,size_t elem_size)1544 CopyFrom2Columns( const uchar* _src, size_t src_step,
1545                   uchar* _dst0, uchar* _dst1,
1546                   int len, size_t elem_size )
1547 {
1548     int i, t0, t1;
1549     const int* src = (const int*)_src;
1550     int* dst0 = (int*)_dst0;
1551     int* dst1 = (int*)_dst1;
1552     src_step /= sizeof(src[0]);
1553 
1554     if( elem_size == sizeof(int) )
1555     {
1556         for( i = 0; i < len; i++, src += src_step )
1557         {
1558             t0 = src[0]; t1 = src[1];
1559             dst0[i] = t0; dst1[i] = t1;
1560         }
1561     }
1562     else if( elem_size == sizeof(int)*2 )
1563     {
1564         for( i = 0; i < len*2; i += 2, src += src_step )
1565         {
1566             t0 = src[0]; t1 = src[1];
1567             dst0[i] = t0; dst0[i+1] = t1;
1568             t0 = src[2]; t1 = src[3];
1569             dst1[i] = t0; dst1[i+1] = t1;
1570         }
1571     }
1572     else if( elem_size == sizeof(int)*4 )
1573     {
1574         for( i = 0; i < len*4; i += 4, src += src_step )
1575         {
1576             t0 = src[0]; t1 = src[1];
1577             dst0[i] = t0; dst0[i+1] = t1;
1578             t0 = src[2]; t1 = src[3];
1579             dst0[i+2] = t0; dst0[i+3] = t1;
1580             t0 = src[4]; t1 = src[5];
1581             dst1[i] = t0; dst1[i+1] = t1;
1582             t0 = src[6]; t1 = src[7];
1583             dst1[i+2] = t0; dst1[i+3] = t1;
1584         }
1585     }
1586 }
1587 
1588 
1589 static void
CopyTo2Columns(const uchar * _src0,const uchar * _src1,uchar * _dst,size_t dst_step,int len,size_t elem_size)1590 CopyTo2Columns( const uchar* _src0, const uchar* _src1,
1591                 uchar* _dst, size_t dst_step,
1592                 int len, size_t elem_size )
1593 {
1594     int i, t0, t1;
1595     const int* src0 = (const int*)_src0;
1596     const int* src1 = (const int*)_src1;
1597     int* dst = (int*)_dst;
1598     dst_step /= sizeof(dst[0]);
1599 
1600     if( elem_size == sizeof(int) )
1601     {
1602         for( i = 0; i < len; i++, dst += dst_step )
1603         {
1604             t0 = src0[i]; t1 = src1[i];
1605             dst[0] = t0; dst[1] = t1;
1606         }
1607     }
1608     else if( elem_size == sizeof(int)*2 )
1609     {
1610         for( i = 0; i < len*2; i += 2, dst += dst_step )
1611         {
1612             t0 = src0[i]; t1 = src0[i+1];
1613             dst[0] = t0; dst[1] = t1;
1614             t0 = src1[i]; t1 = src1[i+1];
1615             dst[2] = t0; dst[3] = t1;
1616         }
1617     }
1618     else if( elem_size == sizeof(int)*4 )
1619     {
1620         for( i = 0; i < len*4; i += 4, dst += dst_step )
1621         {
1622             t0 = src0[i]; t1 = src0[i+1];
1623             dst[0] = t0; dst[1] = t1;
1624             t0 = src0[i+2]; t1 = src0[i+3];
1625             dst[2] = t0; dst[3] = t1;
1626             t0 = src1[i]; t1 = src1[i+1];
1627             dst[4] = t0; dst[5] = t1;
1628             t0 = src1[i+2]; t1 = src1[i+3];
1629             dst[6] = t0; dst[7] = t1;
1630         }
1631     }
1632 }
1633 
1634 
1635 static void
ExpandCCS(uchar * _ptr,int n,int elem_size)1636 ExpandCCS( uchar* _ptr, int n, int elem_size )
1637 {
1638     int i;
1639     if( elem_size == (int)sizeof(float) )
1640     {
1641         float* p = (float*)_ptr;
1642         for( i = 1; i < (n+1)/2; i++ )
1643         {
1644             p[(n-i)*2] = p[i*2-1];
1645             p[(n-i)*2+1] = -p[i*2];
1646         }
1647         if( (n & 1) == 0 )
1648         {
1649             p[n] = p[n-1];
1650             p[n+1] = 0.f;
1651             n--;
1652         }
1653         for( i = n-1; i > 0; i-- )
1654             p[i+1] = p[i];
1655         p[1] = 0.f;
1656     }
1657     else
1658     {
1659         double* p = (double*)_ptr;
1660         for( i = 1; i < (n+1)/2; i++ )
1661         {
1662             p[(n-i)*2] = p[i*2-1];
1663             p[(n-i)*2+1] = -p[i*2];
1664         }
1665         if( (n & 1) == 0 )
1666         {
1667             p[n] = p[n-1];
1668             p[n+1] = 0.f;
1669             n--;
1670         }
1671         for( i = n-1; i > 0; i-- )
1672             p[i+1] = p[i];
1673         p[1] = 0.f;
1674     }
1675 }
1676 
DFT_32f(const OcvDftOptions & c,const Complexf * src,Complexf * dst)1677 static void DFT_32f(const OcvDftOptions & c, const Complexf* src, Complexf* dst)
1678 {
1679     DFT(c, src, dst);
1680 }
1681 
DFT_64f(const OcvDftOptions & c,const Complexd * src,Complexd * dst)1682 static void DFT_64f(const OcvDftOptions & c, const Complexd* src, Complexd* dst)
1683 {
1684     DFT(c, src, dst);
1685 }
1686 
1687 
RealDFT_32f(const OcvDftOptions & c,const float * src,float * dst)1688 static void RealDFT_32f(const OcvDftOptions & c, const float* src, float* dst)
1689 {
1690     RealDFT(c, src, dst);
1691 }
1692 
RealDFT_64f(const OcvDftOptions & c,const double * src,double * dst)1693 static void RealDFT_64f(const OcvDftOptions & c, const double* src, double* dst)
1694 {
1695     RealDFT(c, src, dst);
1696 }
1697 
CCSIDFT_32f(const OcvDftOptions & c,const float * src,float * dst)1698 static void CCSIDFT_32f(const OcvDftOptions & c, const float* src, float* dst)
1699 {
1700     CCSIDFT(c, src, dst);
1701 }
1702 
CCSIDFT_64f(const OcvDftOptions & c,const double * src,double * dst)1703 static void CCSIDFT_64f(const OcvDftOptions & c, const double* src, double* dst)
1704 {
1705     CCSIDFT(c, src, dst);
1706 }
1707 
1708 }
1709 
1710 #ifdef USE_IPP_DFT
1711 typedef IppStatus (CV_STDCALL* IppDFTGetSizeFunc)(int, int, IppHintAlgorithm, int*, int*, int*);
1712 typedef IppStatus (CV_STDCALL* IppDFTInitFunc)(int, int, IppHintAlgorithm, void*, uchar*);
1713 #endif
1714 
1715 namespace cv
1716 {
1717 #if defined USE_IPP_DFT
1718 
1719 typedef IppStatus (CV_STDCALL* ippiDFT_C_Func)(const Ipp32fc*, int, Ipp32fc*, int, const IppiDFTSpec_C_32fc*, Ipp8u*);
1720 typedef IppStatus (CV_STDCALL* ippiDFT_R_Func)(const Ipp32f* , int, Ipp32f* , int, const IppiDFTSpec_R_32f* , Ipp8u*);
1721 
1722 template <typename Dft>
1723 class Dft_C_IPPLoop_Invoker : public ParallelLoopBody
1724 {
1725 public:
1726 
Dft_C_IPPLoop_Invoker(const uchar * _src,size_t _src_step,uchar * _dst,size_t _dst_step,int _width,const Dft & _ippidft,int _norm_flag,bool * _ok)1727     Dft_C_IPPLoop_Invoker(const uchar * _src, size_t _src_step, uchar * _dst, size_t _dst_step, int _width,
1728                           const Dft& _ippidft, int _norm_flag, bool *_ok) :
1729         ParallelLoopBody(),
1730         src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width),
1731         ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok)
1732     {
1733         *ok = true;
1734     }
1735 
operator ()(const Range & range) const1736     virtual void operator()(const Range& range) const CV_OVERRIDE
1737     {
1738         IppStatus status;
1739         Ipp8u* pBuffer = 0;
1740         Ipp8u* pMemInit= 0;
1741         int sizeBuffer=0;
1742         int sizeSpec=0;
1743         int sizeInit=0;
1744 
1745         IppiSize srcRoiSize = {width, 1};
1746 
1747         status = ippiDFTGetSize_C_32fc(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
1748         if ( status < 0 )
1749         {
1750             *ok = false;
1751             return;
1752         }
1753 
1754         IppiDFTSpec_C_32fc* pDFTSpec = (IppiDFTSpec_C_32fc*)CV_IPP_MALLOC( sizeSpec );
1755 
1756         if ( sizeInit > 0 )
1757             pMemInit = (Ipp8u*)CV_IPP_MALLOC( sizeInit );
1758 
1759         if ( sizeBuffer > 0 )
1760             pBuffer = (Ipp8u*)CV_IPP_MALLOC( sizeBuffer );
1761 
1762         status = ippiDFTInit_C_32fc( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit );
1763 
1764         if ( sizeInit > 0 )
1765             ippFree( pMemInit );
1766 
1767         if ( status < 0 )
1768         {
1769             ippFree( pDFTSpec );
1770             if ( sizeBuffer > 0 )
1771                 ippFree( pBuffer );
1772             *ok = false;
1773             return;
1774         }
1775 
1776         for( int i = range.start; i < range.end; ++i)
1777             if(!ippidft((Ipp32fc*)(src + src_step * i), src_step, (Ipp32fc*)(dst + dst_step * i), dst_step,
1778                         pDFTSpec, (Ipp8u*)pBuffer))
1779             {
1780                 *ok = false;
1781             }
1782 
1783         if ( sizeBuffer > 0 )
1784             ippFree( pBuffer );
1785 
1786         ippFree( pDFTSpec );
1787         CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
1788     }
1789 
1790 private:
1791     const uchar * src;
1792     size_t src_step;
1793     uchar * dst;
1794     size_t dst_step;
1795     int width;
1796     const Dft& ippidft;
1797     int norm_flag;
1798     bool *ok;
1799 
1800     const Dft_C_IPPLoop_Invoker& operator= (const Dft_C_IPPLoop_Invoker&);
1801 };
1802 
1803 template <typename Dft>
1804 class Dft_R_IPPLoop_Invoker : public ParallelLoopBody
1805 {
1806 public:
1807 
Dft_R_IPPLoop_Invoker(const uchar * _src,size_t _src_step,uchar * _dst,size_t _dst_step,int _width,const Dft & _ippidft,int _norm_flag,bool * _ok)1808     Dft_R_IPPLoop_Invoker(const uchar * _src, size_t _src_step, uchar * _dst, size_t _dst_step, int _width,
1809                           const Dft& _ippidft, int _norm_flag, bool *_ok) :
1810         ParallelLoopBody(),
1811         src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width),
1812         ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok)
1813     {
1814         *ok = true;
1815     }
1816 
operator ()(const Range & range) const1817     virtual void operator()(const Range& range) const CV_OVERRIDE
1818     {
1819         IppStatus status;
1820         Ipp8u* pBuffer = 0;
1821         Ipp8u* pMemInit= 0;
1822         int sizeBuffer=0;
1823         int sizeSpec=0;
1824         int sizeInit=0;
1825 
1826         IppiSize srcRoiSize = {width, 1};
1827 
1828         status = ippiDFTGetSize_R_32f(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
1829         if ( status < 0 )
1830         {
1831             *ok = false;
1832             return;
1833         }
1834 
1835         IppiDFTSpec_R_32f* pDFTSpec = (IppiDFTSpec_R_32f*)CV_IPP_MALLOC( sizeSpec );
1836 
1837         if ( sizeInit > 0 )
1838             pMemInit = (Ipp8u*)CV_IPP_MALLOC( sizeInit );
1839 
1840         if ( sizeBuffer > 0 )
1841             pBuffer = (Ipp8u*)CV_IPP_MALLOC( sizeBuffer );
1842 
1843         status = ippiDFTInit_R_32f( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit );
1844 
1845         if ( sizeInit > 0 )
1846             ippFree( pMemInit );
1847 
1848         if ( status < 0 )
1849         {
1850             ippFree( pDFTSpec );
1851             if ( sizeBuffer > 0 )
1852                 ippFree( pBuffer );
1853             *ok = false;
1854             return;
1855         }
1856 
1857         for( int i = range.start; i < range.end; ++i)
1858             if(!ippidft((float*)(src + src_step * i), src_step, (float*)(dst + dst_step * i), dst_step,
1859                         pDFTSpec, (Ipp8u*)pBuffer))
1860             {
1861                 *ok = false;
1862             }
1863 
1864         if ( sizeBuffer > 0 )
1865             ippFree( pBuffer );
1866 
1867         ippFree( pDFTSpec );
1868         CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
1869     }
1870 
1871 private:
1872     const uchar * src;
1873     size_t src_step;
1874     uchar * dst;
1875     size_t dst_step;
1876     int width;
1877     const Dft& ippidft;
1878     int norm_flag;
1879     bool *ok;
1880 
1881     const Dft_R_IPPLoop_Invoker& operator= (const Dft_R_IPPLoop_Invoker&);
1882 };
1883 
1884 template <typename Dft>
Dft_C_IPPLoop(const uchar * src,size_t src_step,uchar * dst,size_t dst_step,int width,int height,const Dft & ippidft,int norm_flag)1885 bool Dft_C_IPPLoop(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, const Dft& ippidft, int norm_flag)
1886 {
1887     bool ok;
1888     parallel_for_(Range(0, height), Dft_C_IPPLoop_Invoker<Dft>(src, src_step, dst, dst_step, width, ippidft, norm_flag, &ok), (width * height)/(double)(1<<16) );
1889     return ok;
1890 }
1891 
1892 template <typename Dft>
Dft_R_IPPLoop(const uchar * src,size_t src_step,uchar * dst,size_t dst_step,int width,int height,const Dft & ippidft,int norm_flag)1893 bool Dft_R_IPPLoop(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, const Dft& ippidft, int norm_flag)
1894 {
1895     bool ok;
1896     parallel_for_(Range(0, height), Dft_R_IPPLoop_Invoker<Dft>(src, src_step, dst, dst_step, width, ippidft, norm_flag, &ok), (width * height)/(double)(1<<16) );
1897     return ok;
1898 }
1899 
1900 struct IPPDFT_C_Functor
1901 {
IPPDFT_C_Functorcv::IPPDFT_C_Functor1902     IPPDFT_C_Functor(ippiDFT_C_Func _func) : ippiDFT_CToC_32fc_C1R(_func){}
1903 
operator ()cv::IPPDFT_C_Functor1904     bool operator()(const Ipp32fc* src, size_t srcStep, Ipp32fc* dst, size_t dstStep, const IppiDFTSpec_C_32fc* pDFTSpec, Ipp8u* pBuffer) const
1905     {
1906         return ippiDFT_CToC_32fc_C1R ? CV_INSTRUMENT_FUN_IPP(ippiDFT_CToC_32fc_C1R, src, static_cast<int>(srcStep), dst, static_cast<int>(dstStep), pDFTSpec, pBuffer) >= 0 : false;
1907     }
1908 private:
1909     ippiDFT_C_Func ippiDFT_CToC_32fc_C1R;
1910 };
1911 
1912 struct IPPDFT_R_Functor
1913 {
IPPDFT_R_Functorcv::IPPDFT_R_Functor1914     IPPDFT_R_Functor(ippiDFT_R_Func _func) : ippiDFT_PackToR_32f_C1R(_func){}
1915 
operator ()cv::IPPDFT_R_Functor1916     bool operator()(const Ipp32f* src, size_t srcStep, Ipp32f* dst, size_t dstStep, const IppiDFTSpec_R_32f* pDFTSpec, Ipp8u* pBuffer) const
1917     {
1918         return ippiDFT_PackToR_32f_C1R ? CV_INSTRUMENT_FUN_IPP(ippiDFT_PackToR_32f_C1R, src, static_cast<int>(srcStep), dst, static_cast<int>(dstStep), pDFTSpec, pBuffer) >= 0 : false;
1919     }
1920 private:
1921     ippiDFT_R_Func ippiDFT_PackToR_32f_C1R;
1922 };
1923 
ippi_DFT_C_32F(const uchar * src,size_t src_step,uchar * dst,size_t dst_step,int width,int height,bool inv,int norm_flag)1924 static bool ippi_DFT_C_32F(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, bool inv, int norm_flag)
1925 {
1926     CV_INSTRUMENT_REGION_IPP();
1927 
1928     IppStatus status;
1929     Ipp8u* pBuffer = 0;
1930     Ipp8u* pMemInit= 0;
1931     int sizeBuffer=0;
1932     int sizeSpec=0;
1933     int sizeInit=0;
1934 
1935     IppiSize srcRoiSize = {width, height};
1936 
1937     status = ippiDFTGetSize_C_32fc(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
1938     if ( status < 0 )
1939         return false;
1940 
1941     IppiDFTSpec_C_32fc* pDFTSpec = (IppiDFTSpec_C_32fc*)CV_IPP_MALLOC( sizeSpec );
1942 
1943     if ( sizeInit > 0 )
1944         pMemInit = (Ipp8u*)CV_IPP_MALLOC( sizeInit );
1945 
1946     if ( sizeBuffer > 0 )
1947         pBuffer = (Ipp8u*)CV_IPP_MALLOC( sizeBuffer );
1948 
1949     status = ippiDFTInit_C_32fc( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit );
1950 
1951     if ( sizeInit > 0 )
1952         ippFree( pMemInit );
1953 
1954     if ( status < 0 )
1955     {
1956         ippFree( pDFTSpec );
1957         if ( sizeBuffer > 0 )
1958             ippFree( pBuffer );
1959         return false;
1960     }
1961 
1962     if (!inv)
1963         status = CV_INSTRUMENT_FUN_IPP(ippiDFTFwd_CToC_32fc_C1R, (Ipp32fc*)src, static_cast<int>(src_step), (Ipp32fc*)dst, static_cast<int>(dst_step), pDFTSpec, pBuffer);
1964     else
1965         status = CV_INSTRUMENT_FUN_IPP(ippiDFTInv_CToC_32fc_C1R, (Ipp32fc*)src, static_cast<int>(src_step), (Ipp32fc*)dst, static_cast<int>(dst_step), pDFTSpec, pBuffer);
1966 
1967     if ( sizeBuffer > 0 )
1968         ippFree( pBuffer );
1969 
1970     ippFree( pDFTSpec );
1971 
1972     if(status >= 0)
1973     {
1974         CV_IMPL_ADD(CV_IMPL_IPP);
1975         return true;
1976     }
1977     return false;
1978 }
1979 
ippi_DFT_R_32F(const uchar * src,size_t src_step,uchar * dst,size_t dst_step,int width,int height,bool inv,int norm_flag)1980 static bool ippi_DFT_R_32F(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, bool inv, int norm_flag)
1981 {
1982     CV_INSTRUMENT_REGION_IPP();
1983 
1984     IppStatus status;
1985     Ipp8u* pBuffer = 0;
1986     Ipp8u* pMemInit= 0;
1987     int sizeBuffer=0;
1988     int sizeSpec=0;
1989     int sizeInit=0;
1990 
1991     IppiSize srcRoiSize = {width, height};
1992 
1993     status = ippiDFTGetSize_R_32f(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
1994     if ( status < 0 )
1995         return false;
1996 
1997     IppiDFTSpec_R_32f* pDFTSpec = (IppiDFTSpec_R_32f*)CV_IPP_MALLOC( sizeSpec );
1998 
1999     if ( sizeInit > 0 )
2000         pMemInit = (Ipp8u*)CV_IPP_MALLOC( sizeInit );
2001 
2002     if ( sizeBuffer > 0 )
2003         pBuffer = (Ipp8u*)CV_IPP_MALLOC( sizeBuffer );
2004 
2005     status = ippiDFTInit_R_32f( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit );
2006 
2007     if ( sizeInit > 0 )
2008         ippFree( pMemInit );
2009 
2010     if ( status < 0 )
2011     {
2012         ippFree( pDFTSpec );
2013         if ( sizeBuffer > 0 )
2014             ippFree( pBuffer );
2015         return false;
2016     }
2017 
2018     if (!inv)
2019         status = CV_INSTRUMENT_FUN_IPP(ippiDFTFwd_RToPack_32f_C1R, (float*)src, static_cast<int>(src_step), (float*)dst, static_cast<int>(dst_step), pDFTSpec, pBuffer);
2020     else
2021         status = CV_INSTRUMENT_FUN_IPP(ippiDFTInv_PackToR_32f_C1R, (float*)src, static_cast<int>(src_step), (float*)dst, static_cast<int>(dst_step), pDFTSpec, pBuffer);
2022 
2023     if ( sizeBuffer > 0 )
2024         ippFree( pBuffer );
2025 
2026     ippFree( pDFTSpec );
2027 
2028     if(status >= 0)
2029     {
2030         CV_IMPL_ADD(CV_IMPL_IPP);
2031         return true;
2032     }
2033     return false;
2034 }
2035 
2036 #endif
2037 }
2038 
2039 #ifdef HAVE_OPENCL
2040 
2041 namespace cv
2042 {
2043 
2044 enum FftType
2045 {
2046     R2R = 0, // real to CCS in case forward transform, CCS to real otherwise
2047     C2R = 1, // complex to real in case inverse transform
2048     R2C = 2, // real to complex in case forward transform
2049     C2C = 3  // complex to complex
2050 };
2051 
2052 struct OCL_FftPlan
2053 {
2054 private:
2055     UMat twiddles;
2056     String buildOptions;
2057     int thread_count;
2058     int dft_size;
2059     int dft_depth;
2060     bool status;
2061 
2062 public:
OCL_FftPlancv::OCL_FftPlan2063     OCL_FftPlan(int _size, int _depth) : dft_size(_size), dft_depth(_depth), status(true)
2064     {
2065         CV_Assert( dft_depth == CV_32F || dft_depth == CV_64F );
2066 
2067         int min_radix;
2068         std::vector<int> radixes, blocks;
2069         ocl_getRadixes(dft_size, radixes, blocks, min_radix);
2070         thread_count = dft_size / min_radix;
2071 
2072         if (thread_count > (int) ocl::Device::getDefault().maxWorkGroupSize())
2073         {
2074             status = false;
2075             return;
2076         }
2077 
2078         // generate string with radix calls
2079         String radix_processing;
2080         int n = 1, twiddle_size = 0;
2081         for (size_t i=0; i<radixes.size(); i++)
2082         {
2083             int radix = radixes[i], block = blocks[i];
2084             if (block > 1)
2085                 radix_processing += format("fft_radix%d_B%d(smem,twiddles+%d,ind,%d,%d);", radix, block, twiddle_size, n, dft_size/radix);
2086             else
2087                 radix_processing += format("fft_radix%d(smem,twiddles+%d,ind,%d,%d);", radix, twiddle_size, n, dft_size/radix);
2088             twiddle_size += (radix-1)*n;
2089             n *= radix;
2090         }
2091 
2092         twiddles.create(1, twiddle_size, CV_MAKE_TYPE(dft_depth, 2));
2093         if (dft_depth == CV_32F)
2094             fillRadixTable<float>(twiddles, radixes);
2095         else
2096             fillRadixTable<double>(twiddles, radixes);
2097 
2098         buildOptions = format("-D LOCAL_SIZE=%d -D kercn=%d -D FT=%s -D CT=%s%s -D RADIX_PROCESS=%s",
2099                               dft_size, min_radix, ocl::typeToStr(dft_depth), ocl::typeToStr(CV_MAKE_TYPE(dft_depth, 2)),
2100                               dft_depth == CV_64F ? " -D DOUBLE_SUPPORT" : "", radix_processing.c_str());
2101     }
2102 
enqueueTransformcv::OCL_FftPlan2103     bool enqueueTransform(InputArray _src, OutputArray _dst, int num_dfts, int flags, int fftType, bool rows = true) const
2104     {
2105         if (!status)
2106             return false;
2107 
2108         UMat src = _src.getUMat();
2109         UMat dst = _dst.getUMat();
2110 
2111         size_t globalsize[2];
2112         size_t localsize[2];
2113         String kernel_name;
2114 
2115         bool is1d = (flags & DFT_ROWS) != 0 || num_dfts == 1;
2116         bool inv = (flags & DFT_INVERSE) != 0;
2117         String options = buildOptions;
2118 
2119         if (rows)
2120         {
2121             globalsize[0] = thread_count; globalsize[1] = src.rows;
2122             localsize[0] = thread_count; localsize[1] = 1;
2123             kernel_name = !inv ? "fft_multi_radix_rows" : "ifft_multi_radix_rows";
2124             if ((is1d || inv) && (flags & DFT_SCALE))
2125                 options += " -D DFT_SCALE";
2126         }
2127         else
2128         {
2129             globalsize[0] = num_dfts; globalsize[1] = thread_count;
2130             localsize[0] = 1; localsize[1] = thread_count;
2131             kernel_name = !inv ? "fft_multi_radix_cols" : "ifft_multi_radix_cols";
2132             if (flags & DFT_SCALE)
2133                 options += " -D DFT_SCALE";
2134         }
2135 
2136         options += src.channels() == 1 ? " -D REAL_INPUT" : " -D COMPLEX_INPUT";
2137         options += dst.channels() == 1 ? " -D REAL_OUTPUT" : " -D COMPLEX_OUTPUT";
2138         options += is1d ? " -D IS_1D" : "";
2139 
2140         if (!inv)
2141         {
2142             if ((is1d && src.channels() == 1) || (rows && (fftType == R2R)))
2143                 options += " -D NO_CONJUGATE";
2144         }
2145         else
2146         {
2147             if (rows && (fftType == C2R || fftType == R2R))
2148                 options += " -D NO_CONJUGATE";
2149             if (dst.cols % 2 == 0)
2150                 options += " -D EVEN";
2151         }
2152 
2153         ocl::Kernel k(kernel_name.c_str(), ocl::core::fft_oclsrc, options);
2154         if (k.empty())
2155             return false;
2156 
2157         k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::ReadOnlyNoSize(twiddles), thread_count, num_dfts);
2158         return k.run(2, globalsize, localsize, false);
2159     }
2160 
2161 private:
ocl_getRadixescv::OCL_FftPlan2162     static void ocl_getRadixes(int cols, std::vector<int>& radixes, std::vector<int>& blocks, int& min_radix)
2163     {
2164         int factors[34];
2165         int nf = DFTFactorize(cols, factors);
2166 
2167         int n = 1;
2168         int factor_index = 0;
2169         min_radix = INT_MAX;
2170 
2171         // 2^n transforms
2172         if ((factors[factor_index] & 1) == 0)
2173         {
2174             for( ; n < factors[factor_index];)
2175             {
2176                 int radix = 2, block = 1;
2177                 if (8*n <= factors[0])
2178                     radix = 8;
2179                 else if (4*n <= factors[0])
2180                 {
2181                     radix = 4;
2182                     if (cols % 12 == 0)
2183                         block = 3;
2184                     else if (cols % 8 == 0)
2185                         block = 2;
2186                 }
2187                 else
2188                 {
2189                     if (cols % 10 == 0)
2190                         block = 5;
2191                     else if (cols % 8 == 0)
2192                         block = 4;
2193                     else if (cols % 6 == 0)
2194                         block = 3;
2195                     else if (cols % 4 == 0)
2196                         block = 2;
2197                 }
2198 
2199                 radixes.push_back(radix);
2200                 blocks.push_back(block);
2201                 min_radix = min(min_radix, block*radix);
2202                 n *= radix;
2203             }
2204             factor_index++;
2205         }
2206 
2207         // all the other transforms
2208         for( ; factor_index < nf; factor_index++)
2209         {
2210             int radix = factors[factor_index], block = 1;
2211             if (radix == 3)
2212             {
2213                 if (cols % 12 == 0)
2214                     block = 4;
2215                 else if (cols % 9 == 0)
2216                     block = 3;
2217                 else if (cols % 6 == 0)
2218                     block = 2;
2219             }
2220             else if (radix == 5)
2221             {
2222                 if (cols % 10 == 0)
2223                     block = 2;
2224             }
2225             radixes.push_back(radix);
2226             blocks.push_back(block);
2227             min_radix = min(min_radix, block*radix);
2228         }
2229     }
2230 
2231     template <typename T>
fillRadixTablecv::OCL_FftPlan2232     static void fillRadixTable(UMat twiddles, const std::vector<int>& radixes)
2233     {
2234         Mat tw = twiddles.getMat(ACCESS_WRITE);
2235         T* ptr = tw.ptr<T>();
2236         int ptr_index = 0;
2237 
2238         int n = 1;
2239         for (size_t i=0; i<radixes.size(); i++)
2240         {
2241             int radix = radixes[i];
2242             n *= radix;
2243 
2244             for (int j=1; j<radix; j++)
2245             {
2246                 double theta = -CV_2PI*j/n;
2247 
2248                 for (int k=0; k<(n/radix); k++)
2249                 {
2250                     ptr[ptr_index++] = (T) cos(k*theta);
2251                     ptr[ptr_index++] = (T) sin(k*theta);
2252                 }
2253             }
2254         }
2255     }
2256 };
2257 
2258 class OCL_FftPlanCache
2259 {
2260 public:
getInstance()2261     static OCL_FftPlanCache & getInstance()
2262     {
2263         CV_SINGLETON_LAZY_INIT_REF(OCL_FftPlanCache, new OCL_FftPlanCache())
2264     }
2265 
getFftPlan(int dft_size,int depth)2266     Ptr<OCL_FftPlan> getFftPlan(int dft_size, int depth)
2267     {
2268         int key = (dft_size << 16) | (depth & 0xFFFF);
2269         std::map<int, Ptr<OCL_FftPlan> >::iterator f = planStorage.find(key);
2270         if (f != planStorage.end())
2271         {
2272             return f->second;
2273         }
2274         else
2275         {
2276             Ptr<OCL_FftPlan> newPlan = Ptr<OCL_FftPlan>(new OCL_FftPlan(dft_size, depth));
2277             planStorage[key] = newPlan;
2278             return newPlan;
2279         }
2280     }
2281 
~OCL_FftPlanCache()2282     ~OCL_FftPlanCache()
2283     {
2284         planStorage.clear();
2285     }
2286 
2287 protected:
OCL_FftPlanCache()2288     OCL_FftPlanCache() :
2289         planStorage()
2290     {
2291     }
2292     std::map<int, Ptr<OCL_FftPlan> > planStorage;
2293 };
2294 
ocl_dft_rows(InputArray _src,OutputArray _dst,int nonzero_rows,int flags,int fftType)2295 static bool ocl_dft_rows(InputArray _src, OutputArray _dst, int nonzero_rows, int flags, int fftType)
2296 {
2297     int type = _src.type(), depth = CV_MAT_DEPTH(type);
2298     Ptr<OCL_FftPlan> plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.cols(), depth);
2299     return plan->enqueueTransform(_src, _dst, nonzero_rows, flags, fftType, true);
2300 }
2301 
ocl_dft_cols(InputArray _src,OutputArray _dst,int nonzero_cols,int flags,int fftType)2302 static bool ocl_dft_cols(InputArray _src, OutputArray _dst, int nonzero_cols, int flags, int fftType)
2303 {
2304     int type = _src.type(), depth = CV_MAT_DEPTH(type);
2305     Ptr<OCL_FftPlan> plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.rows(), depth);
2306     return plan->enqueueTransform(_src, _dst, nonzero_cols, flags, fftType, false);
2307 }
2308 
determineFFTType(bool real_input,bool complex_input,bool real_output,bool complex_output,bool inv)2309 inline FftType determineFFTType(bool real_input, bool complex_input, bool real_output, bool complex_output, bool inv)
2310 {
2311     // output format is not specified
2312     if (!real_output && !complex_output)
2313         complex_output = true;
2314 
2315     // input or output format is ambiguous
2316     if (real_input == complex_input || real_output == complex_output)
2317         CV_Error(Error::StsBadArg, "Invalid FFT input or output format");
2318 
2319     FftType result = real_input ? (real_output ? R2R : R2C) : (real_output ? C2R : C2C);
2320 
2321     // Forward Complex to CCS not supported
2322     if (result == C2R && !inv)
2323         result = C2C;
2324 
2325     // Inverse CCS to Complex not supported
2326     if (result == R2C && inv)
2327         result = R2R;
2328 
2329     return result;
2330 }
2331 
ocl_dft(InputArray _src,OutputArray _dst,int flags,int nonzero_rows)2332 static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_rows)
2333 {
2334     int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
2335     Size ssize = _src.size();
2336     bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
2337 
2338     if (!(cn == 1 || cn == 2)
2339         || !(depth == CV_32F || (depth == CV_64F && doubleSupport))
2340         || ((flags & DFT_REAL_OUTPUT) && (flags & DFT_COMPLEX_OUTPUT)))
2341         return false;
2342 
2343     // if is not a multiplication of prime numbers { 2, 3, 5 }
2344     if (ssize.area() != getOptimalDFTSize(ssize.area()))
2345         return false;
2346 
2347     UMat src = _src.getUMat();
2348     bool inv = (flags & DFT_INVERSE) != 0 ? 1 : 0;
2349 
2350     if( nonzero_rows <= 0 || nonzero_rows > _src.rows() )
2351         nonzero_rows = _src.rows();
2352     bool is1d = (flags & DFT_ROWS) != 0 || nonzero_rows == 1;
2353 
2354     FftType fftType = determineFFTType(cn == 1, cn == 2,
2355         (flags & DFT_REAL_OUTPUT) != 0, (flags & DFT_COMPLEX_OUTPUT) != 0, inv);
2356 
2357     UMat output;
2358     if (fftType == C2C || fftType == R2C)
2359     {
2360         // complex output
2361         _dst.create(src.size(), CV_MAKETYPE(depth, 2));
2362         output = _dst.getUMat();
2363     }
2364     else
2365     {
2366         // real output
2367         if (is1d)
2368         {
2369             _dst.create(src.size(), CV_MAKETYPE(depth, 1));
2370             output = _dst.getUMat();
2371         }
2372         else
2373         {
2374             _dst.create(src.size(), CV_MAKETYPE(depth, 1));
2375             output.create(src.size(), CV_MAKETYPE(depth, 2));
2376         }
2377     }
2378 
2379     bool result = false;
2380     if (!inv)
2381     {
2382         int nonzero_cols = fftType == R2R ? output.cols/2 + 1 : output.cols;
2383         result = ocl_dft_rows(src, output, nonzero_rows, flags, fftType);
2384         if (!is1d)
2385             result = result && ocl_dft_cols(output, _dst, nonzero_cols, flags, fftType);
2386     }
2387     else
2388     {
2389         if (fftType == C2C)
2390         {
2391             // complex output
2392             result = ocl_dft_rows(src, output, nonzero_rows, flags, fftType);
2393             if (!is1d)
2394                 result = result && ocl_dft_cols(output, output, output.cols, flags, fftType);
2395         }
2396         else
2397         {
2398             if (is1d)
2399             {
2400                 result = ocl_dft_rows(src, output, nonzero_rows, flags, fftType);
2401             }
2402             else
2403             {
2404                 int nonzero_cols = src.cols/2 + 1;
2405                 result = ocl_dft_cols(src, output, nonzero_cols, flags, fftType);
2406                 result = result && ocl_dft_rows(output, _dst, nonzero_rows, flags, fftType);
2407             }
2408         }
2409     }
2410     return result;
2411 }
2412 
2413 } // namespace cv;
2414 
2415 #endif
2416 
2417 #ifdef HAVE_CLAMDFFT
2418 
2419 namespace cv {
2420 
2421 #define CLAMDDFT_Assert(func) \
2422     { \
2423         clfftStatus s = (func); \
2424         CV_Assert(s == CLFFT_SUCCESS); \
2425     }
2426 
2427 class PlanCache
2428 {
2429     struct FftPlan
2430     {
FftPlancv::PlanCache::FftPlan2431         FftPlan(const Size & _dft_size, int _src_step, int _dst_step, bool _doubleFP, bool _inplace, int _flags, FftType _fftType) :
2432             dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step),
2433             doubleFP(_doubleFP), inplace(_inplace), flags(_flags), fftType(_fftType),
2434             context((cl_context)ocl::Context::getDefault().ptr()), plHandle(0)
2435         {
2436             bool dft_inverse = (flags & DFT_INVERSE) != 0;
2437             bool dft_scale = (flags & DFT_SCALE) != 0;
2438             bool dft_rows = (flags & DFT_ROWS) != 0;
2439 
2440             clfftLayout inLayout = CLFFT_REAL, outLayout = CLFFT_REAL;
2441             clfftDim dim = dft_size.height == 1 || dft_rows ? CLFFT_1D : CLFFT_2D;
2442 
2443             size_t batchSize = dft_rows ? dft_size.height : 1;
2444             size_t clLengthsIn[3] = { (size_t)dft_size.width, dft_rows ? 1 : (size_t)dft_size.height, 1 };
2445             size_t clStridesIn[3] = { 1, 1, 1 };
2446             size_t clStridesOut[3]  = { 1, 1, 1 };
2447             int elemSize = doubleFP ? sizeof(double) : sizeof(float);
2448 
2449             switch (fftType)
2450             {
2451             case C2C:
2452                 inLayout = CLFFT_COMPLEX_INTERLEAVED;
2453                 outLayout = CLFFT_COMPLEX_INTERLEAVED;
2454                 clStridesIn[1] = src_step / (elemSize << 1);
2455                 clStridesOut[1] = dst_step / (elemSize << 1);
2456                 break;
2457             case R2C:
2458                 inLayout = CLFFT_REAL;
2459                 outLayout = CLFFT_HERMITIAN_INTERLEAVED;
2460                 clStridesIn[1] = src_step / elemSize;
2461                 clStridesOut[1] = dst_step / (elemSize << 1);
2462                 break;
2463             case C2R:
2464                 inLayout = CLFFT_HERMITIAN_INTERLEAVED;
2465                 outLayout = CLFFT_REAL;
2466                 clStridesIn[1] = src_step / (elemSize << 1);
2467                 clStridesOut[1] = dst_step / elemSize;
2468                 break;
2469             case R2R:
2470             default:
2471                 CV_Error(Error::StsNotImplemented, "AMD Fft does not support this type");
2472                 break;
2473             }
2474 
2475             clStridesIn[2] = dft_rows ? clStridesIn[1] : dft_size.width * clStridesIn[1];
2476             clStridesOut[2] = dft_rows ? clStridesOut[1] : dft_size.width * clStridesOut[1];
2477 
2478             CLAMDDFT_Assert(clfftCreateDefaultPlan(&plHandle, (cl_context)ocl::Context::getDefault().ptr(), dim, clLengthsIn))
2479 
2480             // setting plan properties
2481             CLAMDDFT_Assert(clfftSetPlanPrecision(plHandle, doubleFP ? CLFFT_DOUBLE : CLFFT_SINGLE));
2482             CLAMDDFT_Assert(clfftSetResultLocation(plHandle, inplace ? CLFFT_INPLACE : CLFFT_OUTOFPLACE))
2483             CLAMDDFT_Assert(clfftSetLayout(plHandle, inLayout, outLayout))
2484             CLAMDDFT_Assert(clfftSetPlanBatchSize(plHandle, batchSize))
2485             CLAMDDFT_Assert(clfftSetPlanInStride(plHandle, dim, clStridesIn))
2486             CLAMDDFT_Assert(clfftSetPlanOutStride(plHandle, dim, clStridesOut))
2487             CLAMDDFT_Assert(clfftSetPlanDistance(plHandle, clStridesIn[dim], clStridesOut[dim]))
2488 
2489             float scale = dft_scale ? 1.0f / (dft_rows ? dft_size.width : dft_size.area()) : 1.0f;
2490             CLAMDDFT_Assert(clfftSetPlanScale(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale))
2491 
2492             // ready to bake
2493             cl_command_queue queue = (cl_command_queue)ocl::Queue::getDefault().ptr();
2494             CLAMDDFT_Assert(clfftBakePlan(plHandle, 1, &queue, NULL, NULL))
2495         }
2496 
~FftPlancv::PlanCache::FftPlan2497         ~FftPlan()
2498         {
2499             // Do not tear down clFFT.
2500             // The user application may still use clFFT even after OpenCV is unloaded.
2501             /*clfftDestroyPlan(&plHandle);*/
2502         }
2503 
2504         friend class PlanCache;
2505 
2506     private:
2507         Size dft_size;
2508         int src_step, dst_step;
2509         bool doubleFP;
2510         bool inplace;
2511         int flags;
2512         FftType fftType;
2513 
2514         cl_context context;
2515         clfftPlanHandle plHandle;
2516     };
2517 
2518 public:
getInstance()2519     static PlanCache & getInstance()
2520     {
2521         CV_SINGLETON_LAZY_INIT_REF(PlanCache, new PlanCache())
2522     }
2523 
getPlanHandle(const Size & dft_size,int src_step,int dst_step,bool doubleFP,bool inplace,int flags,FftType fftType)2524     clfftPlanHandle getPlanHandle(const Size & dft_size, int src_step, int dst_step, bool doubleFP,
2525                                   bool inplace, int flags, FftType fftType)
2526     {
2527         cl_context currentContext = (cl_context)ocl::Context::getDefault().ptr();
2528 
2529         for (size_t i = 0, size = planStorage.size(); i < size; ++i)
2530         {
2531             const FftPlan * const plan = planStorage[i];
2532 
2533             if (plan->dft_size == dft_size &&
2534                 plan->flags == flags &&
2535                 plan->src_step == src_step &&
2536                 plan->dst_step == dst_step &&
2537                 plan->doubleFP == doubleFP &&
2538                 plan->fftType == fftType &&
2539                 plan->inplace == inplace)
2540             {
2541                 if (plan->context != currentContext)
2542                 {
2543                     planStorage.erase(planStorage.begin() + i);
2544                     break;
2545                 }
2546 
2547                 return plan->plHandle;
2548             }
2549         }
2550 
2551         // no baked plan is found, so let's create a new one
2552         Ptr<FftPlan> newPlan = Ptr<FftPlan>(new FftPlan(dft_size, src_step, dst_step, doubleFP, inplace, flags, fftType));
2553         planStorage.push_back(newPlan);
2554 
2555         return newPlan->plHandle;
2556     }
2557 
~PlanCache()2558     ~PlanCache()
2559     {
2560         planStorage.clear();
2561     }
2562 
2563 protected:
PlanCache()2564     PlanCache() :
2565         planStorage()
2566     {
2567     }
2568 
2569     std::vector<Ptr<FftPlan> > planStorage;
2570 };
2571 
2572 extern "C" {
2573 
oclCleanupCallback(cl_event e,cl_int,void * p)2574 static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p)
2575 {
2576     UMatData * u = (UMatData *)p;
2577 
2578     if( u && CV_XADD(&u->urefcount, -1) == 1 )
2579         u->currAllocator->deallocate(u);
2580     u = 0;
2581 
2582     clReleaseEvent(e), e = 0;
2583 }
2584 
2585 }
2586 
ocl_dft_amdfft(InputArray _src,OutputArray _dst,int flags)2587 static bool ocl_dft_amdfft(InputArray _src, OutputArray _dst, int flags)
2588 {
2589     int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
2590     Size ssize = _src.size();
2591 
2592     bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
2593     if ( (!doubleSupport && depth == CV_64F) ||
2594          !(type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2) ||
2595          _src.offset() != 0)
2596         return false;
2597 
2598     // if is not a multiplication of prime numbers { 2, 3, 5 }
2599     if (ssize.area() != getOptimalDFTSize(ssize.area()))
2600         return false;
2601 
2602     int dst_complex_input = cn == 2 ? 1 : 0;
2603     bool dft_inverse = (flags & DFT_INVERSE) != 0 ? 1 : 0;
2604     int dft_complex_output = (flags & DFT_COMPLEX_OUTPUT) != 0;
2605     bool dft_real_output = (flags & DFT_REAL_OUTPUT) != 0;
2606 
2607     CV_Assert(dft_complex_output + dft_real_output < 2);
2608     FftType fftType = (FftType)(dst_complex_input << 0 | dft_complex_output << 1);
2609 
2610     switch (fftType)
2611     {
2612     case C2C:
2613         _dst.create(ssize.height, ssize.width, CV_MAKE_TYPE(depth, 2));
2614         break;
2615     case R2C: // TODO implement it if possible
2616     case C2R: // TODO implement it if possible
2617     case R2R: // AMD Fft does not support this type
2618     default:
2619         return false;
2620     }
2621 
2622     UMat src = _src.getUMat(), dst = _dst.getUMat();
2623     bool inplace = src.u == dst.u;
2624 
2625     clfftPlanHandle plHandle = PlanCache::getInstance().
2626             getPlanHandle(ssize, (int)src.step, (int)dst.step,
2627                           depth == CV_64F, inplace, flags, fftType);
2628 
2629     // get the bufferSize
2630     size_t bufferSize = 0;
2631     CLAMDDFT_Assert(clfftGetTmpBufSize(plHandle, &bufferSize))
2632     UMat tmpBuffer(1, (int)bufferSize, CV_8UC1);
2633 
2634     cl_mem srcarg = (cl_mem)src.handle(ACCESS_READ);
2635     cl_mem dstarg = (cl_mem)dst.handle(ACCESS_RW);
2636 
2637     cl_command_queue queue = (cl_command_queue)ocl::Queue::getDefault().ptr();
2638     cl_event e = 0;
2639 
2640     CLAMDDFT_Assert(clfftEnqueueTransform(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
2641                                           1, &queue, 0, NULL, &e,
2642                                           &srcarg, &dstarg, (cl_mem)tmpBuffer.handle(ACCESS_RW)))
2643 
2644     tmpBuffer.addref();
2645     clSetEventCallback(e, CL_COMPLETE, oclCleanupCallback, tmpBuffer.u);
2646     return true;
2647 }
2648 
2649 #undef DFT_ASSERT
2650 
2651 }
2652 
2653 #endif // HAVE_CLAMDFFT
2654 
2655 namespace cv
2656 {
2657 
2658 template <typename T>
complementComplex(T * ptr,size_t step,int n,int len,int dft_dims)2659 static void complementComplex(T * ptr, size_t step, int n, int len, int dft_dims)
2660 {
2661     T* p0 = (T*)ptr;
2662     size_t dstep = step/sizeof(p0[0]);
2663     for(int i = 0; i < len; i++ )
2664     {
2665         T* p = p0 + dstep*i;
2666         T* q = dft_dims == 1 || i == 0 || i*2 == len ? p : p0 + dstep*(len-i);
2667 
2668         for( int j = 1; j < (n+1)/2; j++ )
2669         {
2670             p[(n-j)*2] = q[j*2];
2671             p[(n-j)*2+1] = -q[j*2+1];
2672         }
2673     }
2674 }
2675 
complementComplexOutput(int depth,uchar * ptr,size_t step,int count,int len,int dft_dims)2676 static void complementComplexOutput(int depth, uchar * ptr, size_t step, int count, int len, int dft_dims)
2677 {
2678     if( depth == CV_32F )
2679         complementComplex((float*)ptr, step, count, len, dft_dims);
2680     else
2681         complementComplex((double*)ptr, step, count, len, dft_dims);
2682 }
2683 
2684 enum DftMode {
2685     InvalidDft = 0,
2686     FwdRealToCCS,
2687     FwdRealToComplex,
2688     FwdComplexToComplex,
2689     InvCCSToReal,
2690     InvComplexToReal,
2691     InvComplexToComplex,
2692 };
2693 
2694 enum DftDims {
2695     InvalidDim = 0,
2696     OneDim,
2697     OneDimColWise,
2698     TwoDims
2699 };
2700 
modeName(DftMode m)2701 inline const char * modeName(DftMode m)
2702 {
2703     switch (m)
2704     {
2705     case InvalidDft: return "InvalidDft";
2706     case FwdRealToCCS: return "FwdRealToCCS";
2707     case FwdRealToComplex: return "FwdRealToComplex";
2708     case FwdComplexToComplex: return "FwdComplexToComplex";
2709     case InvCCSToReal: return "InvCCSToReal";
2710     case InvComplexToReal: return "InvComplexToReal";
2711     case InvComplexToComplex: return "InvComplexToComplex";
2712     }
2713     return 0;
2714 }
2715 
dimsName(DftDims d)2716 inline const char * dimsName(DftDims d)
2717 {
2718     switch (d)
2719     {
2720     case InvalidDim: return "InvalidDim";
2721     case OneDim: return "OneDim";
2722     case OneDimColWise: return "OneDimColWise";
2723     case TwoDims: return "TwoDims";
2724     };
2725     return 0;
2726 }
2727 
2728 template <typename T>
isInv(T mode)2729 inline bool isInv(T mode)
2730 {
2731     switch ((DftMode)mode)
2732     {
2733         case InvCCSToReal:
2734         case InvComplexToReal:
2735         case InvComplexToComplex: return true;
2736         default: return false;
2737     }
2738 }
2739 
determineMode(bool inv,int cn1,int cn2)2740 inline DftMode determineMode(bool inv, int cn1, int cn2)
2741 {
2742     if (!inv)
2743     {
2744         if (cn1 == 1 && cn2 == 1)
2745             return FwdRealToCCS;
2746         else if (cn1 == 1 && cn2 == 2)
2747             return FwdRealToComplex;
2748         else if (cn1 == 2 && cn2 == 2)
2749             return FwdComplexToComplex;
2750     }
2751     else
2752     {
2753         if (cn1 == 1 && cn2 == 1)
2754             return InvCCSToReal;
2755         else if (cn1 == 2 && cn2 == 1)
2756             return InvComplexToReal;
2757         else if (cn1 == 2 && cn2 == 2)
2758             return InvComplexToComplex;
2759     }
2760     return InvalidDft;
2761 }
2762 
2763 
determineDims(int rows,int cols,bool isRowWise,bool isContinuous)2764 inline DftDims determineDims(int rows, int cols, bool isRowWise, bool isContinuous)
2765 {
2766     // printf("%d x %d (%d, %d)\n", rows, cols, isRowWise, isContinuous);
2767     if (isRowWise)
2768         return OneDim;
2769     if (cols == 1 && rows > 1) // one-column-shaped input
2770     {
2771         if (isContinuous)
2772             return OneDim;
2773         else
2774             return OneDimColWise;
2775     }
2776     if (rows == 1)
2777         return OneDim;
2778     if (cols > 1 && rows > 1)
2779         return TwoDims;
2780     return InvalidDim;
2781 }
2782 
2783 class OcvDftImpl CV_FINAL : public hal::DFT2D
2784 {
2785 protected:
2786     Ptr<hal::DFT1D> contextA;
2787     Ptr<hal::DFT1D> contextB;
2788     bool needBufferA;
2789     bool needBufferB;
2790     bool inv;
2791     int width;
2792     int height;
2793     DftMode mode;
2794     int elem_size;
2795     int complex_elem_size;
2796     int depth;
2797     bool real_transform;
2798     int nonzero_rows;
2799     bool isRowTransform;
2800     bool isScaled;
2801     std::vector<int> stages;
2802     bool useIpp;
2803     int src_channels;
2804     int dst_channels;
2805 
2806     AutoBuffer<uchar> tmp_bufA;
2807     AutoBuffer<uchar> tmp_bufB;
2808     AutoBuffer<uchar> buf0;
2809     AutoBuffer<uchar> buf1;
2810 
2811 public:
OcvDftImpl()2812     OcvDftImpl()
2813     {
2814         needBufferA = false;
2815         needBufferB = false;
2816         inv = false;
2817         width = 0;
2818         height = 0;
2819         mode = InvalidDft;
2820         elem_size = 0;
2821         complex_elem_size = 0;
2822         depth = 0;
2823         real_transform = false;
2824         nonzero_rows = 0;
2825         isRowTransform = false;
2826         isScaled = false;
2827         useIpp = false;
2828         src_channels = 0;
2829         dst_channels = 0;
2830     }
2831 
init(int _width,int _height,int _depth,int _src_channels,int _dst_channels,int flags,int _nonzero_rows)2832     void init(int _width, int _height, int _depth, int _src_channels, int _dst_channels, int flags, int _nonzero_rows)
2833     {
2834         bool isComplex = _src_channels != _dst_channels;
2835         nonzero_rows = _nonzero_rows;
2836         width = _width;
2837         height = _height;
2838         depth = _depth;
2839         src_channels = _src_channels;
2840         dst_channels = _dst_channels;
2841         bool isInverse = (flags & CV_HAL_DFT_INVERSE) != 0;
2842         bool isInplace = (flags & CV_HAL_DFT_IS_INPLACE) != 0;
2843         bool isContinuous = (flags & CV_HAL_DFT_IS_CONTINUOUS) != 0;
2844         mode = determineMode(isInverse, _src_channels, _dst_channels);
2845         inv = isInverse;
2846         isRowTransform = (flags & CV_HAL_DFT_ROWS) != 0;
2847         isScaled = (flags & CV_HAL_DFT_SCALE) != 0;
2848         needBufferA = false;
2849         needBufferB = false;
2850         real_transform = (mode != FwdComplexToComplex && mode != InvComplexToComplex);
2851 
2852         elem_size = (depth == CV_32F) ? sizeof(float) : sizeof(double);
2853         complex_elem_size = elem_size * 2;
2854         if( !real_transform )
2855             elem_size = complex_elem_size;
2856 
2857 #if defined USE_IPP_DFT
2858         CV_IPP_CHECK()
2859         {
2860             if (nonzero_rows == 0 && depth == CV_32F && ((width * height)>(int)(1<<6)))
2861             {
2862                 if (mode == FwdComplexToComplex || mode == InvComplexToComplex || mode == FwdRealToCCS || mode == InvCCSToReal)
2863                 {
2864                     useIpp = true;
2865                     return;
2866                 }
2867             }
2868         }
2869 #endif
2870 
2871         DftDims dims = determineDims(height, width, isRowTransform, isContinuous);
2872         if (dims == TwoDims)
2873         {
2874             stages.resize(2);
2875             if (mode == InvCCSToReal || mode == InvComplexToReal)
2876             {
2877                 stages[0] = 1;
2878                 stages[1] = 0;
2879             }
2880             else
2881             {
2882                 stages[0] = 0;
2883                 stages[1] = 1;
2884             }
2885         }
2886         else
2887         {
2888             stages.resize(1);
2889             if (dims == OneDimColWise)
2890                 stages[0] = 1;
2891             else
2892                 stages[0] = 0;
2893         }
2894 
2895         for(uint stageIndex = 0; stageIndex < stages.size(); ++stageIndex)
2896         {
2897             if (stageIndex == 1)
2898             {
2899                 isInplace = true;
2900                 isComplex = false;
2901             }
2902 
2903             int stage = stages[stageIndex];
2904             bool isLastStage = (stageIndex + 1 == stages.size());
2905 
2906             int len, count;
2907 
2908             int f = 0;
2909             if (inv)
2910                 f |= CV_HAL_DFT_INVERSE;
2911             if (isScaled)
2912                 f |= CV_HAL_DFT_SCALE;
2913             if (isRowTransform)
2914                 f |= CV_HAL_DFT_ROWS;
2915             if (isComplex)
2916                 f |= CV_HAL_DFT_COMPLEX_OUTPUT;
2917             if (real_transform)
2918                 f |= CV_HAL_DFT_REAL_OUTPUT;
2919             if (!isLastStage)
2920                 f |= CV_HAL_DFT_TWO_STAGE;
2921 
2922             if( stage == 0 ) // row-wise transform
2923             {
2924                 if (width == 1 && !isRowTransform )
2925                 {
2926                     len = height;
2927                     count = width;
2928                 }
2929                 else
2930                 {
2931                     len = width;
2932                     count = height;
2933                 }
2934                 needBufferA = isInplace;
2935                 contextA = hal::DFT1D::create(len, count, depth, f, &needBufferA);
2936                 if (needBufferA)
2937                     tmp_bufA.allocate(len * complex_elem_size);
2938             }
2939             else
2940             {
2941                 len = height;
2942                 count = width;
2943                 f |= CV_HAL_DFT_STAGE_COLS;
2944                 needBufferB = isInplace;
2945                 contextB = hal::DFT1D::create(len, count, depth, f, &needBufferB);
2946                 if (needBufferB)
2947                     tmp_bufB.allocate(len * complex_elem_size);
2948 
2949                 buf0.allocate(len * complex_elem_size);
2950                 buf1.allocate(len * complex_elem_size);
2951             }
2952         }
2953     }
2954 
apply(const uchar * src,size_t src_step,uchar * dst,size_t dst_step)2955     void apply(const uchar * src, size_t src_step, uchar * dst, size_t dst_step) CV_OVERRIDE
2956     {
2957 #if defined USE_IPP_DFT
2958         if (useIpp)
2959         {
2960             int ipp_norm_flag = !isScaled ? 8 : inv ? 2 : 1;
2961             if (!isRowTransform)
2962             {
2963                 if (mode == FwdComplexToComplex || mode == InvComplexToComplex)
2964                 {
2965                     if (ippi_DFT_C_32F(src, src_step, dst, dst_step, width, height, inv, ipp_norm_flag))
2966                     {
2967                         CV_IMPL_ADD(CV_IMPL_IPP);
2968                         return;
2969                     }
2970                     setIppErrorStatus();
2971                 }
2972                 else if (mode == FwdRealToCCS || mode == InvCCSToReal)
2973                 {
2974                     if (ippi_DFT_R_32F(src, src_step, dst, dst_step, width, height, inv, ipp_norm_flag))
2975                     {
2976                         CV_IMPL_ADD(CV_IMPL_IPP);
2977                         return;
2978                     }
2979                     setIppErrorStatus();
2980                 }
2981             }
2982             else
2983             {
2984                 if (mode == FwdComplexToComplex || mode == InvComplexToComplex)
2985                 {
2986                     ippiDFT_C_Func ippiFunc = inv ? (ippiDFT_C_Func)ippiDFTInv_CToC_32fc_C1R : (ippiDFT_C_Func)ippiDFTFwd_CToC_32fc_C1R;
2987                     if (Dft_C_IPPLoop(src, src_step, dst, dst_step, width, height, IPPDFT_C_Functor(ippiFunc),ipp_norm_flag))
2988                     {
2989                         CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
2990                         return;
2991                     }
2992                     setIppErrorStatus();
2993                 }
2994                 else if (mode == FwdRealToCCS || mode == InvCCSToReal)
2995                 {
2996                     ippiDFT_R_Func ippiFunc = inv ? (ippiDFT_R_Func)ippiDFTInv_PackToR_32f_C1R : (ippiDFT_R_Func)ippiDFTFwd_RToPack_32f_C1R;
2997                     if (Dft_R_IPPLoop(src, src_step, dst, dst_step, width, height, IPPDFT_R_Functor(ippiFunc),ipp_norm_flag))
2998                     {
2999                         CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
3000                         return;
3001                     }
3002                     setIppErrorStatus();
3003                 }
3004             }
3005             return;
3006         }
3007 #endif
3008 
3009         for(uint stageIndex = 0; stageIndex < stages.size(); ++stageIndex)
3010         {
3011             int stage_src_channels = src_channels;
3012             int stage_dst_channels = dst_channels;
3013 
3014             if (stageIndex == 1)
3015             {
3016                 src = dst;
3017                 src_step = dst_step;
3018                 stage_src_channels = stage_dst_channels;
3019             }
3020 
3021             int stage = stages[stageIndex];
3022             bool isLastStage = (stageIndex + 1 == stages.size());
3023             bool isComplex = stage_src_channels != stage_dst_channels;
3024 
3025             if( stage == 0 )
3026                 rowDft(src, src_step, dst, dst_step, isComplex, isLastStage);
3027             else
3028                 colDft(src, src_step, dst, dst_step, stage_src_channels, stage_dst_channels, isLastStage);
3029         }
3030     }
3031 
3032 protected:
3033 
rowDft(const uchar * src_data,size_t src_step,uchar * dst_data,size_t dst_step,bool isComplex,bool isLastStage)3034     void rowDft(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, bool isComplex, bool isLastStage)
3035     {
3036         int len, count;
3037         if (width == 1 && !isRowTransform )
3038         {
3039             len = height;
3040             count = width;
3041         }
3042         else
3043         {
3044             len = width;
3045             count = height;
3046         }
3047         int dptr_offset = 0;
3048         int dst_full_len = len*elem_size;
3049 
3050         if( needBufferA )
3051         {
3052             if (mode == FwdRealToCCS && (len & 1) && len > 1)
3053                 dptr_offset = elem_size;
3054         }
3055 
3056         if( !inv && isComplex )
3057             dst_full_len += (len & 1) ? elem_size : complex_elem_size;
3058 
3059         int nz = nonzero_rows;
3060         if( nz <= 0 || nz > count )
3061             nz = count;
3062 
3063         int i;
3064         for( i = 0; i < nz; i++ )
3065         {
3066             const uchar* sptr = src_data + src_step * i;
3067             uchar* dptr0 = dst_data + dst_step * i;
3068             uchar* dptr = dptr0;
3069 
3070             if( needBufferA )
3071                 dptr = tmp_bufA.data();
3072 
3073             contextA->apply(sptr, dptr);
3074 
3075             if( needBufferA )
3076                 memcpy( dptr0, dptr + dptr_offset, dst_full_len );
3077         }
3078 
3079         for( ; i < count; i++ )
3080         {
3081             uchar* dptr0 = dst_data + dst_step * i;
3082             memset( dptr0, 0, dst_full_len );
3083         }
3084         if(isLastStage &&  mode == FwdRealToComplex)
3085             complementComplexOutput(depth, dst_data, dst_step, len, nz, 1);
3086     }
3087 
colDft(const uchar * src_data,size_t src_step,uchar * dst_data,size_t dst_step,int stage_src_channels,int stage_dst_channels,bool isLastStage)3088     void colDft(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int stage_src_channels, int stage_dst_channels, bool isLastStage)
3089     {
3090         int len = height;
3091         int count = width;
3092         int a = 0, b = count;
3093         uchar *dbuf0, *dbuf1;
3094         const uchar* sptr0 = src_data;
3095         uchar* dptr0 = dst_data;
3096 
3097         dbuf0 = buf0.data(), dbuf1 = buf1.data();
3098 
3099         if( needBufferB )
3100         {
3101             dbuf1 = tmp_bufB.data();
3102             dbuf0 = buf1.data();
3103         }
3104 
3105         if( real_transform )
3106         {
3107             int even;
3108             a = 1;
3109             even = (count & 1) == 0;
3110             b = (count+1)/2;
3111             if( !inv )
3112             {
3113                 memset( buf0.data(), 0, len*complex_elem_size );
3114                 CopyColumn( sptr0, src_step, buf0.data(), complex_elem_size, len, elem_size );
3115                 sptr0 += stage_dst_channels*elem_size;
3116                 if( even )
3117                 {
3118                     memset( buf1.data(), 0, len*complex_elem_size );
3119                     CopyColumn( sptr0 + (count-2)*elem_size, src_step,
3120                                 buf1.data(), complex_elem_size, len, elem_size );
3121                 }
3122             }
3123             else if( stage_src_channels == 1 )
3124             {
3125                 CopyColumn( sptr0, src_step, buf0.data(), elem_size, len, elem_size );
3126                 ExpandCCS( buf0.data(), len, elem_size );
3127                 if( even )
3128                 {
3129                     CopyColumn( sptr0 + (count-1)*elem_size, src_step,
3130                                 buf1.data(), elem_size, len, elem_size );
3131                     ExpandCCS( buf1.data(), len, elem_size );
3132                 }
3133                 sptr0 += elem_size;
3134             }
3135             else
3136             {
3137                 CopyColumn( sptr0, src_step, buf0.data(), complex_elem_size, len, complex_elem_size );
3138                 if( even )
3139                 {
3140                     CopyColumn( sptr0 + b*complex_elem_size, src_step,
3141                                    buf1.data(), complex_elem_size, len, complex_elem_size );
3142                 }
3143                 sptr0 += complex_elem_size;
3144             }
3145 
3146             if( even )
3147                 contextB->apply(buf1.data(), dbuf1);
3148             contextB->apply(buf0.data(), dbuf0);
3149 
3150             if( stage_dst_channels == 1 )
3151             {
3152                 if( !inv )
3153                 {
3154                     // copy the half of output vector to the first/last column.
3155                     // before doing that, defgragment the vector
3156                     memcpy( dbuf0 + elem_size, dbuf0, elem_size );
3157                     CopyColumn( dbuf0 + elem_size, elem_size, dptr0,
3158                                    dst_step, len, elem_size );
3159                     if( even )
3160                     {
3161                         memcpy( dbuf1 + elem_size, dbuf1, elem_size );
3162                         CopyColumn( dbuf1 + elem_size, elem_size,
3163                                        dptr0 + (count-1)*elem_size,
3164                                        dst_step, len, elem_size );
3165                     }
3166                     dptr0 += elem_size;
3167                 }
3168                 else
3169                 {
3170                     // copy the real part of the complex vector to the first/last column
3171                     CopyColumn( dbuf0, complex_elem_size, dptr0, dst_step, len, elem_size );
3172                     if( even )
3173                         CopyColumn( dbuf1, complex_elem_size, dptr0 + (count-1)*elem_size,
3174                                        dst_step, len, elem_size );
3175                     dptr0 += elem_size;
3176                 }
3177             }
3178             else
3179             {
3180                 assert( !inv );
3181                 CopyColumn( dbuf0, complex_elem_size, dptr0,
3182                                dst_step, len, complex_elem_size );
3183                 if( even )
3184                     CopyColumn( dbuf1, complex_elem_size,
3185                                    dptr0 + b*complex_elem_size,
3186                                    dst_step, len, complex_elem_size );
3187                 dptr0 += complex_elem_size;
3188             }
3189         }
3190 
3191         for(int i = a; i < b; i += 2 )
3192         {
3193             if( i+1 < b )
3194             {
3195                 CopyFrom2Columns( sptr0, src_step, buf0.data(), buf1.data(), len, complex_elem_size );
3196                 contextB->apply(buf1.data(), dbuf1);
3197             }
3198             else
3199                 CopyColumn( sptr0, src_step, buf0.data(), complex_elem_size, len, complex_elem_size );
3200 
3201             contextB->apply(buf0.data(), dbuf0);
3202 
3203             if( i+1 < b )
3204                 CopyTo2Columns( dbuf0, dbuf1, dptr0, dst_step, len, complex_elem_size );
3205             else
3206                 CopyColumn( dbuf0, complex_elem_size, dptr0, dst_step, len, complex_elem_size );
3207             sptr0 += 2*complex_elem_size;
3208             dptr0 += 2*complex_elem_size;
3209         }
3210         if(isLastStage && mode == FwdRealToComplex)
3211             complementComplexOutput(depth, dst_data, dst_step, count, len, 2);
3212     }
3213 };
3214 
3215 class OcvDftBasicImpl CV_FINAL : public hal::DFT1D
3216 {
3217 public:
3218     OcvDftOptions opt;
3219     int _factors[34];
3220     AutoBuffer<uchar> wave_buf;
3221     AutoBuffer<int> itab_buf;
3222 #ifdef USE_IPP_DFT
3223     AutoBuffer<uchar> ippbuf;
3224     AutoBuffer<uchar> ippworkbuf;
3225 #endif
3226 
3227 public:
OcvDftBasicImpl()3228     OcvDftBasicImpl()
3229     {
3230         opt.factors = _factors;
3231     }
init(int len,int count,int depth,int flags,bool * needBuffer)3232     void init(int len, int count, int depth, int flags, bool *needBuffer)
3233     {
3234         int prev_len = opt.n;
3235 
3236         int stage = (flags & CV_HAL_DFT_STAGE_COLS) != 0 ? 1 : 0;
3237         int complex_elem_size = depth == CV_32F ? sizeof(Complex<float>) : sizeof(Complex<double>);
3238         opt.isInverse = (flags & CV_HAL_DFT_INVERSE) != 0;
3239         bool real_transform = (flags & CV_HAL_DFT_REAL_OUTPUT) != 0;
3240         opt.isComplex = (stage == 0) && (flags & CV_HAL_DFT_COMPLEX_OUTPUT) != 0;
3241         bool needAnotherStage = (flags & CV_HAL_DFT_TWO_STAGE) != 0;
3242 
3243         opt.scale = 1;
3244         opt.tab_size = len;
3245         opt.n = len;
3246 
3247         opt.useIpp = false;
3248     #ifdef USE_IPP_DFT
3249         opt.ipp_spec = 0;
3250         opt.ipp_work = 0;
3251 
3252         if( CV_IPP_CHECK_COND && (opt.n*count >= 64) ) // use IPP DFT if available
3253         {
3254             int ipp_norm_flag = (flags & CV_HAL_DFT_SCALE) == 0 ? 8 : opt.isInverse ? 2 : 1;
3255             int specsize=0, initsize=0, worksize=0;
3256             IppDFTGetSizeFunc getSizeFunc = 0;
3257             IppDFTInitFunc initFunc = 0;
3258 
3259             if( real_transform && stage == 0 )
3260             {
3261                 if( depth == CV_32F )
3262                 {
3263                     getSizeFunc = ippsDFTGetSize_R_32f;
3264                     initFunc = (IppDFTInitFunc)ippsDFTInit_R_32f;
3265                 }
3266                 else
3267                 {
3268                     getSizeFunc = ippsDFTGetSize_R_64f;
3269                     initFunc = (IppDFTInitFunc)ippsDFTInit_R_64f;
3270                 }
3271             }
3272             else
3273             {
3274                 if( depth == CV_32F )
3275                 {
3276                     getSizeFunc = ippsDFTGetSize_C_32fc;
3277                     initFunc = (IppDFTInitFunc)ippsDFTInit_C_32fc;
3278                 }
3279                 else
3280                 {
3281                     getSizeFunc = ippsDFTGetSize_C_64fc;
3282                     initFunc = (IppDFTInitFunc)ippsDFTInit_C_64fc;
3283                 }
3284             }
3285             if( getSizeFunc(opt.n, ipp_norm_flag, ippAlgHintNone, &specsize, &initsize, &worksize) >= 0 )
3286             {
3287                 ippbuf.allocate(specsize + initsize + 64);
3288                 opt.ipp_spec = alignPtr(&ippbuf[0], 32);
3289                 ippworkbuf.allocate(worksize + 32);
3290                 opt.ipp_work = alignPtr(&ippworkbuf[0], 32);
3291                 uchar* initbuf = alignPtr((uchar*)opt.ipp_spec + specsize, 32);
3292                 if( initFunc(opt.n, ipp_norm_flag, ippAlgHintNone, opt.ipp_spec, initbuf) >= 0 )
3293                     opt.useIpp = true;
3294             }
3295             else
3296                 setIppErrorStatus();
3297         }
3298     #endif
3299 
3300         if (!opt.useIpp)
3301         {
3302             if (len != prev_len)
3303             {
3304                 opt.nf = DFTFactorize( opt.n, opt.factors );
3305             }
3306             bool inplace_transform = opt.factors[0] == opt.factors[opt.nf-1];
3307             if (len != prev_len || (!inplace_transform && opt.isInverse && real_transform))
3308             {
3309                 wave_buf.allocate(opt.n*complex_elem_size);
3310                 opt.wave = wave_buf.data();
3311                 itab_buf.allocate(opt.n);
3312                 opt.itab = itab_buf.data();
3313                 DFTInit( opt.n, opt.nf, opt.factors, opt.itab, complex_elem_size,
3314                          opt.wave, stage == 0 && opt.isInverse && real_transform );
3315             }
3316             // otherwise reuse the tables calculated on the previous stage
3317             if (needBuffer)
3318             {
3319                 if( (stage == 0 && ((*needBuffer && !inplace_transform) || (real_transform && (len & 1)))) ||
3320                     (stage == 1 && !inplace_transform) )
3321                 {
3322                     *needBuffer = true;
3323                 }
3324             }
3325         }
3326         else
3327         {
3328             if (needBuffer)
3329             {
3330                 *needBuffer = false;
3331             }
3332         }
3333 
3334         {
3335             static DFTFunc dft_tbl[6] =
3336             {
3337                 (DFTFunc)DFT_32f,
3338                 (DFTFunc)RealDFT_32f,
3339                 (DFTFunc)CCSIDFT_32f,
3340                 (DFTFunc)DFT_64f,
3341                 (DFTFunc)RealDFT_64f,
3342                 (DFTFunc)CCSIDFT_64f
3343             };
3344             int idx = 0;
3345             if (stage == 0)
3346             {
3347                 if (real_transform)
3348                 {
3349                     if (!opt.isInverse)
3350                         idx = 1;
3351                     else
3352                         idx = 2;
3353                 }
3354             }
3355             if (depth == CV_64F)
3356                 idx += 3;
3357 
3358             opt.dft_func = dft_tbl[idx];
3359         }
3360 
3361         if(!needAnotherStage && (flags & CV_HAL_DFT_SCALE) != 0)
3362         {
3363             int rowCount = count;
3364             if (stage == 0 && (flags & CV_HAL_DFT_ROWS) != 0)
3365                 rowCount = 1;
3366             opt.scale = 1./(len * rowCount);
3367         }
3368     }
3369 
apply(const uchar * src,uchar * dst)3370     void apply(const uchar *src, uchar *dst) CV_OVERRIDE
3371     {
3372         opt.dft_func(opt, src, dst);
3373     }
3374 
free()3375     void free() {}
3376 };
3377 
3378 struct ReplacementDFT1D : public hal::DFT1D
3379 {
3380     cvhalDFT *context;
3381     bool isInitialized;
3382 
ReplacementDFT1Dcv::ReplacementDFT1D3383     ReplacementDFT1D() : context(0), isInitialized(false) {}
initcv::ReplacementDFT1D3384     bool init(int len, int count, int depth, int flags, bool *needBuffer)
3385     {
3386         int res = cv_hal_dftInit1D(&context, len, count, depth, flags, needBuffer);
3387         isInitialized = (res == CV_HAL_ERROR_OK);
3388         return isInitialized;
3389     }
applycv::ReplacementDFT1D3390     void apply(const uchar *src, uchar *dst) CV_OVERRIDE
3391     {
3392         if (isInitialized)
3393         {
3394             CALL_HAL(dft1D, cv_hal_dft1D, context, src, dst);
3395         }
3396     }
~ReplacementDFT1Dcv::ReplacementDFT1D3397     ~ReplacementDFT1D()
3398     {
3399         if (isInitialized)
3400         {
3401             CALL_HAL(dftFree1D, cv_hal_dftFree1D, context);
3402         }
3403     }
3404 };
3405 
3406 struct ReplacementDFT2D : public hal::DFT2D
3407 {
3408     cvhalDFT *context;
3409     bool isInitialized;
3410 
ReplacementDFT2Dcv::ReplacementDFT2D3411     ReplacementDFT2D() : context(0), isInitialized(false) {}
initcv::ReplacementDFT2D3412     bool init(int width, int height, int depth,
3413               int src_channels, int dst_channels,
3414               int flags, int nonzero_rows)
3415     {
3416         int res = cv_hal_dftInit2D(&context, width, height, depth, src_channels, dst_channels, flags, nonzero_rows);
3417         isInitialized = (res == CV_HAL_ERROR_OK);
3418         return isInitialized;
3419     }
applycv::ReplacementDFT2D3420     void apply(const uchar *src, size_t src_step, uchar *dst, size_t dst_step) CV_OVERRIDE
3421     {
3422         if (isInitialized)
3423         {
3424             CALL_HAL(dft2D, cv_hal_dft2D, context, src, src_step, dst, dst_step);
3425         }
3426     }
~ReplacementDFT2Dcv::ReplacementDFT2D3427     ~ReplacementDFT2D()
3428     {
3429         if (isInitialized)
3430         {
3431             CALL_HAL(dftFree2D, cv_hal_dftFree1D, context);
3432         }
3433     }
3434 };
3435 
3436 namespace hal {
3437 
3438 //================== 1D ======================
3439 
create(int len,int count,int depth,int flags,bool * needBuffer)3440 Ptr<DFT1D> DFT1D::create(int len, int count, int depth, int flags, bool *needBuffer)
3441 {
3442     {
3443         ReplacementDFT1D *impl = new ReplacementDFT1D();
3444         if (impl->init(len, count, depth, flags, needBuffer))
3445         {
3446             return Ptr<DFT1D>(impl);
3447         }
3448         delete impl;
3449     }
3450     {
3451         OcvDftBasicImpl *impl = new OcvDftBasicImpl();
3452         impl->init(len, count, depth, flags, needBuffer);
3453         return Ptr<DFT1D>(impl);
3454     }
3455 }
3456 
3457 //================== 2D ======================
3458 
create(int width,int height,int depth,int src_channels,int dst_channels,int flags,int nonzero_rows)3459 Ptr<DFT2D> DFT2D::create(int width, int height, int depth,
3460                          int src_channels, int dst_channels,
3461                          int flags, int nonzero_rows)
3462 {
3463     {
3464         ReplacementDFT2D *impl = new ReplacementDFT2D();
3465         if (impl->init(width, height, depth, src_channels, dst_channels, flags, nonzero_rows))
3466         {
3467             return Ptr<DFT2D>(impl);
3468         }
3469         delete impl;
3470     }
3471     {
3472         if(width == 1 && nonzero_rows > 0 )
3473         {
3474             CV_Error( CV_StsNotImplemented,
3475             "This mode (using nonzero_rows with a single-column matrix) breaks the function's logic, so it is prohibited.\n"
3476             "For fast convolution/correlation use 2-column matrix or single-row matrix instead" );
3477         }
3478         OcvDftImpl *impl = new OcvDftImpl();
3479         impl->init(width, height, depth, src_channels, dst_channels, flags, nonzero_rows);
3480         return Ptr<DFT2D>(impl);
3481     }
3482 }
3483 
3484 } // cv::hal::
3485 } // cv::
3486 
3487 
dft(InputArray _src0,OutputArray _dst,int flags,int nonzero_rows)3488 void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
3489 {
3490     CV_INSTRUMENT_REGION();
3491 
3492 #ifdef HAVE_CLAMDFFT
3493     CV_OCL_RUN(ocl::haveAmdFft() && ocl::Device::getDefault().type() != ocl::Device::TYPE_CPU &&
3494             _dst.isUMat() && _src0.dims() <= 2 && nonzero_rows == 0,
3495                ocl_dft_amdfft(_src0, _dst, flags))
3496 #endif
3497 
3498 #ifdef HAVE_OPENCL
3499     CV_OCL_RUN(_dst.isUMat() && _src0.dims() <= 2,
3500                ocl_dft(_src0, _dst, flags, nonzero_rows))
3501 #endif
3502 
3503     Mat src0 = _src0.getMat(), src = src0;
3504     bool inv = (flags & DFT_INVERSE) != 0;
3505     int type = src.type();
3506     int depth = src.depth();
3507 
3508     CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
3509 
3510     // Fail if DFT_COMPLEX_INPUT is specified, but src is not 2 channels.
3511     CV_Assert( !((flags & DFT_COMPLEX_INPUT) && src.channels() != 2) );
3512 
3513     if( !inv && src.channels() == 1 && (flags & DFT_COMPLEX_OUTPUT) )
3514         _dst.create( src.size(), CV_MAKETYPE(depth, 2) );
3515     else if( inv && src.channels() == 2 && (flags & DFT_REAL_OUTPUT) )
3516         _dst.create( src.size(), depth );
3517     else
3518         _dst.create( src.size(), type );
3519 
3520     Mat dst = _dst.getMat();
3521 
3522     int f = 0;
3523     if (src.isContinuous() && dst.isContinuous())
3524         f |= CV_HAL_DFT_IS_CONTINUOUS;
3525     if (inv)
3526         f |= CV_HAL_DFT_INVERSE;
3527     if (flags & DFT_ROWS)
3528         f |= CV_HAL_DFT_ROWS;
3529     if (flags & DFT_SCALE)
3530         f |= CV_HAL_DFT_SCALE;
3531     if (src.data == dst.data)
3532         f |= CV_HAL_DFT_IS_INPLACE;
3533     Ptr<hal::DFT2D> c = hal::DFT2D::create(src.cols, src.rows, depth, src.channels(), dst.channels(), f, nonzero_rows);
3534     c->apply(src.data, src.step, dst.data, dst.step);
3535 }
3536 
3537 
idft(InputArray src,OutputArray dst,int flags,int nonzero_rows)3538 void cv::idft( InputArray src, OutputArray dst, int flags, int nonzero_rows )
3539 {
3540     CV_INSTRUMENT_REGION();
3541 
3542     dft( src, dst, flags | DFT_INVERSE, nonzero_rows );
3543 }
3544 
3545 #ifdef HAVE_OPENCL
3546 
3547 namespace cv {
3548 
ocl_mulSpectrums(InputArray _srcA,InputArray _srcB,OutputArray _dst,int flags,bool conjB)3549 static bool ocl_mulSpectrums( InputArray _srcA, InputArray _srcB,
3550                               OutputArray _dst, int flags, bool conjB )
3551 {
3552     int atype = _srcA.type(), btype = _srcB.type(),
3553             rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
3554     Size asize = _srcA.size(), bsize = _srcB.size();
3555     CV_Assert(asize == bsize);
3556 
3557     if ( !(atype == CV_32FC2 && btype == CV_32FC2) || flags != 0 )
3558         return false;
3559 
3560     UMat A = _srcA.getUMat(), B = _srcB.getUMat();
3561     CV_Assert(A.size() == B.size());
3562 
3563     _dst.create(A.size(), atype);
3564     UMat dst = _dst.getUMat();
3565 
3566     ocl::Kernel k("mulAndScaleSpectrums",
3567                   ocl::core::mulspectrums_oclsrc,
3568                   format("%s", conjB ? "-D CONJ" : ""));
3569     if (k.empty())
3570         return false;
3571 
3572     k.args(ocl::KernelArg::ReadOnlyNoSize(A), ocl::KernelArg::ReadOnlyNoSize(B),
3573            ocl::KernelArg::WriteOnly(dst), rowsPerWI);
3574 
3575     size_t globalsize[2] = { (size_t)asize.width, ((size_t)asize.height + rowsPerWI - 1) / rowsPerWI };
3576     return k.run(2, globalsize, NULL, false);
3577 }
3578 
3579 }
3580 
3581 #endif
3582 
3583 namespace {
3584 
3585 #define VAL(buf, elem) (((T*)((char*)data ## buf + (step ## buf * (elem))))[0])
3586 #define MUL_SPECTRUMS_COL(A, B, C) \
3587     VAL(C, 0) = VAL(A, 0) * VAL(B, 0); \
3588     for (size_t j = 1; j <= rows - 2; j += 2) \
3589     { \
3590         double a_re = VAL(A, j), a_im = VAL(A, j + 1); \
3591         double b_re = VAL(B, j), b_im = VAL(B, j + 1); \
3592         if (conjB) b_im = -b_im; \
3593         double c_re = a_re * b_re - a_im * b_im; \
3594         double c_im = a_re * b_im + a_im * b_re; \
3595         VAL(C, j) = (T)c_re; VAL(C, j + 1) = (T)c_im; \
3596     } \
3597     if ((rows & 1) == 0) \
3598         VAL(C, rows-1) = VAL(A, rows-1) * VAL(B, rows-1)
3599 
3600 template <typename T, bool conjB> static inline
mulSpectrums_processCol_noinplace(const T * dataA,const T * dataB,T * dataC,size_t stepA,size_t stepB,size_t stepC,size_t rows)3601 void mulSpectrums_processCol_noinplace(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows)
3602 {
3603     MUL_SPECTRUMS_COL(A, B, C);
3604 }
3605 
3606 template <typename T, bool conjB> static inline
mulSpectrums_processCol_inplaceA(const T * dataB,T * dataAC,size_t stepB,size_t stepAC,size_t rows)3607 void mulSpectrums_processCol_inplaceA(const T* dataB, T* dataAC, size_t stepB, size_t stepAC, size_t rows)
3608 {
3609     MUL_SPECTRUMS_COL(AC, B, AC);
3610 }
3611 template <typename T, bool conjB, bool inplaceA> static inline
mulSpectrums_processCol(const T * dataA,const T * dataB,T * dataC,size_t stepA,size_t stepB,size_t stepC,size_t rows)3612 void mulSpectrums_processCol(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows)
3613 {
3614     if (inplaceA)
3615         mulSpectrums_processCol_inplaceA<T, conjB>(dataB, dataC, stepB, stepC, rows);
3616     else
3617         mulSpectrums_processCol_noinplace<T, conjB>(dataA, dataB, dataC, stepA, stepB, stepC, rows);
3618 }
3619 #undef MUL_SPECTRUMS_COL
3620 #undef VAL
3621 
3622 template <typename T, bool conjB, bool inplaceA> static inline
mulSpectrums_processCols(const T * dataA,const T * dataB,T * dataC,size_t stepA,size_t stepB,size_t stepC,size_t rows,size_t cols)3623 void mulSpectrums_processCols(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols)
3624 {
3625     mulSpectrums_processCol<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows);
3626     if ((cols & 1) == 0)
3627     {
3628         mulSpectrums_processCol<T, conjB, inplaceA>(dataA + cols - 1, dataB + cols - 1, dataC + cols - 1, stepA, stepB, stepC, rows);
3629     }
3630 }
3631 
3632 #define VAL(buf, elem) (data ## buf[(elem)])
3633 #define MUL_SPECTRUMS_ROW(A, B, C) \
3634     for (size_t j = j0; j < j1; j += 2) \
3635     { \
3636         double a_re = VAL(A, j), a_im = VAL(A, j + 1); \
3637         double b_re = VAL(B, j), b_im = VAL(B, j + 1); \
3638         if (conjB) b_im = -b_im; \
3639         double c_re = a_re * b_re - a_im * b_im; \
3640         double c_im = a_re * b_im + a_im * b_re; \
3641         VAL(C, j) = (T)c_re; VAL(C, j + 1) = (T)c_im; \
3642     }
3643 template <typename T, bool conjB> static inline
mulSpectrums_processRow_noinplace(const T * dataA,const T * dataB,T * dataC,size_t j0,size_t j1)3644 void mulSpectrums_processRow_noinplace(const T* dataA, const T* dataB, T* dataC, size_t j0, size_t j1)
3645 {
3646     MUL_SPECTRUMS_ROW(A, B, C);
3647 }
3648 template <typename T, bool conjB> static inline
mulSpectrums_processRow_inplaceA(const T * dataB,T * dataAC,size_t j0,size_t j1)3649 void mulSpectrums_processRow_inplaceA(const T* dataB, T* dataAC, size_t j0, size_t j1)
3650 {
3651     MUL_SPECTRUMS_ROW(AC, B, AC);
3652 }
3653 template <typename T, bool conjB, bool inplaceA> static inline
mulSpectrums_processRow(const T * dataA,const T * dataB,T * dataC,size_t j0,size_t j1)3654 void mulSpectrums_processRow(const T* dataA, const T* dataB, T* dataC, size_t j0, size_t j1)
3655 {
3656     if (inplaceA)
3657         mulSpectrums_processRow_inplaceA<T, conjB>(dataB, dataC, j0, j1);
3658     else
3659         mulSpectrums_processRow_noinplace<T, conjB>(dataA, dataB, dataC, j0, j1);
3660 }
3661 #undef MUL_SPECTRUMS_ROW
3662 #undef VAL
3663 
3664 template <typename T, bool conjB, bool inplaceA> static inline
mulSpectrums_processRows(const T * dataA,const T * dataB,T * dataC,size_t stepA,size_t stepB,size_t stepC,size_t rows,size_t cols,size_t j0,size_t j1,bool is_1d_CN1)3665 void mulSpectrums_processRows(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d_CN1)
3666 {
3667     while (rows-- > 0)
3668     {
3669         if (is_1d_CN1)
3670             dataC[0] = dataA[0]*dataB[0];
3671         mulSpectrums_processRow<T, conjB, inplaceA>(dataA, dataB, dataC, j0, j1);
3672         if (is_1d_CN1 && (cols & 1) == 0)
3673             dataC[j1] = dataA[j1]*dataB[j1];
3674 
3675         dataA = (const T*)(((char*)dataA) + stepA);
3676         dataB = (const T*)(((char*)dataB) + stepB);
3677         dataC =       (T*)(((char*)dataC) + stepC);
3678     }
3679 }
3680 
3681 
3682 template <typename T, bool conjB, bool inplaceA> static inline
mulSpectrums_Impl_(const T * dataA,const T * dataB,T * dataC,size_t stepA,size_t stepB,size_t stepC,size_t rows,size_t cols,size_t j0,size_t j1,bool is_1d,bool isCN1)3683 void mulSpectrums_Impl_(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d, bool isCN1)
3684 {
3685     if (!is_1d && isCN1)
3686     {
3687         mulSpectrums_processCols<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols);
3688     }
3689     mulSpectrums_processRows<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d && isCN1);
3690 }
3691 template <typename T, bool conjB> static inline
mulSpectrums_Impl(const T * dataA,const T * dataB,T * dataC,size_t stepA,size_t stepB,size_t stepC,size_t rows,size_t cols,size_t j0,size_t j1,bool is_1d,bool isCN1)3692 void mulSpectrums_Impl(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d, bool isCN1)
3693 {
3694     if (dataA == dataC)
3695         mulSpectrums_Impl_<T, conjB, true>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d, isCN1);
3696     else
3697         mulSpectrums_Impl_<T, conjB, false>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d, isCN1);
3698 }
3699 
3700 } // namespace
3701 
mulSpectrums(InputArray _srcA,InputArray _srcB,OutputArray _dst,int flags,bool conjB)3702 void cv::mulSpectrums( InputArray _srcA, InputArray _srcB,
3703                        OutputArray _dst, int flags, bool conjB )
3704 {
3705     CV_INSTRUMENT_REGION();
3706 
3707     CV_OCL_RUN(_dst.isUMat() && _srcA.dims() <= 2 && _srcB.dims() <= 2,
3708             ocl_mulSpectrums(_srcA, _srcB, _dst, flags, conjB))
3709 
3710     Mat srcA = _srcA.getMat(), srcB = _srcB.getMat();
3711     int depth = srcA.depth(), cn = srcA.channels(), type = srcA.type();
3712     size_t rows = srcA.rows, cols = srcA.cols;
3713 
3714     CV_Assert( type == srcB.type() && srcA.size() == srcB.size() );
3715     CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
3716 
3717     _dst.create( srcA.rows, srcA.cols, type );
3718     Mat dst = _dst.getMat();
3719 
3720     // correct inplace support
3721     // Case 'dst.data == srcA.data' is handled by implementation,
3722     // because it is used frequently (filter2D, matchTemplate)
3723     if (dst.data == srcB.data)
3724         srcB = srcB.clone(); // workaround for B only
3725 
3726     bool is_1d = (flags & DFT_ROWS)
3727         || (rows == 1)
3728         || (cols == 1 && srcA.isContinuous() && srcB.isContinuous() && dst.isContinuous());
3729 
3730     if( is_1d && !(flags & DFT_ROWS) )
3731         cols = cols + rows - 1, rows = 1;
3732 
3733     bool isCN1 = cn == 1;
3734     size_t j0 = isCN1 ? 1 : 0;
3735     size_t j1 = cols*cn - (((cols & 1) == 0 && cn == 1) ? 1 : 0);
3736 
3737     if (depth == CV_32F)
3738     {
3739         const float* dataA = srcA.ptr<float>();
3740         const float* dataB = srcB.ptr<float>();
3741         float* dataC = dst.ptr<float>();
3742         if (!conjB)
3743             mulSpectrums_Impl<float, false>(dataA, dataB, dataC, srcA.step, srcB.step, dst.step, rows, cols, j0, j1, is_1d, isCN1);
3744         else
3745             mulSpectrums_Impl<float, true>(dataA, dataB, dataC, srcA.step, srcB.step, dst.step, rows, cols, j0, j1, is_1d, isCN1);
3746     }
3747     else
3748     {
3749         const double* dataA = srcA.ptr<double>();
3750         const double* dataB = srcB.ptr<double>();
3751         double* dataC = dst.ptr<double>();
3752         if (!conjB)
3753             mulSpectrums_Impl<double, false>(dataA, dataB, dataC, srcA.step, srcB.step, dst.step, rows, cols, j0, j1, is_1d, isCN1);
3754         else
3755             mulSpectrums_Impl<double, true>(dataA, dataB, dataC, srcA.step, srcB.step, dst.step, rows, cols, j0, j1, is_1d, isCN1);
3756     }
3757 }
3758 
3759 
3760 /****************************************************************************************\
3761                                Discrete Cosine Transform
3762 \****************************************************************************************/
3763 
3764 namespace cv
3765 {
3766 
3767 /* DCT is calculated using DFT, as described here:
3768    http://www.ece.utexas.edu/~bevans/courses/ee381k/lectures/09_DCT/lecture9/:
3769 */
3770 template<typename T> static void
DCT(const OcvDftOptions & c,const T * src,size_t src_step,T * dft_src,T * dft_dst,T * dst,size_t dst_step,const Complex<T> * dct_wave)3771 DCT( const OcvDftOptions & c, const T* src, size_t src_step, T* dft_src, T* dft_dst, T* dst, size_t dst_step,
3772      const Complex<T>* dct_wave )
3773 {
3774     static const T sin_45 = (T)0.70710678118654752440084436210485;
3775 
3776     int n = c.n;
3777     int j, n2 = n >> 1;
3778 
3779     src_step /= sizeof(src[0]);
3780     dst_step /= sizeof(dst[0]);
3781     T* dst1 = dst + (n-1)*dst_step;
3782 
3783     if( n == 1 )
3784     {
3785         dst[0] = src[0];
3786         return;
3787     }
3788 
3789     for( j = 0; j < n2; j++, src += src_step*2 )
3790     {
3791         dft_src[j] = src[0];
3792         dft_src[n-j-1] = src[src_step];
3793     }
3794 
3795     RealDFT(c, dft_src, dft_dst);
3796     src = dft_dst;
3797 
3798     dst[0] = (T)(src[0]*dct_wave->re*sin_45);
3799     dst += dst_step;
3800     for( j = 1, dct_wave++; j < n2; j++, dct_wave++,
3801                                     dst += dst_step, dst1 -= dst_step )
3802     {
3803         T t0 = dct_wave->re*src[j*2-1] - dct_wave->im*src[j*2];
3804         T t1 = -dct_wave->im*src[j*2-1] - dct_wave->re*src[j*2];
3805         dst[0] = t0;
3806         dst1[0] = t1;
3807     }
3808 
3809     dst[0] = src[n-1]*dct_wave->re;
3810 }
3811 
3812 
3813 template<typename T> static void
IDCT(const OcvDftOptions & c,const T * src,size_t src_step,T * dft_src,T * dft_dst,T * dst,size_t dst_step,const Complex<T> * dct_wave)3814 IDCT( const OcvDftOptions & c, const T* src, size_t src_step, T* dft_src, T* dft_dst, T* dst, size_t dst_step,
3815       const Complex<T>* dct_wave)
3816 {
3817     static const T sin_45 = (T)0.70710678118654752440084436210485;
3818     int n = c.n;
3819     int j, n2 = n >> 1;
3820 
3821     src_step /= sizeof(src[0]);
3822     dst_step /= sizeof(dst[0]);
3823     const T* src1 = src + (n-1)*src_step;
3824 
3825     if( n == 1 )
3826     {
3827         dst[0] = src[0];
3828         return;
3829     }
3830 
3831     dft_src[0] = (T)(src[0]*2*dct_wave->re*sin_45);
3832     src += src_step;
3833     for( j = 1, dct_wave++; j < n2; j++, dct_wave++,
3834                                     src += src_step, src1 -= src_step )
3835     {
3836         T t0 = dct_wave->re*src[0] - dct_wave->im*src1[0];
3837         T t1 = -dct_wave->im*src[0] - dct_wave->re*src1[0];
3838         dft_src[j*2-1] = t0;
3839         dft_src[j*2] = t1;
3840     }
3841 
3842     dft_src[n-1] = (T)(src[0]*2*dct_wave->re);
3843     CCSIDFT(c, dft_src, dft_dst);
3844 
3845     for( j = 0; j < n2; j++, dst += dst_step*2 )
3846     {
3847         dst[0] = dft_dst[j];
3848         dst[dst_step] = dft_dst[n-j-1];
3849     }
3850 }
3851 
3852 
3853 static void
DCTInit(int n,int elem_size,void * _wave,int inv)3854 DCTInit( int n, int elem_size, void* _wave, int inv )
3855 {
3856     static const double DctScale[] =
3857     {
3858     0.707106781186547570, 0.500000000000000000, 0.353553390593273790,
3859     0.250000000000000000, 0.176776695296636890, 0.125000000000000000,
3860     0.088388347648318447, 0.062500000000000000, 0.044194173824159223,
3861     0.031250000000000000, 0.022097086912079612, 0.015625000000000000,
3862     0.011048543456039806, 0.007812500000000000, 0.005524271728019903,
3863     0.003906250000000000, 0.002762135864009952, 0.001953125000000000,
3864     0.001381067932004976, 0.000976562500000000, 0.000690533966002488,
3865     0.000488281250000000, 0.000345266983001244, 0.000244140625000000,
3866     0.000172633491500622, 0.000122070312500000, 0.000086316745750311,
3867     0.000061035156250000, 0.000043158372875155, 0.000030517578125000
3868     };
3869 
3870     int i;
3871     Complex<double> w, w1;
3872     double t, scale;
3873 
3874     if( n == 1 )
3875         return;
3876 
3877     assert( (n&1) == 0 );
3878 
3879     if( (n & (n - 1)) == 0 )
3880     {
3881         int m;
3882         for( m = 0; (unsigned)(1 << m) < (unsigned)n; m++ )
3883             ;
3884         scale = (!inv ? 2 : 1)*DctScale[m];
3885         w1.re = DFTTab[m+2][0];
3886         w1.im = -DFTTab[m+2][1];
3887     }
3888     else
3889     {
3890         t = 1./(2*n);
3891         scale = (!inv ? 2 : 1)*std::sqrt(t);
3892         w1.im = sin(-CV_PI*t);
3893         w1.re = std::sqrt(1. - w1.im*w1.im);
3894     }
3895     n >>= 1;
3896 
3897     if( elem_size == sizeof(Complex<double>) )
3898     {
3899         Complex<double>* wave = (Complex<double>*)_wave;
3900 
3901         w.re = scale;
3902         w.im = 0.;
3903 
3904         for( i = 0; i <= n; i++ )
3905         {
3906             wave[i] = w;
3907             t = w.re*w1.re - w.im*w1.im;
3908             w.im = w.re*w1.im + w.im*w1.re;
3909             w.re = t;
3910         }
3911     }
3912     else
3913     {
3914         Complex<float>* wave = (Complex<float>*)_wave;
3915         assert( elem_size == sizeof(Complex<float>) );
3916 
3917         w.re = (float)scale;
3918         w.im = 0.f;
3919 
3920         for( i = 0; i <= n; i++ )
3921         {
3922             wave[i].re = (float)w.re;
3923             wave[i].im = (float)w.im;
3924             t = w.re*w1.re - w.im*w1.im;
3925             w.im = w.re*w1.im + w.im*w1.re;
3926             w.re = t;
3927         }
3928     }
3929 }
3930 
3931 
3932 typedef void (*DCTFunc)(const OcvDftOptions & c, const void* src, size_t src_step, void* dft_src,
3933                         void* dft_dst, void* dst, size_t dst_step, const void* dct_wave);
3934 
DCT_32f(const OcvDftOptions & c,const float * src,size_t src_step,float * dft_src,float * dft_dst,float * dst,size_t dst_step,const Complexf * dct_wave)3935 static void DCT_32f(const OcvDftOptions & c, const float* src, size_t src_step, float* dft_src, float* dft_dst,
3936                     float* dst, size_t dst_step, const Complexf* dct_wave)
3937 {
3938     DCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
3939 }
3940 
IDCT_32f(const OcvDftOptions & c,const float * src,size_t src_step,float * dft_src,float * dft_dst,float * dst,size_t dst_step,const Complexf * dct_wave)3941 static void IDCT_32f(const OcvDftOptions & c, const float* src, size_t src_step, float* dft_src, float* dft_dst,
3942                     float* dst, size_t dst_step, const Complexf* dct_wave)
3943 {
3944     IDCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
3945 }
3946 
DCT_64f(const OcvDftOptions & c,const double * src,size_t src_step,double * dft_src,double * dft_dst,double * dst,size_t dst_step,const Complexd * dct_wave)3947 static void DCT_64f(const OcvDftOptions & c, const double* src, size_t src_step, double* dft_src, double* dft_dst,
3948                     double* dst, size_t dst_step, const Complexd* dct_wave)
3949 {
3950     DCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
3951 }
3952 
IDCT_64f(const OcvDftOptions & c,const double * src,size_t src_step,double * dft_src,double * dft_dst,double * dst,size_t dst_step,const Complexd * dct_wave)3953 static void IDCT_64f(const OcvDftOptions & c, const double* src, size_t src_step, double* dft_src, double* dft_dst,
3954                      double* dst, size_t dst_step, const Complexd* dct_wave)
3955 {
3956     IDCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
3957 }
3958 
3959 }
3960 
3961 #ifdef HAVE_IPP
3962 namespace cv
3963 {
3964 
3965 #if IPP_VERSION_X100 >= 900
3966 typedef IppStatus (CV_STDCALL * ippiDCTFunc)(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep, const void* pDCTSpec, Ipp8u* pBuffer);
3967 typedef IppStatus (CV_STDCALL * ippiDCTInit)(void* pDCTSpec, IppiSize roiSize, Ipp8u* pMemInit );
3968 typedef IppStatus (CV_STDCALL * ippiDCTGetSize)(IppiSize roiSize, int* pSizeSpec, int* pSizeInit, int* pSizeBuf);
3969 #elif IPP_VERSION_X100 >= 700
3970 typedef IppStatus (CV_STDCALL * ippiDCTFunc)(const Ipp32f*, int, Ipp32f*, int, const void*, Ipp8u*);
3971 typedef IppStatus (CV_STDCALL * ippiDCTInitAlloc)(void**, IppiSize, IppHintAlgorithm);
3972 typedef IppStatus (CV_STDCALL * ippiDCTFree)(void* pDCTSpec);
3973 typedef IppStatus (CV_STDCALL * ippiDCTGetBufSize)(const void*, int*);
3974 #endif
3975 
3976 class DctIPPLoop_Invoker : public ParallelLoopBody
3977 {
3978 public:
DctIPPLoop_Invoker(const uchar * _src,size_t _src_step,uchar * _dst,size_t _dst_step,int _width,bool _inv,bool * _ok)3979     DctIPPLoop_Invoker(const uchar * _src, size_t _src_step, uchar * _dst, size_t _dst_step, int _width, bool _inv, bool *_ok) :
3980         ParallelLoopBody(), src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width), inv(_inv), ok(_ok)
3981     {
3982         *ok = true;
3983     }
3984 
operator ()(const Range & range) const3985     virtual void operator()(const Range& range) const CV_OVERRIDE
3986     {
3987         if(*ok == false)
3988             return;
3989 
3990 #if IPP_VERSION_X100 >= 900
3991         IppiSize srcRoiSize = {width, 1};
3992 
3993         int specSize    = 0;
3994         int initSize    = 0;
3995         int bufferSize  = 0;
3996 
3997         Ipp8u* pDCTSpec = NULL;
3998         Ipp8u* pBuffer  = NULL;
3999         Ipp8u* pInitBuf = NULL;
4000 
4001         #define IPP_RETURN              \
4002             if(pDCTSpec)                \
4003                 ippFree(pDCTSpec);      \
4004             if(pBuffer)                 \
4005                 ippFree(pBuffer);       \
4006             if(pInitBuf)                \
4007                 ippFree(pInitBuf);      \
4008             return;
4009 
4010         ippiDCTFunc     ippiDCT_32f_C1R   = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R         : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
4011         ippiDCTInit     ippDctInit     = inv ? (ippiDCTInit)ippiDCTInvInit_32f         : (ippiDCTInit)ippiDCTFwdInit_32f;
4012         ippiDCTGetSize  ippDctGetSize  = inv ? (ippiDCTGetSize)ippiDCTInvGetSize_32f   : (ippiDCTGetSize)ippiDCTFwdGetSize_32f;
4013 
4014         if(ippDctGetSize(srcRoiSize, &specSize, &initSize, &bufferSize) < 0)
4015         {
4016             *ok = false;
4017             return;
4018         }
4019 
4020         pDCTSpec = (Ipp8u*)CV_IPP_MALLOC(specSize);
4021         if(!pDCTSpec && specSize)
4022         {
4023             *ok = false;
4024             return;
4025         }
4026 
4027         pBuffer  = (Ipp8u*)CV_IPP_MALLOC(bufferSize);
4028         if(!pBuffer && bufferSize)
4029         {
4030             *ok = false;
4031             IPP_RETURN
4032         }
4033         pInitBuf = (Ipp8u*)CV_IPP_MALLOC(initSize);
4034         if(!pInitBuf && initSize)
4035         {
4036             *ok = false;
4037             IPP_RETURN
4038         }
4039 
4040         if(ippDctInit(pDCTSpec, srcRoiSize, pInitBuf) < 0)
4041         {
4042             *ok = false;
4043             IPP_RETURN
4044         }
4045 
4046         for(int i = range.start; i < range.end; ++i)
4047         {
4048             if(CV_INSTRUMENT_FUN_IPP(ippiDCT_32f_C1R, (float*)(src + src_step * i), static_cast<int>(src_step), (float*)(dst + dst_step * i), static_cast<int>(dst_step), pDCTSpec, pBuffer) < 0)
4049             {
4050                 *ok = false;
4051                 IPP_RETURN
4052             }
4053         }
4054         IPP_RETURN
4055 #undef IPP_RETURN
4056 #elif IPP_VERSION_X100 >= 700
4057         void* pDCTSpec;
4058         AutoBuffer<uchar> buf;
4059         uchar* pBuffer = 0;
4060         int bufSize=0;
4061 
4062         IppiSize srcRoiSize = {width, 1};
4063 
4064         CV_SUPPRESS_DEPRECATED_START
4065 
4066         ippiDCTFunc ippDctFun           = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R             : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
4067         ippiDCTInitAlloc ippInitAlloc   = inv ? (ippiDCTInitAlloc)ippiDCTInvInitAlloc_32f   : (ippiDCTInitAlloc)ippiDCTFwdInitAlloc_32f;
4068         ippiDCTFree ippFree             = inv ? (ippiDCTFree)ippiDCTInvFree_32f             : (ippiDCTFree)ippiDCTFwdFree_32f;
4069         ippiDCTGetBufSize ippGetBufSize = inv ? (ippiDCTGetBufSize)ippiDCTInvGetBufSize_32f : (ippiDCTGetBufSize)ippiDCTFwdGetBufSize_32f;
4070 
4071         if (ippInitAlloc(&pDCTSpec, srcRoiSize, ippAlgHintNone)>=0 && ippGetBufSize(pDCTSpec, &bufSize)>=0)
4072         {
4073             buf.allocate( bufSize );
4074             pBuffer = (uchar*)buf;
4075 
4076             for( int i = range.start; i < range.end; ++i)
4077             {
4078                 if(ippDctFun((float*)(src + src_step * i), static_cast<int>(src_step), (float*)(dst + dst_step * i), static_cast<int>(dst_step), pDCTSpec, (Ipp8u*)pBuffer) < 0)
4079                 {
4080                     *ok = false;
4081                     break;
4082                 }
4083             }
4084         }
4085         else
4086             *ok = false;
4087 
4088         if (pDCTSpec)
4089             ippFree(pDCTSpec);
4090 
4091         CV_SUPPRESS_DEPRECATED_END
4092 #else
4093         CV_UNUSED(range);
4094         *ok = false;
4095 #endif
4096     }
4097 
4098 private:
4099     const uchar * src;
4100     size_t src_step;
4101     uchar * dst;
4102     size_t dst_step;
4103     int width;
4104     bool inv;
4105     bool *ok;
4106 };
4107 
DctIPPLoop(const uchar * src,size_t src_step,uchar * dst,size_t dst_step,int width,int height,bool inv)4108 static bool DctIPPLoop(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, bool inv)
4109 {
4110     bool ok;
4111     parallel_for_(Range(0, height), DctIPPLoop_Invoker(src, src_step, dst, dst_step, width, inv, &ok), height/(double)(1<<4) );
4112     return ok;
4113 }
4114 
ippi_DCT_32f(const uchar * src,size_t src_step,uchar * dst,size_t dst_step,int width,int height,bool inv,bool row)4115 static bool ippi_DCT_32f(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, bool inv, bool row)
4116 {
4117     CV_INSTRUMENT_REGION_IPP();
4118 
4119     if(row)
4120         return DctIPPLoop(src, src_step, dst, dst_step, width, height, inv);
4121     else
4122     {
4123 #if IPP_VERSION_X100 >= 900
4124         IppiSize srcRoiSize = {width, height};
4125 
4126         int specSize    = 0;
4127         int initSize    = 0;
4128         int bufferSize  = 0;
4129 
4130         Ipp8u* pDCTSpec = NULL;
4131         Ipp8u* pBuffer  = NULL;
4132         Ipp8u* pInitBuf = NULL;
4133 
4134         #define IPP_RELEASE             \
4135             if(pDCTSpec)                \
4136                 ippFree(pDCTSpec);      \
4137             if(pBuffer)                 \
4138                 ippFree(pBuffer);       \
4139             if(pInitBuf)                \
4140                 ippFree(pInitBuf);      \
4141 
4142         ippiDCTFunc     ippiDCT_32f_C1R      = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R         : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
4143         ippiDCTInit     ippDctInit     = inv ? (ippiDCTInit)ippiDCTInvInit_32f         : (ippiDCTInit)ippiDCTFwdInit_32f;
4144         ippiDCTGetSize  ippDctGetSize  = inv ? (ippiDCTGetSize)ippiDCTInvGetSize_32f   : (ippiDCTGetSize)ippiDCTFwdGetSize_32f;
4145 
4146         if(ippDctGetSize(srcRoiSize, &specSize, &initSize, &bufferSize) < 0)
4147             return false;
4148 
4149         pDCTSpec = (Ipp8u*)CV_IPP_MALLOC(specSize);
4150         if(!pDCTSpec && specSize)
4151             return false;
4152 
4153         pBuffer  = (Ipp8u*)CV_IPP_MALLOC(bufferSize);
4154         if(!pBuffer && bufferSize)
4155         {
4156             IPP_RELEASE
4157             return false;
4158         }
4159         pInitBuf = (Ipp8u*)CV_IPP_MALLOC(initSize);
4160         if(!pInitBuf && initSize)
4161         {
4162             IPP_RELEASE
4163             return false;
4164         }
4165 
4166         if(ippDctInit(pDCTSpec, srcRoiSize, pInitBuf) < 0)
4167         {
4168             IPP_RELEASE
4169             return false;
4170         }
4171 
4172         if(CV_INSTRUMENT_FUN_IPP(ippiDCT_32f_C1R, (float*)src, static_cast<int>(src_step), (float*)dst, static_cast<int>(dst_step), pDCTSpec, pBuffer) < 0)
4173         {
4174             IPP_RELEASE
4175             return false;
4176         }
4177 
4178         IPP_RELEASE
4179         return true;
4180 #undef IPP_RELEASE
4181 #elif IPP_VERSION_X100 >= 700
4182         IppStatus status;
4183         void* pDCTSpec;
4184         AutoBuffer<uchar> buf;
4185         uchar* pBuffer = 0;
4186         int bufSize=0;
4187 
4188         IppiSize srcRoiSize = {width, height};
4189 
4190         CV_SUPPRESS_DEPRECATED_START
4191 
4192         ippiDCTFunc ippDctFun           = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R             : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
4193         ippiDCTInitAlloc ippInitAlloc   = inv ? (ippiDCTInitAlloc)ippiDCTInvInitAlloc_32f   : (ippiDCTInitAlloc)ippiDCTFwdInitAlloc_32f;
4194         ippiDCTFree ippFree             = inv ? (ippiDCTFree)ippiDCTInvFree_32f             : (ippiDCTFree)ippiDCTFwdFree_32f;
4195         ippiDCTGetBufSize ippGetBufSize = inv ? (ippiDCTGetBufSize)ippiDCTInvGetBufSize_32f : (ippiDCTGetBufSize)ippiDCTFwdGetBufSize_32f;
4196 
4197         status = ippStsErr;
4198 
4199         if (ippInitAlloc(&pDCTSpec, srcRoiSize, ippAlgHintNone)>=0 && ippGetBufSize(pDCTSpec, &bufSize)>=0)
4200         {
4201             buf.allocate( bufSize );
4202             pBuffer = (uchar*)buf;
4203 
4204             status = ippDctFun((float*)src, static_cast<int>(src_step), (float*)dst, static_cast<int>(dst_step), pDCTSpec, (Ipp8u*)pBuffer);
4205         }
4206 
4207         if (pDCTSpec)
4208             ippFree(pDCTSpec);
4209 
4210         CV_SUPPRESS_DEPRECATED_END
4211 
4212         return status >= 0;
4213 #else
4214         CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(inv); CV_UNUSED(row);
4215         return false;
4216 #endif
4217     }
4218 }
4219 }
4220 #endif
4221 
4222 namespace cv {
4223 
4224 class OcvDctImpl CV_FINAL : public hal::DCT2D
4225 {
4226 public:
4227     OcvDftOptions opt;
4228 
4229     int _factors[34];
4230     AutoBuffer<uint> wave_buf;
4231     AutoBuffer<int> itab_buf;
4232 
4233     DCTFunc dct_func;
4234     bool isRowTransform;
4235     bool isInverse;
4236     bool isContinuous;
4237     int start_stage;
4238     int end_stage;
4239     int width;
4240     int height;
4241     int depth;
4242 
init(int _width,int _height,int _depth,int flags)4243     void init(int _width, int _height, int _depth, int flags)
4244     {
4245         width = _width;
4246         height = _height;
4247         depth = _depth;
4248         isInverse = (flags & CV_HAL_DFT_INVERSE) != 0;
4249         isRowTransform = (flags & CV_HAL_DFT_ROWS) != 0;
4250         isContinuous = (flags & CV_HAL_DFT_IS_CONTINUOUS) != 0;
4251         static DCTFunc dct_tbl[4] =
4252         {
4253             (DCTFunc)DCT_32f,
4254             (DCTFunc)IDCT_32f,
4255             (DCTFunc)DCT_64f,
4256             (DCTFunc)IDCT_64f
4257         };
4258         dct_func = dct_tbl[(int)isInverse + (depth == CV_64F)*2];
4259         opt.nf = 0;
4260         opt.isComplex = false;
4261         opt.isInverse = false;
4262         opt.noPermute = false;
4263         opt.scale = 1.;
4264         opt.factors = _factors;
4265 
4266         if (isRowTransform || height == 1 || (width == 1 && isContinuous))
4267         {
4268             start_stage = end_stage = 0;
4269         }
4270         else
4271         {
4272             start_stage = (width == 1);
4273             end_stage = 1;
4274         }
4275     }
apply(const uchar * src,size_t src_step,uchar * dst,size_t dst_step)4276     void apply(const uchar *src, size_t src_step, uchar *dst, size_t dst_step) CV_OVERRIDE
4277     {
4278         CV_IPP_RUN(IPP_VERSION_X100 >= 700 && depth == CV_32F, ippi_DCT_32f(src, src_step, dst, dst_step, width, height, isInverse, isRowTransform))
4279 
4280         AutoBuffer<uchar> dct_wave;
4281         AutoBuffer<uchar> src_buf, dst_buf;
4282         uchar *src_dft_buf = 0, *dst_dft_buf = 0;
4283         int prev_len = 0;
4284         int elem_size = (depth == CV_32F) ? sizeof(float) : sizeof(double);
4285         int complex_elem_size = elem_size*2;
4286 
4287         for(int stage = start_stage ; stage <= end_stage; stage++ )
4288         {
4289             const uchar* sptr = src;
4290             uchar* dptr = dst;
4291             size_t sstep0, sstep1, dstep0, dstep1;
4292             int len, count;
4293 
4294             if( stage == 0 )
4295             {
4296                 len = width;
4297                 count = height;
4298                 if( len == 1 && !isRowTransform )
4299                 {
4300                     len = height;
4301                     count = 1;
4302                 }
4303                 sstep0 = src_step;
4304                 dstep0 = dst_step;
4305                 sstep1 = dstep1 = elem_size;
4306             }
4307             else
4308             {
4309                 len = height;
4310                 count = width;
4311                 sstep1 = src_step;
4312                 dstep1 = dst_step;
4313                 sstep0 = dstep0 = elem_size;
4314             }
4315 
4316             opt.n = len;
4317             opt.tab_size = len;
4318 
4319             if( len != prev_len )
4320             {
4321                 if( len > 1 && (len & 1) )
4322                     CV_Error( CV_StsNotImplemented, "Odd-size DCT\'s are not implemented" );
4323 
4324                 opt.nf = DFTFactorize( len, opt.factors );
4325                 bool inplace_transform = opt.factors[0] == opt.factors[opt.nf-1];
4326 
4327                 wave_buf.allocate(len*complex_elem_size);
4328                 opt.wave = wave_buf.data();
4329                 itab_buf.allocate(len);
4330                 opt.itab = itab_buf.data();
4331                 DFTInit( len, opt.nf, opt.factors, opt.itab, complex_elem_size, opt.wave, isInverse );
4332 
4333                 dct_wave.allocate((len/2 + 1)*complex_elem_size);
4334                 src_buf.allocate(len*elem_size);
4335                 src_dft_buf = src_buf.data();
4336                 if(!inplace_transform)
4337                 {
4338                     dst_buf.allocate(len*elem_size);
4339                     dst_dft_buf = dst_buf.data();
4340                 }
4341                 else
4342                 {
4343                     dst_dft_buf = src_buf.data();
4344                 }
4345                 DCTInit( len, complex_elem_size, dct_wave.data(), isInverse);
4346                 prev_len = len;
4347             }
4348             // otherwise reuse the tables calculated on the previous stage
4349             for(unsigned i = 0; i < static_cast<unsigned>(count); i++ )
4350             {
4351                 dct_func( opt, sptr + i*sstep0, sstep1, src_dft_buf, dst_dft_buf,
4352                           dptr + i*dstep0, dstep1, dct_wave.data());
4353             }
4354             src = dst;
4355             src_step = dst_step;
4356         }
4357     }
4358 };
4359 
4360 struct ReplacementDCT2D : public hal::DCT2D
4361 {
4362     cvhalDFT *context;
4363     bool isInitialized;
4364 
ReplacementDCT2Dcv::ReplacementDCT2D4365     ReplacementDCT2D() : context(0), isInitialized(false) {}
initcv::ReplacementDCT2D4366     bool init(int width, int height, int depth, int flags)
4367     {
4368         int res = hal_ni_dctInit2D(&context, width, height, depth, flags);
4369         isInitialized = (res == CV_HAL_ERROR_OK);
4370         return isInitialized;
4371     }
applycv::ReplacementDCT2D4372     void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) CV_OVERRIDE
4373     {
4374         if (isInitialized)
4375         {
4376             CALL_HAL(dct2D, cv_hal_dct2D, context, src_data, src_step, dst_data, dst_step);
4377         }
4378     }
~ReplacementDCT2Dcv::ReplacementDCT2D4379     ~ReplacementDCT2D()
4380     {
4381         if (isInitialized)
4382         {
4383             CALL_HAL(dctFree2D, cv_hal_dctFree2D, context);
4384         }
4385     }
4386 };
4387 
4388 namespace hal {
4389 
create(int width,int height,int depth,int flags)4390 Ptr<DCT2D> DCT2D::create(int width, int height, int depth, int flags)
4391 {
4392     {
4393         ReplacementDCT2D *impl = new ReplacementDCT2D();
4394         if (impl->init(width, height, depth, flags))
4395         {
4396             return Ptr<DCT2D>(impl);
4397         }
4398         delete impl;
4399     }
4400     {
4401         OcvDctImpl *impl = new OcvDctImpl();
4402         impl->init(width, height, depth, flags);
4403         return Ptr<DCT2D>(impl);
4404     }
4405 }
4406 
4407 } // cv::hal::
4408 } // cv::
4409 
dct(InputArray _src0,OutputArray _dst,int flags)4410 void cv::dct( InputArray _src0, OutputArray _dst, int flags )
4411 {
4412     CV_INSTRUMENT_REGION();
4413 
4414     Mat src0 = _src0.getMat(), src = src0;
4415     int type = src.type(), depth = src.depth();
4416 
4417     CV_Assert( type == CV_32FC1 || type == CV_64FC1 );
4418     _dst.create( src.rows, src.cols, type );
4419     Mat dst = _dst.getMat();
4420 
4421     int f = 0;
4422     if ((flags & DFT_ROWS) != 0)
4423         f |= CV_HAL_DFT_ROWS;
4424     if ((flags & DCT_INVERSE) != 0)
4425         f |= CV_HAL_DFT_INVERSE;
4426     if (src.isContinuous() && dst.isContinuous())
4427         f |= CV_HAL_DFT_IS_CONTINUOUS;
4428 
4429     Ptr<hal::DCT2D> c = hal::DCT2D::create(src.cols, src.rows, depth, f);
4430     c->apply(src.data, src.step, dst.data, dst.step);
4431 }
4432 
4433 
idct(InputArray src,OutputArray dst,int flags)4434 void cv::idct( InputArray src, OutputArray dst, int flags )
4435 {
4436     CV_INSTRUMENT_REGION();
4437 
4438     dct( src, dst, flags | DCT_INVERSE );
4439 }
4440 
4441 namespace cv
4442 {
4443 
4444 static const int optimalDFTSizeTab[] = {
4445 1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 15, 16, 18, 20, 24, 25, 27, 30, 32, 36, 40, 45, 48,
4446 50, 54, 60, 64, 72, 75, 80, 81, 90, 96, 100, 108, 120, 125, 128, 135, 144, 150, 160,
4447 162, 180, 192, 200, 216, 225, 240, 243, 250, 256, 270, 288, 300, 320, 324, 360, 375,
4448 384, 400, 405, 432, 450, 480, 486, 500, 512, 540, 576, 600, 625, 640, 648, 675, 720,
4449 729, 750, 768, 800, 810, 864, 900, 960, 972, 1000, 1024, 1080, 1125, 1152, 1200,
4450 1215, 1250, 1280, 1296, 1350, 1440, 1458, 1500, 1536, 1600, 1620, 1728, 1800, 1875,
4451 1920, 1944, 2000, 2025, 2048, 2160, 2187, 2250, 2304, 2400, 2430, 2500, 2560, 2592,
4452 2700, 2880, 2916, 3000, 3072, 3125, 3200, 3240, 3375, 3456, 3600, 3645, 3750, 3840,
4453 3888, 4000, 4050, 4096, 4320, 4374, 4500, 4608, 4800, 4860, 5000, 5120, 5184, 5400,
4454 5625, 5760, 5832, 6000, 6075, 6144, 6250, 6400, 6480, 6561, 6750, 6912, 7200, 7290,
4455 7500, 7680, 7776, 8000, 8100, 8192, 8640, 8748, 9000, 9216, 9375, 9600, 9720, 10000,
4456 10125, 10240, 10368, 10800, 10935, 11250, 11520, 11664, 12000, 12150, 12288, 12500,
4457 12800, 12960, 13122, 13500, 13824, 14400, 14580, 15000, 15360, 15552, 15625, 16000,
4458 16200, 16384, 16875, 17280, 17496, 18000, 18225, 18432, 18750, 19200, 19440, 19683,
4459 20000, 20250, 20480, 20736, 21600, 21870, 22500, 23040, 23328, 24000, 24300, 24576,
4460 25000, 25600, 25920, 26244, 27000, 27648, 28125, 28800, 29160, 30000, 30375, 30720,
4461 31104, 31250, 32000, 32400, 32768, 32805, 33750, 34560, 34992, 36000, 36450, 36864,
4462 37500, 38400, 38880, 39366, 40000, 40500, 40960, 41472, 43200, 43740, 45000, 46080,
4463 46656, 46875, 48000, 48600, 49152, 50000, 50625, 51200, 51840, 52488, 54000, 54675,
4464 55296, 56250, 57600, 58320, 59049, 60000, 60750, 61440, 62208, 62500, 64000, 64800,
4465 65536, 65610, 67500, 69120, 69984, 72000, 72900, 73728, 75000, 76800, 77760, 78125,
4466 78732, 80000, 81000, 81920, 82944, 84375, 86400, 87480, 90000, 91125, 92160, 93312,
4467 93750, 96000, 97200, 98304, 98415, 100000, 101250, 102400, 103680, 104976, 108000,
4468 109350, 110592, 112500, 115200, 116640, 118098, 120000, 121500, 122880, 124416, 125000,
4469 128000, 129600, 131072, 131220, 135000, 138240, 139968, 140625, 144000, 145800, 147456,
4470 150000, 151875, 153600, 155520, 156250, 157464, 160000, 162000, 163840, 164025, 165888,
4471 168750, 172800, 174960, 177147, 180000, 182250, 184320, 186624, 187500, 192000, 194400,
4472 196608, 196830, 200000, 202500, 204800, 207360, 209952, 216000, 218700, 221184, 225000,
4473 230400, 233280, 234375, 236196, 240000, 243000, 245760, 248832, 250000, 253125, 256000,
4474 259200, 262144, 262440, 270000, 273375, 276480, 279936, 281250, 288000, 291600, 294912,
4475 295245, 300000, 303750, 307200, 311040, 312500, 314928, 320000, 324000, 327680, 328050,
4476 331776, 337500, 345600, 349920, 354294, 360000, 364500, 368640, 373248, 375000, 384000,
4477 388800, 390625, 393216, 393660, 400000, 405000, 409600, 414720, 419904, 421875, 432000,
4478 437400, 442368, 450000, 455625, 460800, 466560, 468750, 472392, 480000, 486000, 491520,
4479 492075, 497664, 500000, 506250, 512000, 518400, 524288, 524880, 531441, 540000, 546750,
4480 552960, 559872, 562500, 576000, 583200, 589824, 590490, 600000, 607500, 614400, 622080,
4481 625000, 629856, 640000, 648000, 655360, 656100, 663552, 675000, 691200, 699840, 703125,
4482 708588, 720000, 729000, 737280, 746496, 750000, 759375, 768000, 777600, 781250, 786432,
4483 787320, 800000, 810000, 819200, 820125, 829440, 839808, 843750, 864000, 874800, 884736,
4484 885735, 900000, 911250, 921600, 933120, 937500, 944784, 960000, 972000, 983040, 984150,
4485 995328, 1000000, 1012500, 1024000, 1036800, 1048576, 1049760, 1062882, 1080000, 1093500,
4486 1105920, 1119744, 1125000, 1152000, 1166400, 1171875, 1179648, 1180980, 1200000,
4487 1215000, 1228800, 1244160, 1250000, 1259712, 1265625, 1280000, 1296000, 1310720,
4488 1312200, 1327104, 1350000, 1366875, 1382400, 1399680, 1406250, 1417176, 1440000,
4489 1458000, 1474560, 1476225, 1492992, 1500000, 1518750, 1536000, 1555200, 1562500,
4490 1572864, 1574640, 1594323, 1600000, 1620000, 1638400, 1640250, 1658880, 1679616,
4491 1687500, 1728000, 1749600, 1769472, 1771470, 1800000, 1822500, 1843200, 1866240,
4492 1875000, 1889568, 1920000, 1944000, 1953125, 1966080, 1968300, 1990656, 2000000,
4493 2025000, 2048000, 2073600, 2097152, 2099520, 2109375, 2125764, 2160000, 2187000,
4494 2211840, 2239488, 2250000, 2278125, 2304000, 2332800, 2343750, 2359296, 2361960,
4495 2400000, 2430000, 2457600, 2460375, 2488320, 2500000, 2519424, 2531250, 2560000,
4496 2592000, 2621440, 2624400, 2654208, 2657205, 2700000, 2733750, 2764800, 2799360,
4497 2812500, 2834352, 2880000, 2916000, 2949120, 2952450, 2985984, 3000000, 3037500,
4498 3072000, 3110400, 3125000, 3145728, 3149280, 3188646, 3200000, 3240000, 3276800,
4499 3280500, 3317760, 3359232, 3375000, 3456000, 3499200, 3515625, 3538944, 3542940,
4500 3600000, 3645000, 3686400, 3732480, 3750000, 3779136, 3796875, 3840000, 3888000,
4501 3906250, 3932160, 3936600, 3981312, 4000000, 4050000, 4096000, 4100625, 4147200,
4502 4194304, 4199040, 4218750, 4251528, 4320000, 4374000, 4423680, 4428675, 4478976,
4503 4500000, 4556250, 4608000, 4665600, 4687500, 4718592, 4723920, 4782969, 4800000,
4504 4860000, 4915200, 4920750, 4976640, 5000000, 5038848, 5062500, 5120000, 5184000,
4505 5242880, 5248800, 5308416, 5314410, 5400000, 5467500, 5529600, 5598720, 5625000,
4506 5668704, 5760000, 5832000, 5859375, 5898240, 5904900, 5971968, 6000000, 6075000,
4507 6144000, 6220800, 6250000, 6291456, 6298560, 6328125, 6377292, 6400000, 6480000,
4508 6553600, 6561000, 6635520, 6718464, 6750000, 6834375, 6912000, 6998400, 7031250,
4509 7077888, 7085880, 7200000, 7290000, 7372800, 7381125, 7464960, 7500000, 7558272,
4510 7593750, 7680000, 7776000, 7812500, 7864320, 7873200, 7962624, 7971615, 8000000,
4511 8100000, 8192000, 8201250, 8294400, 8388608, 8398080, 8437500, 8503056, 8640000,
4512 8748000, 8847360, 8857350, 8957952, 9000000, 9112500, 9216000, 9331200, 9375000,
4513 9437184, 9447840, 9565938, 9600000, 9720000, 9765625, 9830400, 9841500, 9953280,
4514 10000000, 10077696, 10125000, 10240000, 10368000, 10485760, 10497600, 10546875, 10616832,
4515 10628820, 10800000, 10935000, 11059200, 11197440, 11250000, 11337408, 11390625, 11520000,
4516 11664000, 11718750, 11796480, 11809800, 11943936, 12000000, 12150000, 12288000, 12301875,
4517 12441600, 12500000, 12582912, 12597120, 12656250, 12754584, 12800000, 12960000, 13107200,
4518 13122000, 13271040, 13286025, 13436928, 13500000, 13668750, 13824000, 13996800, 14062500,
4519 14155776, 14171760, 14400000, 14580000, 14745600, 14762250, 14929920, 15000000, 15116544,
4520 15187500, 15360000, 15552000, 15625000, 15728640, 15746400, 15925248, 15943230, 16000000,
4521 16200000, 16384000, 16402500, 16588800, 16777216, 16796160, 16875000, 17006112, 17280000,
4522 17496000, 17578125, 17694720, 17714700, 17915904, 18000000, 18225000, 18432000, 18662400,
4523 18750000, 18874368, 18895680, 18984375, 19131876, 19200000, 19440000, 19531250, 19660800,
4524 19683000, 19906560, 20000000, 20155392, 20250000, 20480000, 20503125, 20736000, 20971520,
4525 20995200, 21093750, 21233664, 21257640, 21600000, 21870000, 22118400, 22143375, 22394880,
4526 22500000, 22674816, 22781250, 23040000, 23328000, 23437500, 23592960, 23619600, 23887872,
4527 23914845, 24000000, 24300000, 24576000, 24603750, 24883200, 25000000, 25165824, 25194240,
4528 25312500, 25509168, 25600000, 25920000, 26214400, 26244000, 26542080, 26572050, 26873856,
4529 27000000, 27337500, 27648000, 27993600, 28125000, 28311552, 28343520, 28800000, 29160000,
4530 29296875, 29491200, 29524500, 29859840, 30000000, 30233088, 30375000, 30720000, 31104000,
4531 31250000, 31457280, 31492800, 31640625, 31850496, 31886460, 32000000, 32400000, 32768000,
4532 32805000, 33177600, 33554432, 33592320, 33750000, 34012224, 34171875, 34560000, 34992000,
4533 35156250, 35389440, 35429400, 35831808, 36000000, 36450000, 36864000, 36905625, 37324800,
4534 37500000, 37748736, 37791360, 37968750, 38263752, 38400000, 38880000, 39062500, 39321600,
4535 39366000, 39813120, 39858075, 40000000, 40310784, 40500000, 40960000, 41006250, 41472000,
4536 41943040, 41990400, 42187500, 42467328, 42515280, 43200000, 43740000, 44236800, 44286750,
4537 44789760, 45000000, 45349632, 45562500, 46080000, 46656000, 46875000, 47185920, 47239200,
4538 47775744, 47829690, 48000000, 48600000, 48828125, 49152000, 49207500, 49766400, 50000000,
4539 50331648, 50388480, 50625000, 51018336, 51200000, 51840000, 52428800, 52488000, 52734375,
4540 53084160, 53144100, 53747712, 54000000, 54675000, 55296000, 55987200, 56250000, 56623104,
4541 56687040, 56953125, 57600000, 58320000, 58593750, 58982400, 59049000, 59719680, 60000000,
4542 60466176, 60750000, 61440000, 61509375, 62208000, 62500000, 62914560, 62985600, 63281250,
4543 63700992, 63772920, 64000000, 64800000, 65536000, 65610000, 66355200, 66430125, 67108864,
4544 67184640, 67500000, 68024448, 68343750, 69120000, 69984000, 70312500, 70778880, 70858800,
4545 71663616, 72000000, 72900000, 73728000, 73811250, 74649600, 75000000, 75497472, 75582720,
4546 75937500, 76527504, 76800000, 77760000, 78125000, 78643200, 78732000, 79626240, 79716150,
4547 80000000, 80621568, 81000000, 81920000, 82012500, 82944000, 83886080, 83980800, 84375000,
4548 84934656, 85030560, 86400000, 87480000, 87890625, 88473600, 88573500, 89579520, 90000000,
4549 90699264, 91125000, 92160000, 93312000, 93750000, 94371840, 94478400, 94921875, 95551488,
4550 95659380, 96000000, 97200000, 97656250, 98304000, 98415000, 99532800, 100000000,
4551 100663296, 100776960, 101250000, 102036672, 102400000, 102515625, 103680000, 104857600,
4552 104976000, 105468750, 106168320, 106288200, 107495424, 108000000, 109350000, 110592000,
4553 110716875, 111974400, 112500000, 113246208, 113374080, 113906250, 115200000, 116640000,
4554 117187500, 117964800, 118098000, 119439360, 119574225, 120000000, 120932352, 121500000,
4555 122880000, 123018750, 124416000, 125000000, 125829120, 125971200, 126562500, 127401984,
4556 127545840, 128000000, 129600000, 131072000, 131220000, 132710400, 132860250, 134217728,
4557 134369280, 135000000, 136048896, 136687500, 138240000, 139968000, 140625000, 141557760,
4558 141717600, 143327232, 144000000, 145800000, 146484375, 147456000, 147622500, 149299200,
4559 150000000, 150994944, 151165440, 151875000, 153055008, 153600000, 155520000, 156250000,
4560 157286400, 157464000, 158203125, 159252480, 159432300, 160000000, 161243136, 162000000,
4561 163840000, 164025000, 165888000, 167772160, 167961600, 168750000, 169869312, 170061120,
4562 170859375, 172800000, 174960000, 175781250, 176947200, 177147000, 179159040, 180000000,
4563 181398528, 182250000, 184320000, 184528125, 186624000, 187500000, 188743680, 188956800,
4564 189843750, 191102976, 191318760, 192000000, 194400000, 195312500, 196608000, 196830000,
4565 199065600, 199290375, 200000000, 201326592, 201553920, 202500000, 204073344, 204800000,
4566 205031250, 207360000, 209715200, 209952000, 210937500, 212336640, 212576400, 214990848,
4567 216000000, 218700000, 221184000, 221433750, 223948800, 225000000, 226492416, 226748160,
4568 227812500, 230400000, 233280000, 234375000, 235929600, 236196000, 238878720, 239148450,
4569 240000000, 241864704, 243000000, 244140625, 245760000, 246037500, 248832000, 250000000,
4570 251658240, 251942400, 253125000, 254803968, 255091680, 256000000, 259200000, 262144000,
4571 262440000, 263671875, 265420800, 265720500, 268435456, 268738560, 270000000, 272097792,
4572 273375000, 276480000, 279936000, 281250000, 283115520, 283435200, 284765625, 286654464,
4573 288000000, 291600000, 292968750, 294912000, 295245000, 298598400, 300000000, 301989888,
4574 302330880, 303750000, 306110016, 307200000, 307546875, 311040000, 312500000, 314572800,
4575 314928000, 316406250, 318504960, 318864600, 320000000, 322486272, 324000000, 327680000,
4576 328050000, 331776000, 332150625, 335544320, 335923200, 337500000, 339738624, 340122240,
4577 341718750, 345600000, 349920000, 351562500, 353894400, 354294000, 358318080, 360000000,
4578 362797056, 364500000, 368640000, 369056250, 373248000, 375000000, 377487360, 377913600,
4579 379687500, 382205952, 382637520, 384000000, 388800000, 390625000, 393216000, 393660000,
4580 398131200, 398580750, 400000000, 402653184, 403107840, 405000000, 408146688, 409600000,
4581 410062500, 414720000, 419430400, 419904000, 421875000, 424673280, 425152800, 429981696,
4582 432000000, 437400000, 439453125, 442368000, 442867500, 447897600, 450000000, 452984832,
4583 453496320, 455625000, 460800000, 466560000, 468750000, 471859200, 472392000, 474609375,
4584 477757440, 478296900, 480000000, 483729408, 486000000, 488281250, 491520000, 492075000,
4585 497664000, 500000000, 503316480, 503884800, 506250000, 509607936, 510183360, 512000000,
4586 512578125, 518400000, 524288000, 524880000, 527343750, 530841600, 531441000, 536870912,
4587 537477120, 540000000, 544195584, 546750000, 552960000, 553584375, 559872000, 562500000,
4588 566231040, 566870400, 569531250, 573308928, 576000000, 583200000, 585937500, 589824000,
4589 590490000, 597196800, 597871125, 600000000, 603979776, 604661760, 607500000, 612220032,
4590 614400000, 615093750, 622080000, 625000000, 629145600, 629856000, 632812500, 637009920,
4591 637729200, 640000000, 644972544, 648000000, 655360000, 656100000, 663552000, 664301250,
4592 671088640, 671846400, 675000000, 679477248, 680244480, 683437500, 691200000, 699840000,
4593 703125000, 707788800, 708588000, 716636160, 720000000, 725594112, 729000000, 732421875,
4594 737280000, 738112500, 746496000, 750000000, 754974720, 755827200, 759375000, 764411904,
4595 765275040, 768000000, 777600000, 781250000, 786432000, 787320000, 791015625, 796262400,
4596 797161500, 800000000, 805306368, 806215680, 810000000, 816293376, 819200000, 820125000,
4597 829440000, 838860800, 839808000, 843750000, 849346560, 850305600, 854296875, 859963392,
4598 864000000, 874800000, 878906250, 884736000, 885735000, 895795200, 900000000, 905969664,
4599 906992640, 911250000, 921600000, 922640625, 933120000, 937500000, 943718400, 944784000,
4600 949218750, 955514880, 956593800, 960000000, 967458816, 972000000, 976562500, 983040000,
4601 984150000, 995328000, 996451875, 1000000000, 1006632960, 1007769600, 1012500000,
4602 1019215872, 1020366720, 1024000000, 1025156250, 1036800000, 1048576000, 1049760000,
4603 1054687500, 1061683200, 1062882000, 1073741824, 1074954240, 1080000000, 1088391168,
4604 1093500000, 1105920000, 1107168750, 1119744000, 1125000000, 1132462080, 1133740800,
4605 1139062500, 1146617856, 1152000000, 1166400000, 1171875000, 1179648000, 1180980000,
4606 1194393600, 1195742250, 1200000000, 1207959552, 1209323520, 1215000000, 1220703125,
4607 1224440064, 1228800000, 1230187500, 1244160000, 1250000000, 1258291200, 1259712000,
4608 1265625000, 1274019840, 1275458400, 1280000000, 1289945088, 1296000000, 1310720000,
4609 1312200000, 1318359375, 1327104000, 1328602500, 1342177280, 1343692800, 1350000000,
4610 1358954496, 1360488960, 1366875000, 1382400000, 1399680000, 1406250000, 1415577600,
4611 1417176000, 1423828125, 1433272320, 1440000000, 1451188224, 1458000000, 1464843750,
4612 1474560000, 1476225000, 1492992000, 1500000000, 1509949440, 1511654400, 1518750000,
4613 1528823808, 1530550080, 1536000000, 1537734375, 1555200000, 1562500000, 1572864000,
4614 1574640000, 1582031250, 1592524800, 1594323000, 1600000000, 1610612736, 1612431360,
4615 1620000000, 1632586752, 1638400000, 1640250000, 1658880000, 1660753125, 1677721600,
4616 1679616000, 1687500000, 1698693120, 1700611200, 1708593750, 1719926784, 1728000000,
4617 1749600000, 1757812500, 1769472000, 1771470000, 1791590400, 1800000000, 1811939328,
4618 1813985280, 1822500000, 1843200000, 1845281250, 1866240000, 1875000000, 1887436800,
4619 1889568000, 1898437500, 1911029760, 1913187600, 1920000000, 1934917632, 1944000000,
4620 1953125000, 1966080000, 1968300000, 1990656000, 1992903750, 2000000000, 2013265920,
4621 2015539200, 2025000000, 2038431744, 2040733440, 2048000000, 2050312500, 2073600000,
4622 2097152000, 2099520000, 2109375000, 2123366400, 2125764000
4623 };
4624 
4625 }
4626 
getOptimalDFTSize(int size0)4627 int cv::getOptimalDFTSize( int size0 )
4628 {
4629     int a = 0, b = sizeof(optimalDFTSizeTab)/sizeof(optimalDFTSizeTab[0]) - 1;
4630     if( (unsigned)size0 >= (unsigned)optimalDFTSizeTab[b] )
4631         return -1;
4632 
4633     while( a < b )
4634     {
4635         int c = (a + b) >> 1;
4636         if( size0 <= optimalDFTSizeTab[c] )
4637             b = c;
4638         else
4639             a = c+1;
4640     }
4641 
4642     return optimalDFTSizeTab[b];
4643 }
4644 
4645 
4646 #ifndef OPENCV_EXCLUDE_C_API
4647 
4648 CV_IMPL void
cvDFT(const CvArr * srcarr,CvArr * dstarr,int flags,int nonzero_rows)4649 cvDFT( const CvArr* srcarr, CvArr* dstarr, int flags, int nonzero_rows )
4650 {
4651     cv::Mat src = cv::cvarrToMat(srcarr), dst0 = cv::cvarrToMat(dstarr), dst = dst0;
4652     int _flags = ((flags & CV_DXT_INVERSE) ? cv::DFT_INVERSE : 0) |
4653         ((flags & CV_DXT_SCALE) ? cv::DFT_SCALE : 0) |
4654         ((flags & CV_DXT_ROWS) ? cv::DFT_ROWS : 0);
4655 
4656     CV_Assert( src.size == dst.size );
4657 
4658     if( src.type() != dst.type() )
4659     {
4660         if( dst.channels() == 2 )
4661             _flags |= cv::DFT_COMPLEX_OUTPUT;
4662         else
4663             _flags |= cv::DFT_REAL_OUTPUT;
4664     }
4665 
4666     cv::dft( src, dst, _flags, nonzero_rows );
4667     CV_Assert( dst.data == dst0.data ); // otherwise it means that the destination size or type was incorrect
4668 }
4669 
4670 
4671 CV_IMPL void
cvMulSpectrums(const CvArr * srcAarr,const CvArr * srcBarr,CvArr * dstarr,int flags)4672 cvMulSpectrums( const CvArr* srcAarr, const CvArr* srcBarr,
4673                 CvArr* dstarr, int flags )
4674 {
4675     cv::Mat srcA = cv::cvarrToMat(srcAarr),
4676         srcB = cv::cvarrToMat(srcBarr),
4677         dst = cv::cvarrToMat(dstarr);
4678     CV_Assert( srcA.size == dst.size && srcA.type() == dst.type() );
4679 
4680     cv::mulSpectrums(srcA, srcB, dst,
4681         (flags & CV_DXT_ROWS) ? cv::DFT_ROWS : 0,
4682         (flags & CV_DXT_MUL_CONJ) != 0 );
4683 }
4684 
4685 
4686 CV_IMPL void
cvDCT(const CvArr * srcarr,CvArr * dstarr,int flags)4687 cvDCT( const CvArr* srcarr, CvArr* dstarr, int flags )
4688 {
4689     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
4690     CV_Assert( src.size == dst.size && src.type() == dst.type() );
4691     int _flags = ((flags & CV_DXT_INVERSE) ? cv::DCT_INVERSE : 0) |
4692             ((flags & CV_DXT_ROWS) ? cv::DCT_ROWS : 0);
4693     cv::dct( src, dst, _flags );
4694 }
4695 
4696 
4697 CV_IMPL int
cvGetOptimalDFTSize(int size0)4698 cvGetOptimalDFTSize( int size0 )
4699 {
4700     return cv::getOptimalDFTSize(size0);
4701 }
4702 
4703 #endif  // OPENCV_EXCLUDE_C_API
4704 /* End of file. */
4705