1 #if !defined  HAVE_FHTLOC2_H__
2 #define       HAVE_FHTLOC2_H__
3 // This file is part of the FXT library.
4 // Copyright (C) 2010, 2012, 2013 Joerg Arndt
5 // License: GNU General Public License version 3 or later,
6 // see the file COPYING.txt in the main directory.
7 
8 
9 #include "fht/fht.h"  // fht_dif_core(), fht_dit_core()
10 #include "fht/hartleyshift.h"  // hartley_shift_05()
11 #include "aux0/sumdiff.h"
12 #include "perm/revbinpermute.h"
13 #include "fht/shortfhtdifcore.h"
14 #include "fht/shortfhtditcore.h"
15 
16 #include "fxttypes.h"
17 
18 // whether to use trig recursion:
19 #define USE_TRIG_REC  // (default: yes)
20 
21 
22 template <typename Type>
fht_loc_dif2_core(Type * f,ulong ldn)23 void fht_loc_dif2_core(Type *f, ulong ldn)
24 // Fast Hartley transform (FHT).
25 // Recursive decimation in frequency (DIF) algorithm.
26 // Excellent performance for large array sizes.
27 {
28     if ( ldn<=13 )  // sizeof(Type)*(2**threshold) <= L1_CACHE_BYTES
29     {
30         fht_dif_core(f, ldn);
31         return;
32     }
33 
34     for (ulong ldm=ldn; ldm>=1; --ldm)
35     {
36         const ulong m = (1UL<<ldm);
37         const ulong mh = (m>>1);
38 
39         for (ulong t1=0, t2=mh;  t1<mh;  ++t1, ++t2)
40             sumdiff(f[t1], f[t2]);
41 
42 #ifdef USE_TRIG_REC
43         hartley_shift_05_v2rec(f+mh, mh);
44 #else
45         hartley_shift_05_v2(f+mh, mh);
46 #endif
47     }
48 
49     // Recursion:
50 //    for (ulong ldm=1; ldm<ldn; ++ldm)
51 //        fht_loc_dif2_core(f+(1UL<<ldm), ldm);
52 
53     fht_dif_core_2(f+2);  // ldm==1
54     fht_dif_core_4(f+4);  // ldm==2
55     fht_dif_core_8(f+8);  // ldm==3
56 //    fht_dif_core_16(f+16);  // ldm==4
57     for (ulong ldm=4; ldm<ldn; ++ldm)
58         fht_loc_dif2_core(f+(1UL<<ldm), ldm);
59 }
60 // -------------------------
61 
62 
63 template <typename Type>
fht_loc_dit2_core(Type * f,ulong ldn)64 void fht_loc_dit2_core(Type *f, ulong ldn)
65 // Fast Hartley transform (FHT).
66 // Recursive decimation in time (DIT) algorithm.
67 // Excellent performance for large array sizes.
68 {
69     if ( ldn<=13 )
70     {
71         fht_dit_core(f, ldn);
72         return;
73     }
74 
75     // Recursion:
76 //    for (ulong ldm=1; ldm<ldn; ++ldm)
77 //        fht_loc_dit2_core(f+(1UL<<ldm), ldm);
78 
79     fht_dit_core_2(f+2);  // ldm==1
80     fht_dit_core_4(f+4);  // ldm==2
81     fht_dit_core_8(f+8);  // ldm==3
82 //    fht_dit_core_16(f+16);  // ldm==4
83     for (ulong ldm=4; ldm<ldn; ++ldm)
84         fht_loc_dit2_core(f+(1UL<<ldm), ldm);
85 
86 
87     for (ulong ldm=1; ldm<=ldn; ++ldm)
88     {
89         const ulong m = (1UL<<ldm);
90         const ulong mh = (m>>1);
91 #ifdef USE_TRIG_REC
92         hartley_shift_05_v2rec(f+mh, mh);
93 #else
94         hartley_shift_05_v2(f+mh, mh);
95 #endif
96         for (ulong t1=0, t2=mh;  t1<mh;  ++t1, ++t2)
97             sumdiff(f[t1], f[t2]);
98     }
99 }
100 // -------------------------
101 
102 
103 //: inlines give default implementations:
104 
105 template <typename Type>
fht_loc(Type * f,ulong ldn)106 inline void fht_loc(Type *f, ulong ldn)
107 {
108     fht_loc_dif2_core(f, ldn);
109     revbin_permute(f, 1UL<<ldn);
110 }
111 // -------------------------
112 
113 template <typename Type>
fht0_loc(Type * f,ulong ldn)114 inline void fht0_loc(Type *f, ulong ldn)
115 {
116     // todo: no special version for zero padded data yet:
117     fht_loc_dif2_core(f, ldn);
118     revbin_permute(f, 1UL<<ldn);
119 }
120 // -------------------------
121 
122 #undef USE_TRIG_REC
123 
124 #endif  // !defined HAVE_FHTLOC2_H__
125