1 #if !defined HAVE_FHTLOC2_H__
2 #define HAVE_FHTLOC2_H__
3 // This file is part of the FXT library.
4 // Copyright (C) 2010, 2012, 2013 Joerg Arndt
5 // License: GNU General Public License version 3 or later,
6 // see the file COPYING.txt in the main directory.
7
8
9 #include "fht/fht.h" // fht_dif_core(), fht_dit_core()
10 #include "fht/hartleyshift.h" // hartley_shift_05()
11 #include "aux0/sumdiff.h"
12 #include "perm/revbinpermute.h"
13 #include "fht/shortfhtdifcore.h"
14 #include "fht/shortfhtditcore.h"
15
16 #include "fxttypes.h"
17
18 // whether to use trig recursion:
19 #define USE_TRIG_REC // (default: yes)
20
21
22 template <typename Type>
fht_loc_dif2_core(Type * f,ulong ldn)23 void fht_loc_dif2_core(Type *f, ulong ldn)
24 // Fast Hartley transform (FHT).
25 // Recursive decimation in frequency (DIF) algorithm.
26 // Excellent performance for large array sizes.
27 {
28 if ( ldn<=13 ) // sizeof(Type)*(2**threshold) <= L1_CACHE_BYTES
29 {
30 fht_dif_core(f, ldn);
31 return;
32 }
33
34 for (ulong ldm=ldn; ldm>=1; --ldm)
35 {
36 const ulong m = (1UL<<ldm);
37 const ulong mh = (m>>1);
38
39 for (ulong t1=0, t2=mh; t1<mh; ++t1, ++t2)
40 sumdiff(f[t1], f[t2]);
41
42 #ifdef USE_TRIG_REC
43 hartley_shift_05_v2rec(f+mh, mh);
44 #else
45 hartley_shift_05_v2(f+mh, mh);
46 #endif
47 }
48
49 // Recursion:
50 // for (ulong ldm=1; ldm<ldn; ++ldm)
51 // fht_loc_dif2_core(f+(1UL<<ldm), ldm);
52
53 fht_dif_core_2(f+2); // ldm==1
54 fht_dif_core_4(f+4); // ldm==2
55 fht_dif_core_8(f+8); // ldm==3
56 // fht_dif_core_16(f+16); // ldm==4
57 for (ulong ldm=4; ldm<ldn; ++ldm)
58 fht_loc_dif2_core(f+(1UL<<ldm), ldm);
59 }
60 // -------------------------
61
62
63 template <typename Type>
fht_loc_dit2_core(Type * f,ulong ldn)64 void fht_loc_dit2_core(Type *f, ulong ldn)
65 // Fast Hartley transform (FHT).
66 // Recursive decimation in time (DIT) algorithm.
67 // Excellent performance for large array sizes.
68 {
69 if ( ldn<=13 )
70 {
71 fht_dit_core(f, ldn);
72 return;
73 }
74
75 // Recursion:
76 // for (ulong ldm=1; ldm<ldn; ++ldm)
77 // fht_loc_dit2_core(f+(1UL<<ldm), ldm);
78
79 fht_dit_core_2(f+2); // ldm==1
80 fht_dit_core_4(f+4); // ldm==2
81 fht_dit_core_8(f+8); // ldm==3
82 // fht_dit_core_16(f+16); // ldm==4
83 for (ulong ldm=4; ldm<ldn; ++ldm)
84 fht_loc_dit2_core(f+(1UL<<ldm), ldm);
85
86
87 for (ulong ldm=1; ldm<=ldn; ++ldm)
88 {
89 const ulong m = (1UL<<ldm);
90 const ulong mh = (m>>1);
91 #ifdef USE_TRIG_REC
92 hartley_shift_05_v2rec(f+mh, mh);
93 #else
94 hartley_shift_05_v2(f+mh, mh);
95 #endif
96 for (ulong t1=0, t2=mh; t1<mh; ++t1, ++t2)
97 sumdiff(f[t1], f[t2]);
98 }
99 }
100 // -------------------------
101
102
103 //: inlines give default implementations:
104
105 template <typename Type>
fht_loc(Type * f,ulong ldn)106 inline void fht_loc(Type *f, ulong ldn)
107 {
108 fht_loc_dif2_core(f, ldn);
109 revbin_permute(f, 1UL<<ldn);
110 }
111 // -------------------------
112
113 template <typename Type>
fht0_loc(Type * f,ulong ldn)114 inline void fht0_loc(Type *f, ulong ldn)
115 {
116 // todo: no special version for zero padded data yet:
117 fht_loc_dif2_core(f, ldn);
118 revbin_permute(f, 1UL<<ldn);
119 }
120 // -------------------------
121
122 #undef USE_TRIG_REC
123
124 #endif // !defined HAVE_FHTLOC2_H__
125