1 /*****************************************************************************
2 
3         Downsampler2xF64Sse2.hpp
4         Author: Laurent de Soras, 2020
5 
6 --- Legal stuff ---
7 
8 This program is free software. It comes without any warranty, to
9 the extent permitted by applicable law. You can redistribute it
10 and/or modify it under the terms of the Do What The Fuck You Want
11 To Public License, Version 2, as published by Sam Hocevar. See
12 http://www.wtfpl.net/ for more details.
13 
14 *Tab=3***********************************************************************/
15 
16 
17 
18 #if ! defined (hiir_Downsampler2xF64Sse2_CODEHEADER_INCLUDED)
19 #define hiir_Downsampler2xF64Sse2_CODEHEADER_INCLUDED
20 
21 
22 
23 /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
24 
25 #include "hiir/StageProcF64Sse2.h"
26 
27 #include <cassert>
28 
29 
30 
31 namespace hiir
32 {
33 
34 
35 
36 /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
37 
38 
39 
40 /*
41 ==============================================================================
42 Name: ctor
43 Throws: Nothing
44 ==============================================================================
45 */
46 
47 template <int NC>
Downsampler2xF64Sse2()48 Downsampler2xF64Sse2 <NC>::Downsampler2xF64Sse2 ()
49 {
50 	for (int i = 0; i < _nbr_stages + 1; ++i)
51 	{
52 		_mm_store_pd (_filter [i]._coef, _mm_setzero_pd ());
53 	}
54 	if (NBR_COEFS < _nbr_stages * 2)
55 	{
56 		_filter [_nbr_stages]._coef [0] = 1;
57 	}
58 
59 	clear_buffers ();
60 }
61 
62 
63 
64 /*
65 ==============================================================================
66 Name: set_coefs
67 Description:
68 	Sets filter coefficients. Generate them with the PolyphaseIir2Designer
69 	class.
70 	Call this function before doing any processing.
71 Input parameters:
72 	- coef_arr: Array of coefficients. There should be as many coefficients as
73 		mentioned in the class template parameter.
74 Throws: Nothing
75 ==============================================================================
76 */
77 
78 template <int NC>
set_coefs(const double coef_arr[])79 void	Downsampler2xF64Sse2 <NC>::set_coefs (const double coef_arr [])
80 {
81 	assert (coef_arr != nullptr);
82 
83 	for (int i = 0; i < NBR_COEFS; ++i)
84 	{
85 		const int      stage = (i / _stage_width) + 1;
86 		const int      pos   = (i ^ 1) & (_stage_width - 1);
87 		_filter [stage]._coef [pos] = DataType (coef_arr [i]);
88 	}
89 }
90 
91 
92 
93 /*
94 ==============================================================================
95 Name: process_sample
96 Description:
97 	Downsamples (x2) one pair of samples, to generate one output sample.
98 Input parameters:
99 	- in_ptr: pointer on the two samples to decimate
100 Returns: Samplerate-reduced sample.
101 Throws: Nothing
102 ==============================================================================
103 */
104 
105 template <int NC>
process_sample(const double in_ptr[2])106 double	Downsampler2xF64Sse2 <NC>::process_sample (const double in_ptr [2])
107 {
108 	assert (in_ptr != nullptr);
109 
110 	auto           x  = _mm_loadu_pd (in_ptr);
111 	StageProcF64Sse2 <_nbr_stages>::process_sample_pos (x, &_filter [0]);
112 	x = _mm_add_sd (x, _mm_shuffle_pd (x, x, 1));
113 	x = _mm_mul_sd (x, _mm_set_sd (0.5f));
114 
115 	return _mm_cvtsd_f64 (x);
116 }
117 
118 
119 
120 /*
121 ==============================================================================
122 Name: process_block
123 Description:
124 	Downsamples (x2) a block of samples.
125 	Input and output blocks may overlap, see assert() for details.
126 Input parameters:
127 	- in_ptr: Input array, containing nbr_spl * 2 samples.
128 	- nbr_spl: Number of samples to output, > 0
129 Output parameters:
130 	- out_ptr: Array for the output samples, capacity: nbr_spl samples.
131 Throws: Nothing
132 ==============================================================================
133 */
134 
135 template <int NC>
process_block(double out_ptr[],const double in_ptr[],long nbr_spl)136 void	Downsampler2xF64Sse2 <NC>::process_block (double out_ptr [], const double in_ptr [], long nbr_spl)
137 {
138 	assert (in_ptr  != nullptr);
139 	assert (out_ptr != nullptr);
140 	assert (out_ptr <= in_ptr || out_ptr >= in_ptr + nbr_spl * 2);
141 	assert (nbr_spl > 0);
142 
143 	const auto     half = _mm_set1_pd (0.5f);
144 	for (long pos = 0; pos < nbr_spl; ++pos)
145 	{
146 		auto           x  = _mm_loadu_pd (in_ptr + pos * 2);
147 		StageProcF64Sse2 <_nbr_stages>::process_sample_pos (x, &_filter [0]);
148 		x = _mm_add_sd (x, _mm_shuffle_pd (x, x, 1));
149 		x = _mm_mul_sd (x, half);
150 		out_ptr [pos] = _mm_cvtsd_f64 (x);
151 	}
152 }
153 
154 
155 
156 /*
157 ==============================================================================
158 Name: process_sample_split
159 Description:
160 	Split (spectrum-wise) in half a pair of samples. The lower part of the
161 	spectrum is a classic downsampling, equivalent to the output of
162 	process_sample().
163 	The higher part is the complementary signal: original filter response
164 	is flipped from left to right, becoming a high-pass filter with the same
165 	cutoff frequency. This signal is then critically sampled (decimation by 2),
166 	flipping the spectrum: Fs/4...Fs/2 becomes Fs/4...0.
167 Input parameters:
168 	- in_ptr: pointer on the pair of input samples
169 Output parameters:
170 	- low: output sample, lower part of the spectrum (downsampling)
171 	- high: output sample, higher part of the spectrum.
172 Throws: Nothing
173 ==============================================================================
174 */
175 
176 template <int NC>
process_sample_split(double & low,double & high,const double in_ptr[2])177 void	Downsampler2xF64Sse2 <NC>::process_sample_split (double &low, double &high, const double in_ptr [2])
178 {
179 	assert (in_ptr != nullptr);
180 
181 	auto           x  = _mm_loadu_pd (in_ptr);
182 	StageProcF64Sse2 <_nbr_stages>::process_sample_pos (x, &_filter [0]);
183 	x = _mm_mul_pd (x, _mm_set1_pd (0.5f));
184 	const auto     xr = _mm_shuffle_pd (x, x, 1);
185 	low  = _mm_cvtsd_f64 (_mm_add_sd (xr, x));
186 	high = _mm_cvtsd_f64 (_mm_sub_sd (xr, x));
187 }
188 
189 
190 
191 /*
192 ==============================================================================
193 Name: process_block_split
194 Description:
195 	Split (spectrum-wise) in half a block of samples. The lower part of the
196 	spectrum is a classic downsampling, equivalent to the output of
197 	process_block().
198 	The higher part is the complementary signal: original filter response
199 	is flipped from left to right, becoming a high-pass filter with the same
200 	cutoff frequency. This signal is then critically sampled (decimation by 2),
201 	flipping the spectrum: Fs/4...Fs/2 becomes Fs/4...0.
202 	Input and output blocks may overlap, see assert() for details.
203 Input parameters:
204 	- in_ptr: Input array, containing nbr_spl * 2 samples.
205 	- nbr_spl: Number of samples for each output, > 0
206 Output parameters:
207 	- out_l_ptr: Array for the output samples, lower part of the spectrum
208 		(downsampling). Capacity: nbr_spl samples.
209 	- out_h_ptr: Array for the output samples, higher part of the spectrum.
210 		Capacity: nbr_spl samples.
211 Throws: Nothing
212 ==============================================================================
213 */
214 
215 template <int NC>
process_block_split(double out_l_ptr[],double out_h_ptr[],const double in_ptr[],long nbr_spl)216 void	Downsampler2xF64Sse2 <NC>::process_block_split (double out_l_ptr [], double out_h_ptr [], const double in_ptr [], long nbr_spl)
217 {
218 	assert (in_ptr    != nullptr);
219 	assert (out_l_ptr != nullptr);
220 	assert (out_l_ptr <= in_ptr || out_l_ptr >= in_ptr + nbr_spl * 2);
221 	assert (out_h_ptr != nullptr);
222 	assert (out_h_ptr <= in_ptr || out_h_ptr >= in_ptr + nbr_spl * 2);
223 	assert (out_h_ptr != out_l_ptr);
224 	assert (nbr_spl > 0);
225 
226 	const auto     half = _mm_set1_pd (0.5f);
227 	for (long pos = 0; pos < nbr_spl; ++pos)
228 	{
229 		auto           x  = _mm_loadu_pd (in_ptr + pos * 2);
230 		StageProcF64Sse2 <_nbr_stages>::process_sample_pos (x, &_filter [0]);
231 		x = _mm_mul_pd (x, half);
232 		const auto     xr = _mm_shuffle_pd (x, x, 1);
233 		out_l_ptr [pos] = _mm_cvtsd_f64 (_mm_add_sd (xr, x));
234 		out_h_ptr [pos] = _mm_cvtsd_f64 (_mm_sub_sd (xr, x));
235 	}
236 }
237 
238 
239 
240 /*
241 ==============================================================================
242 Name: clear_buffers
243 Description:
244 	Clears filter memory, as if it processed silence since an infinite amount
245 	of time.
246 Throws: Nothing
247 ==============================================================================
248 */
249 
250 template <int NC>
clear_buffers()251 void	Downsampler2xF64Sse2 <NC>::clear_buffers ()
252 {
253 	for (int i = 0; i < _nbr_stages + 1; ++i)
254 	{
255 		_mm_store_pd (_filter [i]._mem, _mm_setzero_pd ());
256 	}
257 }
258 
259 
260 
261 /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
262 
263 
264 
265 /*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
266 
267 
268 
269 }  // namespace hiir
270 
271 
272 
273 #endif   // hiir_Downsampler2xF64Sse2_CODEHEADER_INCLUDED
274 
275 
276 
277 /*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
278