1 /* replaygain_synthesis - Routines for applying ReplayGain to a signal
2  * Copyright (C) 2002-2009  Josh Coalson
3  * Copyright (C) 2011-2016  Xiph.Org Foundation
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18  */
19 /*
20  * This is an aggregation of pieces of code from John Edwards' WaveGain
21  * program.  Mostly cosmetic changes were made; otherwise, the dithering
22  * code is almost untouched and the gain processing was converted from
23  * processing a whole file to processing chunks of samples.
24  *
25  * The original copyright notices for WaveGain's dither.c and wavegain.c
26  * appear below:
27  */
28 /*
29  * (c) 2002 John Edwards
30  * mostly lifted from work by Frank Klemm
31  * random functions for dithering.
32  */
33 /*
34  * Copyright (C) 2002 John Edwards
35  * Additional code by Magnus Holmgren and Gian-Carlo Pascutto
36  */
37 
38 #ifdef HAVE_CONFIG_H
39 #  include <config.h>
40 #endif
41 
42 #include <string.h> /* for memset() */
43 #include <math.h>
44 #include "share/replaygain_synthesis.h"
45 #include "FLAC/assert.h"
46 
47 #define FLAC__I64L(x) x##LL
48 
49 
50 /*
51  * the following is based on parts of dither.c
52  */
53 
54 
55 /*
56  *  This is a simple random number generator with good quality for audio purposes.
57  *  It consists of two polycounters with opposite rotation direction and different
58  *  periods. The periods are coprime, so the total period is the product of both.
59  *
60  *     -------------------------------------------------------------------------------------------------
61  * +-> |31:30:29:28:27:26:25:24:23:22:21:20:19:18:17:16:15:14:13:12:11:10: 9: 8: 7: 6: 5: 4: 3: 2: 1: 0|
62  * |   -------------------------------------------------------------------------------------------------
63  * |                                                                          |  |  |  |     |        |
64  * |                                                                          +--+--+--+-XOR-+--------+
65  * |                                                                                      |
66  * +--------------------------------------------------------------------------------------+
67  *
68  *     -------------------------------------------------------------------------------------------------
69  *     |31:30:29:28:27:26:25:24:23:22:21:20:19:18:17:16:15:14:13:12:11:10: 9: 8: 7: 6: 5: 4: 3: 2: 1: 0| <-+
70  *     -------------------------------------------------------------------------------------------------   |
71  *       |  |           |  |                                                                               |
72  *       +--+----XOR----+--+                                                                               |
73  *                |                                                                                        |
74  *                +----------------------------------------------------------------------------------------+
75  *
76  *
77  *  The first has an period of 3*5*17*257*65537, the second of 7*47*73*178481,
78  *  which gives a period of 18.410.713.077.675.721.215. The result is the
79  *  XORed values of both generators.
80  */
81 
random_int_(void)82 static unsigned int random_int_(void)
83 {
84 	static const unsigned char parity_[256] = {
85 		0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
86 		1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
87 		1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
88 		0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
89 		1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
90 		0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
91 		0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
92 		1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0
93 	};
94 	static unsigned int r1_ = 1;
95 	static unsigned int r2_ = 1;
96 
97 	unsigned int t1, t2, t3, t4;
98 
99 	/* Parity calculation is done via table lookup, this is also available
100 	 * on CPUs without parity, can be implemented in C and avoid unpredictable
101 	 * jumps and slow rotate through the carry flag operations.
102 	 */
103 	t3   = t1 = r1_;    t4   = t2 = r2_;
104 	t1  &= 0xF5;        t2 >>= 25;
105 	t1   = parity_[t1]; t2  &= 0x63;
106 	t1 <<= 31;          t2   = parity_[t2];
107 
108 	return (r1_ = (t3 >> 1) | t1 ) ^ (r2_ = (t4 + t4) | t2 );
109 }
110 
111 /* gives a equal distributed random number */
112 /* between -2^31*mult and +2^31*mult */
random_equi_(double mult)113 static double random_equi_(double mult)
114 {
115 	return mult * (int) random_int_();
116 }
117 
118 /* gives a triangular distributed random number */
119 /* between -2^32*mult and +2^32*mult */
random_triangular_(double mult)120 static double random_triangular_(double mult)
121 {
122 	return mult * ( (double) (int) random_int_() + (double) (int) random_int_() );
123 }
124 
125 
126 static const float  F44_0 [16 + 32] = {
127 	(float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0,
128 	(float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0,
129 
130 	(float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0,
131 	(float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0,
132 
133 	(float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0,
134 	(float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0
135 };
136 
137 
138 static const float  F44_1 [16 + 32] = {  /* SNR(w) = 4.843163 dB, SNR = -3.192134 dB */
139 	(float) 0.85018292704024355931, (float) 0.29089597350995344721, (float)-0.05021866022121039450, (float)-0.23545456294599161833,
140 	(float)-0.58362726442227032096, (float)-0.67038978965193036429, (float)-0.38566861572833459221, (float)-0.15218663390367969967,
141 	(float)-0.02577543084864530676, (float) 0.14119295297688728127, (float) 0.22398848581628781612, (float) 0.15401727203382084116,
142 	(float) 0.05216161232906000929, (float)-0.00282237820999675451, (float)-0.03042794608323867363, (float)-0.03109780942998826024,
143 
144 	(float) 0.85018292704024355931, (float) 0.29089597350995344721, (float)-0.05021866022121039450, (float)-0.23545456294599161833,
145 	(float)-0.58362726442227032096, (float)-0.67038978965193036429, (float)-0.38566861572833459221, (float)-0.15218663390367969967,
146 	(float)-0.02577543084864530676, (float) 0.14119295297688728127, (float) 0.22398848581628781612, (float) 0.15401727203382084116,
147 	(float) 0.05216161232906000929, (float)-0.00282237820999675451, (float)-0.03042794608323867363, (float)-0.03109780942998826024,
148 
149 	(float) 0.85018292704024355931, (float) 0.29089597350995344721, (float)-0.05021866022121039450, (float)-0.23545456294599161833,
150 	(float)-0.58362726442227032096, (float)-0.67038978965193036429, (float)-0.38566861572833459221, (float)-0.15218663390367969967,
151 	(float)-0.02577543084864530676, (float) 0.14119295297688728127, (float) 0.22398848581628781612, (float) 0.15401727203382084116,
152 	(float) 0.05216161232906000929, (float)-0.00282237820999675451, (float)-0.03042794608323867363, (float)-0.03109780942998826024,
153 };
154 
155 
156 static const float  F44_2 [16 + 32] = {  /* SNR(w) = 10.060213 dB, SNR = -12.766730 dB */
157 	(float) 1.78827593892108555290, (float) 0.95508210637394326553, (float)-0.18447626783899924429, (float)-0.44198126506275016437,
158 	(float)-0.88404052492547413497, (float)-1.42218907262407452967, (float)-1.02037566838362314995, (float)-0.34861755756425577264,
159 	(float)-0.11490230170431934434, (float) 0.12498899339968611803, (float) 0.38065885268563131927, (float) 0.31883491321310506562,
160 	(float) 0.10486838686563442765, (float)-0.03105361685110374845, (float)-0.06450524884075370758, (float)-0.02939198261121969816,
161 
162 	(float) 1.78827593892108555290, (float) 0.95508210637394326553, (float)-0.18447626783899924429, (float)-0.44198126506275016437,
163 	(float)-0.88404052492547413497, (float)-1.42218907262407452967, (float)-1.02037566838362314995, (float)-0.34861755756425577264,
164 	(float)-0.11490230170431934434, (float) 0.12498899339968611803, (float) 0.38065885268563131927, (float) 0.31883491321310506562,
165 	(float) 0.10486838686563442765, (float)-0.03105361685110374845, (float)-0.06450524884075370758, (float)-0.02939198261121969816,
166 
167 	(float) 1.78827593892108555290, (float) 0.95508210637394326553, (float)-0.18447626783899924429, (float)-0.44198126506275016437,
168 	(float)-0.88404052492547413497, (float)-1.42218907262407452967, (float)-1.02037566838362314995, (float)-0.34861755756425577264,
169 	(float)-0.11490230170431934434, (float) 0.12498899339968611803, (float) 0.38065885268563131927, (float) 0.31883491321310506562,
170 	(float) 0.10486838686563442765, (float)-0.03105361685110374845, (float)-0.06450524884075370758, (float)-0.02939198261121969816,
171 };
172 
173 
174 static const float  F44_3 [16 + 32] = {  /* SNR(w) = 15.382598 dB, SNR = -29.402334 dB */
175 	(float) 2.89072132015058161445, (float) 2.68932810943698754106, (float) 0.21083359339410251227, (float)-0.98385073324997617515,
176 	(float)-1.11047823227097316719, (float)-2.18954076314139673147, (float)-2.36498032881953056225, (float)-0.95484132880101140785,
177 	(float)-0.23924057925542965158, (float)-0.13865235703915925642, (float) 0.43587843191057992846, (float) 0.65903257226026665927,
178 	(float) 0.24361815372443152787, (float)-0.00235974960154720097, (float) 0.01844166574603346289, (float) 0.01722945988740875099,
179 
180 	(float) 2.89072132015058161445, (float) 2.68932810943698754106, (float) 0.21083359339410251227, (float)-0.98385073324997617515,
181 	(float)-1.11047823227097316719, (float)-2.18954076314139673147, (float)-2.36498032881953056225, (float)-0.95484132880101140785,
182 	(float)-0.23924057925542965158, (float)-0.13865235703915925642, (float) 0.43587843191057992846, (float) 0.65903257226026665927,
183 	(float) 0.24361815372443152787, (float)-0.00235974960154720097, (float) 0.01844166574603346289, (float) 0.01722945988740875099,
184 
185 	(float) 2.89072132015058161445, (float) 2.68932810943698754106, (float) 0.21083359339410251227, (float)-0.98385073324997617515,
186 	(float)-1.11047823227097316719, (float)-2.18954076314139673147, (float)-2.36498032881953056225, (float)-0.95484132880101140785,
187 	(float)-0.23924057925542965158, (float)-0.13865235703915925642, (float) 0.43587843191057992846, (float) 0.65903257226026665927,
188 	(float) 0.24361815372443152787, (float)-0.00235974960154720097, (float) 0.01844166574603346289, (float) 0.01722945988740875099
189 };
190 
191 
scalar16_(const float * x,const float * y)192 static double scalar16_(const float* x, const float* y)
193 {
194 	return
195 		x[ 0]*y[ 0] + x[ 1]*y[ 1] + x[ 2]*y[ 2] + x[ 3]*y[ 3] +
196 		x[ 4]*y[ 4] + x[ 5]*y[ 5] + x[ 6]*y[ 6] + x[ 7]*y[ 7] +
197 		x[ 8]*y[ 8] + x[ 9]*y[ 9] + x[10]*y[10] + x[11]*y[11] +
198 		x[12]*y[12] + x[13]*y[13] + x[14]*y[14] + x[15]*y[15];
199 }
200 
201 
FLAC__replaygain_synthesis__init_dither_context(DitherContext * d,int bits,int shapingtype)202 void FLAC__replaygain_synthesis__init_dither_context(DitherContext *d, int bits, int shapingtype)
203 {
204 	static unsigned char default_dither [] = { 92, 92, 88, 84, 81, 78, 74, 67,  0,  0 };
205 	static const float*               F [] = { F44_0, F44_1, F44_2, F44_3 };
206 
207 	int indx;
208 
209 	if (shapingtype < 0) shapingtype = 0;
210 	if (shapingtype > 3) shapingtype = 3;
211 	d->ShapingType = (NoiseShaping)shapingtype;
212 	indx = bits - 11 - shapingtype;
213 	if (indx < 0) indx = 0;
214 	if (indx > 9) indx = 9;
215 
216 	memset ( d->ErrorHistory , 0, sizeof (d->ErrorHistory ) );
217 	memset ( d->DitherHistory, 0, sizeof (d->DitherHistory) );
218 
219 	d->FilterCoeff = F [shapingtype];
220 	d->Mask   = ((FLAC__uint64)-1) << (32 - bits);
221 	d->Add    = 0.5     * ((1L << (32 - bits)) - 1);
222 	d->Dither = 0.01f*default_dither[indx] / (((FLAC__int64)1) << bits);
223 	d->LastHistoryIndex = 0;
224 }
225 
226 /*
227  * the following is based on parts of wavegain.c
228  */
229 
dither_output_(DitherContext * d,FLAC__bool do_dithering,int shapingtype,int i,double Sum,int k)230 static FLAC__int64 dither_output_(DitherContext *d, FLAC__bool do_dithering, int shapingtype, int i, double Sum, int k)
231 {
232 	union {
233 		double d;
234 		FLAC__int64 i;
235 	} doubletmp;
236 	double Sum2;
237 	FLAC__int64 val;
238 
239 #define ROUND64(x)   ( doubletmp.d = (x) + d->Add + (FLAC__int64)FLAC__I64L(0x001FFFFD80000000), doubletmp.i - (FLAC__int64)FLAC__I64L(0x433FFFFD80000000) )
240 
241 	if(do_dithering) {
242 		if(shapingtype == 0) {
243 			double  tmp = random_equi_(d->Dither);
244 			Sum2 = tmp - d->LastRandomNumber [k];
245 			d->LastRandomNumber [k] = (int)tmp;
246 			Sum2 = Sum += Sum2;
247 			val = ROUND64(Sum2) & d->Mask;
248 		}
249 		else {
250 			Sum2 = random_triangular_(d->Dither) - scalar16_(d->DitherHistory[k], d->FilterCoeff + i);
251 			Sum += d->DitherHistory [k] [(-1-i)&15] = (float)Sum2;
252 			Sum2 = Sum + scalar16_(d->ErrorHistory [k], d->FilterCoeff + i);
253 			val = ROUND64(Sum2) & d->Mask;
254 			d->ErrorHistory [k] [(-1-i)&15] = (float)(Sum - val);
255 		}
256 		return val;
257 	}
258 	else
259 		return ROUND64(Sum);
260 
261 #undef ROUND64
262 }
263 
264 #if 0
265 	float        peak = 0.f,
266 	             new_peak,
267 	             factor_clip
268 	double       scale,
269 	             dB;
270 
271 	...
272 
273 	peak is in the range -32768.0 .. 32767.0
274 
275 	/* calculate factors for ReplayGain and ClippingPrevention */
276 	*track_gain = GetTitleGain() + settings->man_gain;
277 	scale = (float) pow(10., *track_gain * 0.05);
278 	if(settings->clip_prev) {
279 		factor_clip  = (float) (32767./( peak + 1));
280 		if(scale < factor_clip)
281 			factor_clip = 1.f;
282 		else
283 			factor_clip /= scale;
284 		scale *= factor_clip;
285 	}
286 	new_peak = (float) peak * scale;
287 
288 	dB = 20. * log10(scale);
289 	*track_gain = (float) dB;
290 
291  	const double scale = pow(10., (double)gain * 0.05);
292 #endif
293 
294 
FLAC__replaygain_synthesis__apply_gain(FLAC__byte * data_out,FLAC__bool little_endian_data_out,FLAC__bool unsigned_data_out,const FLAC__int32 * const input[],unsigned wide_samples,unsigned channels,const unsigned source_bps,const unsigned target_bps,const double scale,const FLAC__bool hard_limit,FLAC__bool do_dithering,DitherContext * dither_context)295 size_t FLAC__replaygain_synthesis__apply_gain(FLAC__byte *data_out, FLAC__bool little_endian_data_out, FLAC__bool unsigned_data_out, const FLAC__int32 * const input[], unsigned wide_samples, unsigned channels, const unsigned source_bps, const unsigned target_bps, const double scale, const FLAC__bool hard_limit, FLAC__bool do_dithering, DitherContext *dither_context)
296 {
297 	static const FLAC__int64 hard_clip_factors_[33] = {
298 		0, /* 0 bits-per-sample (not supported) */
299 		0, /* 1 bits-per-sample (not supported) */
300 		0, /* 2 bits-per-sample (not supported) */
301 		0, /* 3 bits-per-sample (not supported) */
302 		-8, /* 4 bits-per-sample */
303 		-16, /* 5 bits-per-sample */
304 		-32, /* 6 bits-per-sample */
305 		-64, /* 7 bits-per-sample */
306 		-128, /* 8 bits-per-sample */
307 		-256, /* 9 bits-per-sample */
308 		-512, /* 10 bits-per-sample */
309 		-1024, /* 11 bits-per-sample */
310 		-2048, /* 12 bits-per-sample */
311 		-4096, /* 13 bits-per-sample */
312 		-8192, /* 14 bits-per-sample */
313 		-16384, /* 15 bits-per-sample */
314 		-32768, /* 16 bits-per-sample */
315 		-65536, /* 17 bits-per-sample */
316 		-131072, /* 18 bits-per-sample */
317 		-262144, /* 19 bits-per-sample */
318 		-524288, /* 20 bits-per-sample */
319 		-1048576, /* 21 bits-per-sample */
320 		-2097152, /* 22 bits-per-sample */
321 		-4194304, /* 23 bits-per-sample */
322 		-8388608, /* 24 bits-per-sample */
323 		-16777216, /* 25 bits-per-sample */
324 		-33554432, /* 26 bits-per-sample */
325 		-67108864, /* 27 bits-per-sample */
326 		-134217728, /* 28 bits-per-sample */
327 		-268435456, /* 29 bits-per-sample */
328 		-536870912, /* 30 bits-per-sample */
329 		-1073741824, /* 31 bits-per-sample */
330 		(FLAC__int64)(-1073741824) * 2 /* 32 bits-per-sample */
331 	};
332 	const FLAC__int32 conv_shift = 32 - target_bps;
333 	const FLAC__int64 hard_clip_factor = hard_clip_factors_[target_bps];
334 	/*
335 	 * The integer input coming in has a varying range based on the
336 	 * source_bps.  We want to normalize it to [-1.0, 1.0) so instead
337 	 * of doing two multiplies on each sample, we just multiple
338 	 * 'scale' by 1/(2^(source_bps-1))
339 	 */
340 	const double multi_scale = scale / (double)(1u << (source_bps-1));
341 
342 	FLAC__byte * const start = data_out;
343 	unsigned i, channel;
344 	const FLAC__int32 *input_;
345 	double sample;
346 	const unsigned bytes_per_sample = target_bps / 8;
347 	const unsigned last_history_index = dither_context->LastHistoryIndex;
348 	NoiseShaping noise_shaping = dither_context->ShapingType;
349 	FLAC__int64 val64;
350 	FLAC__int32 val32;
351 	FLAC__int32 uval32;
352 	const FLAC__uint32 twiggle = 1u << (target_bps - 1);
353 
354 	FLAC__ASSERT(channels > 0 && channels <= FLAC_SHARE__MAX_SUPPORTED_CHANNELS);
355 	FLAC__ASSERT(source_bps >= 4);
356 	FLAC__ASSERT(target_bps >= 4);
357 	FLAC__ASSERT(source_bps <= 32);
358 	FLAC__ASSERT(target_bps < 32);
359 	FLAC__ASSERT((target_bps & 7) == 0);
360 
361 	for(channel = 0; channel < channels; channel++) {
362 		const unsigned incr = bytes_per_sample * channels;
363 		data_out = start + bytes_per_sample * channel;
364 		input_ = input[channel];
365 		for(i = 0; i < wide_samples; i++, data_out += incr) {
366 			sample = (double)input_[i] * multi_scale;
367 
368 			if(hard_limit) {
369 				/* hard 6dB limiting */
370 				if(sample < -0.5)
371 					sample = tanh((sample + 0.5) / (1-0.5)) * (1-0.5) - 0.5;
372 				else if(sample > 0.5)
373 					sample = tanh((sample - 0.5) / (1-0.5)) * (1-0.5) + 0.5;
374 			}
375 			sample *= 2147483647.;
376 
377 			val64 = dither_output_(dither_context, do_dithering, noise_shaping, (i + last_history_index) % 32, sample, channel) >> conv_shift;
378 
379 			val32 = (FLAC__int32)val64;
380 			if(val64 >= -hard_clip_factor)
381 				val32 = (FLAC__int32)(-(hard_clip_factor+1));
382 			else if(val64 < hard_clip_factor)
383 				val32 = (FLAC__int32)hard_clip_factor;
384 
385 			uval32 = (FLAC__uint32)val32;
386 			if (unsigned_data_out)
387 				uval32 ^= twiggle;
388 
389 			if (little_endian_data_out) {
390 				switch(target_bps) {
391 					case 24:
392 						data_out[2] = (FLAC__byte)(uval32 >> 16);
393 						/* fall through */
394 					case 16:
395 						data_out[1] = (FLAC__byte)(uval32 >> 8);
396 						/* fall through */
397 					case 8:
398 						data_out[0] = (FLAC__byte)uval32;
399 						break;
400 				}
401 			}
402 			else {
403 				switch(target_bps) {
404 					case 24:
405 						data_out[0] = (FLAC__byte)(uval32 >> 16);
406 						data_out[1] = (FLAC__byte)(uval32 >> 8);
407 						data_out[2] = (FLAC__byte)uval32;
408 						break;
409 					case 16:
410 						data_out[0] = (FLAC__byte)(uval32 >> 8);
411 						data_out[1] = (FLAC__byte)uval32;
412 						break;
413 					case 8:
414 						data_out[0] = (FLAC__byte)uval32;
415 						break;
416 				}
417 			}
418 		}
419 	}
420 	dither_context->LastHistoryIndex = (last_history_index + wide_samples) % 32;
421 
422 	return wide_samples * channels * (target_bps/8);
423 }
424