1 /*
2 * Nellymoser encoder
3 * This code is developed as part of Google Summer of Code 2008 Program.
4 *
5 * Copyright (c) 2008 Bartlomiej Wolowiec
6 *
7 * This file is part of FFmpeg.
8 *
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 /**
25 * @file
26 * Nellymoser encoder
27 * by Bartlomiej Wolowiec
28 *
29 * Generic codec information: libavcodec/nellymoserdec.c
30 *
31 * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32 * (Copyright Joseph Artsimovich and UAB "DKD")
33 *
34 * for more information about nellymoser format, visit:
35 * http://wiki.multimedia.cx/index.php?title=Nellymoser
36 */
37
38 #include "libavutil/libm.h"
39
40 #include "libavutil/common.h"
41 #include "libavutil/float_dsp.h"
42 #include "libavutil/mathematics.h"
43
44 #include "audio_frame_queue.h"
45 #include "avcodec.h"
46 #include "fft.h"
47 #include "internal.h"
48 #include "nellymoser.h"
49 #include "sinewin.h"
50
51 #define BITSTREAM_WRITER_LE
52 #include "put_bits.h"
53
54 #define POW_TABLE_SIZE (1<<11)
55 #define POW_TABLE_OFFSET 3
56 #define OPT_SIZE ((1<<15) + 3000)
57
58 typedef struct NellyMoserEncodeContext {
59 AVCodecContext *avctx;
60 int last_frame;
61 AVFloatDSPContext fdsp;
62 FFTContext mdct_ctx;
63 AudioFrameQueue afq;
64 DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
65 DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
66 DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN]; ///< sample buffer
67 float (*opt )[OPT_SIZE];
68 uint8_t (*path)[OPT_SIZE];
69 } NellyMoserEncodeContext;
70
71 static float pow_table[POW_TABLE_SIZE]; ///< -pow(2, -i / 2048.0 - 3.0);
72
73 static const uint8_t sf_lut[96] = {
74 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
75 5, 5, 5, 6, 7, 7, 8, 8, 9, 10, 11, 11, 12, 13, 13, 14,
76 15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
77 27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
78 41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
79 54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
80 };
81
82 static const uint8_t sf_delta_lut[78] = {
83 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
84 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12,
85 13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
86 23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
87 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
88 };
89
90 static const uint8_t quant_lut[230] = {
91 0,
92
93 0, 1, 2,
94
95 0, 1, 2, 3, 4, 5, 6,
96
97 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11,
98 12, 13, 13, 13, 14,
99
100 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8,
101 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
102 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
103 30,
104
105 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3,
106 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9,
107 10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
108 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
109 21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
110 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
111 46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
112 53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
113 58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
114 61, 61, 61, 61, 62,
115 };
116
117 static const float quant_lut_mul[7] = { 0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6 };
118 static const float quant_lut_add[7] = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 };
119 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
120
apply_mdct(NellyMoserEncodeContext * s)121 static void apply_mdct(NellyMoserEncodeContext *s)
122 {
123 float *in0 = s->buf;
124 float *in1 = s->buf + NELLY_BUF_LEN;
125 float *in2 = s->buf + 2 * NELLY_BUF_LEN;
126
127 s->fdsp.vector_fmul (s->in_buff, in0, ff_sine_128, NELLY_BUF_LEN);
128 s->fdsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
129 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
130
131 s->fdsp.vector_fmul (s->in_buff, in1, ff_sine_128, NELLY_BUF_LEN);
132 s->fdsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
133 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->in_buff);
134 }
135
encode_end(AVCodecContext * avctx)136 static av_cold int encode_end(AVCodecContext *avctx)
137 {
138 NellyMoserEncodeContext *s = avctx->priv_data;
139
140 ff_mdct_end(&s->mdct_ctx);
141
142 if (s->avctx->trellis) {
143 av_free(s->opt);
144 av_free(s->path);
145 }
146 ff_af_queue_close(&s->afq);
147
148 return 0;
149 }
150
encode_init(AVCodecContext * avctx)151 static av_cold int encode_init(AVCodecContext *avctx)
152 {
153 NellyMoserEncodeContext *s = avctx->priv_data;
154 int i, ret;
155
156 if (avctx->channels != 1) {
157 av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
158 return AVERROR(EINVAL);
159 }
160
161 if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
162 avctx->sample_rate != 11025 &&
163 avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
164 avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
165 av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
166 return AVERROR(EINVAL);
167 }
168
169 avctx->frame_size = NELLY_SAMPLES;
170 avctx->delay = NELLY_BUF_LEN;
171 ff_af_queue_init(avctx, &s->afq);
172 s->avctx = avctx;
173 if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
174 goto error;
175 avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
176
177 /* Generate overlap window */
178 ff_init_ff_sine_windows(7);
179 for (i = 0; i < POW_TABLE_SIZE; i++)
180 pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
181
182 if (s->avctx->trellis) {
183 s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float ));
184 s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
185 if (!s->opt || !s->path) {
186 ret = AVERROR(ENOMEM);
187 goto error;
188 }
189 }
190
191 return 0;
192 error:
193 encode_end(avctx);
194 return ret;
195 }
196
197 #define find_best(val, table, LUT, LUT_add, LUT_size) \
198 best_idx = \
199 LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
200 if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
201 best_idx++;
202
get_exponent_greedy(NellyMoserEncodeContext * s,float * cand,int * idx_table)203 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
204 {
205 int band, best_idx, power_idx = 0;
206 float power_candidate;
207
208 //base exponent
209 find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
210 idx_table[0] = best_idx;
211 power_idx = ff_nelly_init_table[best_idx];
212
213 for (band = 1; band < NELLY_BANDS; band++) {
214 power_candidate = cand[band] - power_idx;
215 find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
216 idx_table[band] = best_idx;
217 power_idx += ff_nelly_delta_table[best_idx];
218 }
219 }
220
distance(float x,float y,int band)221 static inline float distance(float x, float y, int band)
222 {
223 //return pow(fabs(x-y), 2.0);
224 float tmp = x - y;
225 return tmp * tmp;
226 }
227
get_exponent_dynamic(NellyMoserEncodeContext * s,float * cand,int * idx_table)228 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
229 {
230 int i, j, band, best_idx;
231 float power_candidate, best_val;
232
233 float (*opt )[OPT_SIZE] = s->opt ;
234 uint8_t(*path)[OPT_SIZE] = s->path;
235
236 for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
237 opt[0][i] = INFINITY;
238 }
239
240 for (i = 0; i < 64; i++) {
241 opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
242 path[0][ff_nelly_init_table[i]] = i;
243 }
244
245 for (band = 1; band < NELLY_BANDS; band++) {
246 int q, c = 0;
247 float tmp;
248 int idx_min, idx_max, idx;
249 power_candidate = cand[band];
250 for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
251 idx_min = FFMAX(0, cand[band] - q);
252 idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
253 for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
254 if ( ISINF_FUNC(opt[band - 1][i]) )
255 continue;
256 for (j = 0; j < 32; j++) {
257 idx = i + ff_nelly_delta_table[j];
258 if (idx > idx_max)
259 break;
260 if (idx >= idx_min) {
261 tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
262 if (opt[band][idx] > tmp) {
263 opt[band][idx] = tmp;
264 path[band][idx] = j;
265 c = 1;
266 }
267 }
268 }
269 }
270 }
271 assert(c); //FIXME
272 }
273
274 best_val = INFINITY;
275 best_idx = -1;
276 band = NELLY_BANDS - 1;
277 for (i = 0; i < OPT_SIZE; i++) {
278 if (best_val > opt[band][i]) {
279 best_val = opt[band][i];
280 best_idx = i;
281 }
282 }
283 for (band = NELLY_BANDS - 1; band >= 0; band--) {
284 idx_table[band] = path[band][best_idx];
285 if (band) {
286 best_idx -= ff_nelly_delta_table[path[band][best_idx]];
287 }
288 }
289 }
290
291 /**
292 * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
293 * @param s encoder context
294 * @param output output buffer
295 * @param output_size size of output buffer
296 */
encode_block(NellyMoserEncodeContext * s,unsigned char * output,int output_size)297 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
298 {
299 PutBitContext pb;
300 int i, j, band, block, best_idx, power_idx = 0;
301 float power_val, coeff, coeff_sum;
302 float pows[NELLY_FILL_LEN];
303 int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
304 float cand[NELLY_BANDS];
305
306 apply_mdct(s);
307
308 init_put_bits(&pb, output, output_size * 8);
309
310 i = 0;
311 for (band = 0; band < NELLY_BANDS; band++) {
312 coeff_sum = 0;
313 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
314 coeff_sum += s->mdct_out[i ] * s->mdct_out[i ]
315 + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
316 }
317 cand[band] =
318 log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
319 }
320
321 if (s->avctx->trellis) {
322 get_exponent_dynamic(s, cand, idx_table);
323 } else {
324 get_exponent_greedy(s, cand, idx_table);
325 }
326
327 i = 0;
328 for (band = 0; band < NELLY_BANDS; band++) {
329 if (band) {
330 power_idx += ff_nelly_delta_table[idx_table[band]];
331 put_bits(&pb, 5, idx_table[band]);
332 } else {
333 power_idx = ff_nelly_init_table[idx_table[0]];
334 put_bits(&pb, 6, idx_table[0]);
335 }
336 power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
337 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
338 s->mdct_out[i] *= power_val;
339 s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
340 pows[i] = power_idx;
341 }
342 }
343
344 ff_nelly_get_sample_bits(pows, bits);
345
346 for (block = 0; block < 2; block++) {
347 for (i = 0; i < NELLY_FILL_LEN; i++) {
348 if (bits[i] > 0) {
349 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
350 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
351 best_idx =
352 quant_lut[av_clip (
353 coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
354 quant_lut_offset[bits[i]],
355 quant_lut_offset[bits[i]+1] - 1
356 )];
357 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
358 best_idx++;
359
360 put_bits(&pb, bits[i], best_idx);
361 }
362 }
363 if (!block)
364 put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
365 }
366
367 flush_put_bits(&pb);
368 memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
369 }
370
encode_frame(AVCodecContext * avctx,AVPacket * avpkt,const AVFrame * frame,int * got_packet_ptr)371 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
372 const AVFrame *frame, int *got_packet_ptr)
373 {
374 NellyMoserEncodeContext *s = avctx->priv_data;
375 int ret;
376
377 if (s->last_frame)
378 return 0;
379
380 memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
381 if (frame) {
382 memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
383 frame->nb_samples * sizeof(*s->buf));
384 if (frame->nb_samples < NELLY_SAMPLES) {
385 memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
386 (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
387 if (frame->nb_samples >= NELLY_BUF_LEN)
388 s->last_frame = 1;
389 }
390 if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
391 return ret;
392 } else {
393 memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
394 s->last_frame = 1;
395 }
396
397 if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN)) < 0)
398 return ret;
399 encode_block(s, avpkt->data, avpkt->size);
400
401 /* Get the next frame pts/duration */
402 ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
403 &avpkt->duration);
404
405 *got_packet_ptr = 1;
406 return 0;
407 }
408
409 AVCodec ff_nellymoser_encoder = {
410 .name = "nellymoser",
411 .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
412 .type = AVMEDIA_TYPE_AUDIO,
413 .id = AV_CODEC_ID_NELLYMOSER,
414 .priv_data_size = sizeof(NellyMoserEncodeContext),
415 .init = encode_init,
416 .encode2 = encode_frame,
417 .close = encode_end,
418 .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
419 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
420 AV_SAMPLE_FMT_NONE },
421 };
422