1 /*
2  * Nellymoser encoder
3  * This code is developed as part of Google Summer of Code 2008 Program.
4  *
5  * Copyright (c) 2008 Bartlomiej Wolowiec
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 /**
25  * @file
26  * Nellymoser encoder
27  * by Bartlomiej Wolowiec
28  *
29  * Generic codec information: libavcodec/nellymoserdec.c
30  *
31  * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32  *                             (Copyright Joseph Artsimovich and UAB "DKD")
33  *
34  * for more information about nellymoser format, visit:
35  * http://wiki.multimedia.cx/index.php?title=Nellymoser
36  */
37 
38 #include "libavutil/libm.h"
39 
40 #include "libavutil/common.h"
41 #include "libavutil/float_dsp.h"
42 #include "libavutil/mathematics.h"
43 
44 #include "audio_frame_queue.h"
45 #include "avcodec.h"
46 #include "fft.h"
47 #include "internal.h"
48 #include "nellymoser.h"
49 #include "sinewin.h"
50 
51 #define BITSTREAM_WRITER_LE
52 #include "put_bits.h"
53 
54 #define POW_TABLE_SIZE (1<<11)
55 #define POW_TABLE_OFFSET 3
56 #define OPT_SIZE ((1<<15) + 3000)
57 
58 typedef struct NellyMoserEncodeContext {
59     AVCodecContext  *avctx;
60     int             last_frame;
61     AVFloatDSPContext fdsp;
62     FFTContext      mdct_ctx;
63     AudioFrameQueue afq;
64     DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
65     DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
66     DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN];     ///< sample buffer
67     float           (*opt )[OPT_SIZE];
68     uint8_t         (*path)[OPT_SIZE];
69 } NellyMoserEncodeContext;
70 
71 static float pow_table[POW_TABLE_SIZE];     ///< -pow(2, -i / 2048.0 - 3.0);
72 
73 static const uint8_t sf_lut[96] = {
74      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
75      5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
76     15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
77     27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
78     41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
79     54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
80 };
81 
82 static const uint8_t sf_delta_lut[78] = {
83      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
84      4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
85     13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
86     23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
87     28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
88 };
89 
90 static const uint8_t quant_lut[230] = {
91      0,
92 
93      0,  1,  2,
94 
95      0,  1,  2,  3,  4,  5,  6,
96 
97      0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
98     12, 13, 13, 13, 14,
99 
100      0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
101      8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
102     22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
103     30,
104 
105      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
106      4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
107     10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
108     15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
109     21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
110     33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
111     46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
112     53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
113     58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
114     61, 61, 61, 61, 62,
115 };
116 
117 static const float quant_lut_mul[7] = { 0.0,  0.0,  2.0,  2.0,  5.0, 12.0,  36.6 };
118 static const float quant_lut_add[7] = { 0.0,  0.0,  2.0,  7.0, 21.0, 56.0, 157.0 };
119 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
120 
apply_mdct(NellyMoserEncodeContext * s)121 static void apply_mdct(NellyMoserEncodeContext *s)
122 {
123     float *in0 = s->buf;
124     float *in1 = s->buf + NELLY_BUF_LEN;
125     float *in2 = s->buf + 2 * NELLY_BUF_LEN;
126 
127     s->fdsp.vector_fmul        (s->in_buff,                 in0, ff_sine_128, NELLY_BUF_LEN);
128     s->fdsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
129     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
130 
131     s->fdsp.vector_fmul        (s->in_buff,                 in1, ff_sine_128, NELLY_BUF_LEN);
132     s->fdsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
133     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->in_buff);
134 }
135 
encode_end(AVCodecContext * avctx)136 static av_cold int encode_end(AVCodecContext *avctx)
137 {
138     NellyMoserEncodeContext *s = avctx->priv_data;
139 
140     ff_mdct_end(&s->mdct_ctx);
141 
142     if (s->avctx->trellis) {
143         av_free(s->opt);
144         av_free(s->path);
145     }
146     ff_af_queue_close(&s->afq);
147 
148     return 0;
149 }
150 
encode_init(AVCodecContext * avctx)151 static av_cold int encode_init(AVCodecContext *avctx)
152 {
153     NellyMoserEncodeContext *s = avctx->priv_data;
154     int i, ret;
155 
156     if (avctx->channels != 1) {
157         av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
158         return AVERROR(EINVAL);
159     }
160 
161     if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
162         avctx->sample_rate != 11025 &&
163         avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
164         avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
165         av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
166         return AVERROR(EINVAL);
167     }
168 
169     avctx->frame_size = NELLY_SAMPLES;
170     avctx->delay      = NELLY_BUF_LEN;
171     ff_af_queue_init(avctx, &s->afq);
172     s->avctx = avctx;
173     if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
174         goto error;
175     avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
176 
177     /* Generate overlap window */
178     ff_init_ff_sine_windows(7);
179     for (i = 0; i < POW_TABLE_SIZE; i++)
180         pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
181 
182     if (s->avctx->trellis) {
183         s->opt  = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float  ));
184         s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
185         if (!s->opt || !s->path) {
186             ret = AVERROR(ENOMEM);
187             goto error;
188         }
189     }
190 
191     return 0;
192 error:
193     encode_end(avctx);
194     return ret;
195 }
196 
197 #define find_best(val, table, LUT, LUT_add, LUT_size) \
198     best_idx = \
199         LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
200     if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
201         best_idx++;
202 
get_exponent_greedy(NellyMoserEncodeContext * s,float * cand,int * idx_table)203 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
204 {
205     int band, best_idx, power_idx = 0;
206     float power_candidate;
207 
208     //base exponent
209     find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
210     idx_table[0] = best_idx;
211     power_idx = ff_nelly_init_table[best_idx];
212 
213     for (band = 1; band < NELLY_BANDS; band++) {
214         power_candidate = cand[band] - power_idx;
215         find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
216         idx_table[band] = best_idx;
217         power_idx += ff_nelly_delta_table[best_idx];
218     }
219 }
220 
distance(float x,float y,int band)221 static inline float distance(float x, float y, int band)
222 {
223     //return pow(fabs(x-y), 2.0);
224     float tmp = x - y;
225     return tmp * tmp;
226 }
227 
get_exponent_dynamic(NellyMoserEncodeContext * s,float * cand,int * idx_table)228 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
229 {
230     int i, j, band, best_idx;
231     float power_candidate, best_val;
232 
233     float  (*opt )[OPT_SIZE] = s->opt ;
234     uint8_t(*path)[OPT_SIZE] = s->path;
235 
236     for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
237         opt[0][i] = INFINITY;
238     }
239 
240     for (i = 0; i < 64; i++) {
241         opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
242         path[0][ff_nelly_init_table[i]] = i;
243     }
244 
245     for (band = 1; band < NELLY_BANDS; band++) {
246         int q, c = 0;
247         float tmp;
248         int idx_min, idx_max, idx;
249         power_candidate = cand[band];
250         for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
251             idx_min = FFMAX(0, cand[band] - q);
252             idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
253             for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
254                 if ( ISINF_FUNC(opt[band - 1][i]) )
255                     continue;
256                 for (j = 0; j < 32; j++) {
257                     idx = i + ff_nelly_delta_table[j];
258                     if (idx > idx_max)
259                         break;
260                     if (idx >= idx_min) {
261                         tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
262                         if (opt[band][idx] > tmp) {
263                             opt[band][idx] = tmp;
264                             path[band][idx] = j;
265                             c = 1;
266                         }
267                     }
268                 }
269             }
270         }
271         assert(c); //FIXME
272     }
273 
274     best_val = INFINITY;
275     best_idx = -1;
276     band = NELLY_BANDS - 1;
277     for (i = 0; i < OPT_SIZE; i++) {
278         if (best_val > opt[band][i]) {
279             best_val = opt[band][i];
280             best_idx = i;
281         }
282     }
283     for (band = NELLY_BANDS - 1; band >= 0; band--) {
284         idx_table[band] = path[band][best_idx];
285         if (band) {
286             best_idx -= ff_nelly_delta_table[path[band][best_idx]];
287         }
288     }
289 }
290 
291 /**
292  * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
293  *  @param s               encoder context
294  *  @param output          output buffer
295  *  @param output_size     size of output buffer
296  */
encode_block(NellyMoserEncodeContext * s,unsigned char * output,int output_size)297 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
298 {
299     PutBitContext pb;
300     int i, j, band, block, best_idx, power_idx = 0;
301     float power_val, coeff, coeff_sum;
302     float pows[NELLY_FILL_LEN];
303     int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
304     float cand[NELLY_BANDS];
305 
306     apply_mdct(s);
307 
308     init_put_bits(&pb, output, output_size * 8);
309 
310     i = 0;
311     for (band = 0; band < NELLY_BANDS; band++) {
312         coeff_sum = 0;
313         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
314             coeff_sum += s->mdct_out[i                ] * s->mdct_out[i                ]
315                        + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
316         }
317         cand[band] =
318             log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
319     }
320 
321     if (s->avctx->trellis) {
322         get_exponent_dynamic(s, cand, idx_table);
323     } else {
324         get_exponent_greedy(s, cand, idx_table);
325     }
326 
327     i = 0;
328     for (band = 0; band < NELLY_BANDS; band++) {
329         if (band) {
330             power_idx += ff_nelly_delta_table[idx_table[band]];
331             put_bits(&pb, 5, idx_table[band]);
332         } else {
333             power_idx = ff_nelly_init_table[idx_table[0]];
334             put_bits(&pb, 6, idx_table[0]);
335         }
336         power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
337         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
338             s->mdct_out[i] *= power_val;
339             s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
340             pows[i] = power_idx;
341         }
342     }
343 
344     ff_nelly_get_sample_bits(pows, bits);
345 
346     for (block = 0; block < 2; block++) {
347         for (i = 0; i < NELLY_FILL_LEN; i++) {
348             if (bits[i] > 0) {
349                 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
350                 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
351                 best_idx =
352                     quant_lut[av_clip (
353                             coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
354                             quant_lut_offset[bits[i]],
355                             quant_lut_offset[bits[i]+1] - 1
356                             )];
357                 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
358                     best_idx++;
359 
360                 put_bits(&pb, bits[i], best_idx);
361             }
362         }
363         if (!block)
364             put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
365     }
366 
367     flush_put_bits(&pb);
368     memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
369 }
370 
encode_frame(AVCodecContext * avctx,AVPacket * avpkt,const AVFrame * frame,int * got_packet_ptr)371 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
372                         const AVFrame *frame, int *got_packet_ptr)
373 {
374     NellyMoserEncodeContext *s = avctx->priv_data;
375     int ret;
376 
377     if (s->last_frame)
378         return 0;
379 
380     memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
381     if (frame) {
382         memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
383                frame->nb_samples * sizeof(*s->buf));
384         if (frame->nb_samples < NELLY_SAMPLES) {
385             memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
386                    (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
387             if (frame->nb_samples >= NELLY_BUF_LEN)
388                 s->last_frame = 1;
389         }
390         if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
391             return ret;
392     } else {
393         memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
394         s->last_frame = 1;
395     }
396 
397     if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN)) < 0)
398         return ret;
399     encode_block(s, avpkt->data, avpkt->size);
400 
401     /* Get the next frame pts/duration */
402     ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
403                        &avpkt->duration);
404 
405     *got_packet_ptr = 1;
406     return 0;
407 }
408 
409 AVCodec ff_nellymoser_encoder = {
410 	.name           = "nellymoser",
411     .long_name      = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
412     .type           = AVMEDIA_TYPE_AUDIO,
413     .id             = AV_CODEC_ID_NELLYMOSER,
414     .priv_data_size = sizeof(NellyMoserEncodeContext),
415     .init           = encode_init,
416     .encode2        = encode_frame,
417     .close          = encode_end,
418     .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
419     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
420                                                      AV_SAMPLE_FMT_NONE },
421 };
422