1 /*
2  * SpanDSP - a series of DSP components for telephony
3  *
4  * gsm0610_lpc.c - GSM 06.10 full rate speech codec.
5  *
6  * Written by Steve Underwood <steveu@coppice.org>
7  *
8  * Copyright (C) 2006 Steve Underwood
9  *
10  * All rights reserved.
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU Lesser General Public License version 2.1,
14  * as published by the Free Software Foundation.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with this program; if not, write to the Free Software
23  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24  *
25  * This code is based on the widely used GSM 06.10 code available from
26  * http://kbs.cs.tu-berlin.de/~jutta/toast.html
27  */
28 
29 /*! \file */
30 
31 #if defined(HAVE_CONFIG_H)
32 #include "config.h"
33 #endif
34 
35 #include <assert.h>
36 #include <inttypes.h>
37 #if defined(HAVE_TGMATH_H)
38 #include <tgmath.h>
39 #endif
40 #if defined(HAVE_MATH_H)
41 #include <math.h>
42 #endif
43 #include "floating_fudge.h"
44 #include <stdlib.h>
45 #include <memory.h>
46 #include <string.h>
47 
48 #include "spandsp/telephony.h"
49 #include "spandsp/fast_convert.h"
50 #include "spandsp/bitstream.h"
51 #include "spandsp/bit_operations.h"
52 #include "spandsp/saturated.h"
53 #include "spandsp/vector_int.h"
54 #include "spandsp/gsm0610.h"
55 
56 #include "gsm0610_local.h"
57 
58 /* 4.2.4 .. 4.2.7 LPC ANALYSIS SECTION */
59 
60 /* The number of left shifts needed to normalize the 32 bit
61    variable x for positive values on the interval
62    with minimum of
63    minimum of 1073741824  (01000000000000000000000000000000) and
64    maximum of 2147483647  (01111111111111111111111111111111)
65    and for negative values on the interval with
66    minimum of -2147483648 (-10000000000000000000000000000000) and
67    maximum of -1073741824 ( -1000000000000000000000000000000).
68 
69    In order to normalize the result, the following
70    operation must be done: norm_var1 = x << gsm0610_norm(x);
71 
72    (That's 'ffs', only from the left, not the right..)
73 */
74 
gsm0610_norm(int32_t x)75 int16_t gsm0610_norm(int32_t x)
76 {
77     assert(x != 0);
78 
79     if (x < 0)
80     {
81         if (x <= -1073741824)
82             return 0;
83         /*endif*/
84         x = ~x;
85     }
86     /*endif*/
87     return (int16_t) (30 - top_bit(x));
88 }
89 /*- End of function --------------------------------------------------------*/
90 
91 /*
92    (From p. 46, end of section 4.2.5)
93 
94    NOTE: The following lines gives [sic] one correct implementation
95          of the div(num, denum) arithmetic operation.  Compute div
96          which is the integer division of num by denom: with
97          denom >= num > 0
98 */
gsm_div(int16_t num,int16_t denom)99 static int16_t gsm_div(int16_t num, int16_t denom)
100 {
101     int32_t num32;
102     int32_t denom32;
103     int16_t div;
104     int k;
105 
106     /* The parameter num sometimes becomes zero.
107        Although this is explicitly guarded against in 4.2.5,
108        we assume that the result should then be zero as well. */
109 
110     assert(num >= 0  &&  denom >= num);
111     if (num == 0)
112         return 0;
113     /*endif*/
114     num32 = num;
115     denom32 = denom;
116     div = 0;
117     k = 15;
118     while (k--)
119     {
120         div <<= 1;
121         num32 <<= 1;
122 
123         if (num32 >= denom32)
124         {
125             num32 -= denom32;
126             div++;
127         }
128         /*endif*/
129     }
130     /*endwhile*/
131 
132     return div;
133 }
134 /*- End of function --------------------------------------------------------*/
135 
136 #if defined(__GNUC__)  &&  defined(SPANDSP_USE_MMX)
gsm0610_vec_vsraw(const int16_t * p,int n,int bits)137 static void gsm0610_vec_vsraw(const int16_t *p, int n, int bits)
138 {
139     static const int64_t ones = 0x0001000100010001LL;
140 
141     if (n == 0)
142         return;
143     /*endif*/
144 #if defined(__x86_64__)
145     __asm__ __volatile__(
146         " leaq -16(%%rsi,%%rax,2),%%rdx;\n"         /* edx = top - 16 */
147         " emms;\n"
148         " movd %%ecx,%%mm3;\n"
149         " movq %[ones],%%mm2;\n"
150         " psllw %%mm3,%%mm2;\n"
151         " psrlw $1,%%mm2;\n"
152         " cmpq %%rdx,%%rsi;"
153         " ja 4f;\n"
154 
155          " .p2align 2;\n"
156         /* 8 words per iteration */
157         "6:\n"
158         " movq (%%rsi),%%mm0;\n"
159         " movq 8(%%rsi),%%mm1;\n"
160         " paddsw %%mm2,%%mm0;\n"
161         " psraw %%mm3,%%mm0;\n"
162         " paddsw %%mm2,%%mm1;\n"
163         " psraw %%mm3,%%mm1;\n"
164         " movq %%mm0,(%%rsi);\n"
165         " movq %%mm1,8(%%rsi);\n"
166         " addq $16,%%rsi;\n"
167         " cmpq %%rdx,%%rsi;\n"
168         " jbe 6b;\n"
169 
170         " .p2align 2;\n"
171         "4:\n"
172         " addq $12,%%rdx;\n"                        /* now edx = top-4 */
173         " cmpq %%rdx,%%rsi;\n"
174         " ja 3f;\n"
175 
176         " .p2align 2;\n"
177         /* do up to 6 words, two per iteration */
178         "5:\n"
179         " movd (%%rsi),%%mm0;\n"
180         " paddsw %%mm2,%%mm0;\n"
181         " psraw %%mm3,%%mm0;\n"
182         " movd %%mm0,(%%rsi);\n"
183         " addq $4,%%rsi;\n"
184         " cmpq %%rdx,%%rsi;\n"
185         " jbe 5b;\n"
186 
187         " .p2align 2;\n"
188         "3:\n"
189         " addq $2,%%rdx;\n"                        /* now edx = top-2 */
190         " cmpq %%rdx,%%rsi;\n"
191         " ja 2f;\n"
192 
193         " movzwl (%%rsi),%%eax;\n"
194         " movd %%eax,%%mm0;\n"
195         " paddsw %%mm2,%%mm0;\n"
196         " psraw %%mm3,%%mm0;\n"
197         " movd %%mm0,%%eax;\n"
198         " movw %%ax,(%%rsi);\n"
199 
200         " .p2align 2;\n"
201         "2:\n"
202         " emms;\n"
203         :
204         : "S" (p), "a" (n), "c" (bits), [ones] "m" (ones)
205         : "edx"
206     );
207 #else
208     __asm__ __volatile__(
209         " leal -16(%%esi,%%eax,2),%%edx;\n"         /* edx = top - 16 */
210         " emms;\n"
211         " movd %%ecx,%%mm3;\n"
212         " movq %[ones],%%mm2;\n"
213         " psllw %%mm3,%%mm2;\n"
214         " psrlw $1,%%mm2;\n"
215         " cmpl %%edx,%%esi;"
216         " ja 4f;\n"
217 
218          " .p2align 2;\n"
219         /* 8 words per iteration */
220         "6:\n"
221         " movq (%%esi),%%mm0;\n"
222         " movq 8(%%esi),%%mm1;\n"
223         " paddsw %%mm2,%%mm0;\n"
224         " psraw %%mm3,%%mm0;\n"
225         " paddsw %%mm2,%%mm1;\n"
226         " psraw %%mm3,%%mm1;\n"
227         " movq %%mm0,(%%esi);\n"
228         " movq %%mm1,8(%%esi);\n"
229         " addl $16,%%esi;\n"
230         " cmpl %%edx,%%esi;\n"
231         " jbe 6b;\n"
232 
233         " .p2align 2;\n"
234         "4:\n"
235         " addl $12,%%edx;\n"                        /* now edx = top-4 */
236         " cmpl %%edx,%%esi;\n"
237         " ja 3f;\n"
238 
239         " .p2align 2;\n"
240         /* do up to 6 words, two per iteration */
241         "5:\n"
242         " movd (%%esi),%%mm0;\n"
243         " paddsw %%mm2,%%mm0;\n"
244         " psraw %%mm3,%%mm0;\n"
245         " movd %%mm0,(%%esi);\n"
246         " addl $4,%%esi;\n"
247         " cmpl %%edx,%%esi;\n"
248         " jbe 5b;\n"
249 
250         " .p2align 2;\n"
251         "3:\n"
252         " addl $2,%%edx;\n"                        /* now edx = top-2 */
253         " cmpl %%edx,%%esi;\n"
254         " ja 2f;\n"
255 
256         " movzwl (%%esi),%%eax;\n"
257         " movd %%eax,%%mm0;\n"
258         " paddsw %%mm2,%%mm0;\n"
259         " psraw %%mm3,%%mm0;\n"
260         " movd %%mm0,%%eax;\n"
261         " movw %%ax,(%%esi);\n"
262 
263         " .p2align 2;\n"
264         "2:\n"
265         " emms;\n"
266         :
267         : "S" (p), "a" (n), "c" (bits), [ones] "m" (ones)
268         : "edx"
269     );
270 #endif
271 }
272 /*- End of function --------------------------------------------------------*/
273 #endif
274 
275 /* 4.2.4 */
autocorrelation(int16_t amp[GSM0610_FRAME_LEN],int32_t L_ACF[9])276 static void autocorrelation(int16_t amp[GSM0610_FRAME_LEN], int32_t L_ACF[9])
277 {
278     int k;
279     int16_t smax;
280     int16_t scalauto;
281 #if !(defined(__GNUC__)  &&  defined(SPANDSP_USE_MMX))
282     int i;
283     int temp;
284     int16_t *sp;
285     int16_t sl;
286 #endif
287 
288     /* The goal is to compute the array L_ACF[k].  The signal s[i] must
289        be scaled in order to avoid an overflow situation. */
290 
291     /* Dynamic scaling of the array  s[0..159] */
292     /* Search for the maximum. */
293 #if defined(__GNUC__)  &&  defined(SPANDSP_USE_MMX)
294     smax = saturate16(vec_min_maxi16(amp, GSM0610_FRAME_LEN, NULL));
295 #else
296     for (smax = 0, k = 0;  k < GSM0610_FRAME_LEN;  k++)
297     {
298         temp = sat_abs16(amp[k]);
299         if (temp > smax)
300             smax = (int16_t) temp;
301         /*endif*/
302     }
303     /*endfor*/
304 #endif
305 
306     /* Computation of the scaling factor. */
307     if (smax == 0)
308     {
309         scalauto = 0;
310     }
311     else
312     {
313         assert(smax > 0);
314         scalauto = (int16_t) (4 - gsm0610_norm((int32_t) smax << 16));
315     }
316     /*endif*/
317 
318     /* Scaling of the array s[0...159] */
319 #if defined(__GNUC__)  &&  defined(SPANDSP_USE_MMX)
320     if (scalauto > 0)
321         gsm0610_vec_vsraw(amp, GSM0610_FRAME_LEN, scalauto);
322     /*endif*/
323 #else
324     if (scalauto > 0)
325     {
326         for (k = 0;  k < GSM0610_FRAME_LEN;  k++)
327             amp[k] = gsm_mult_r(amp[k], 16384 >> (scalauto - 1));
328         /*endfor*/
329     }
330     /*endif*/
331 #endif
332 
333     /* Compute the L_ACF[..]. */
334 #if defined(__GNUC__)  &&  defined(SPANDSP_USE_MMX)
335     for (k = 0;  k < 9;  k++)
336         L_ACF[k] = vec_dot_prodi16(amp, amp + k, GSM0610_FRAME_LEN - k) << 1;
337     /*endfor*/
338 #else
339     sp = amp;
340     sl = *sp;
341     L_ACF[0] = ((int32_t) sl*(int32_t) sp[0]);
342     sl = *++sp;
343     L_ACF[0] += ((int32_t) sl*(int32_t) sp[0]);
344     L_ACF[1] = ((int32_t) sl*(int32_t) sp[-1]);
345     sl = *++sp;
346     L_ACF[0] += ((int32_t) sl*(int32_t) sp[0]);
347     L_ACF[1] += ((int32_t) sl*(int32_t) sp[-1]);
348     L_ACF[2] = ((int32_t) sl*(int32_t) sp[-2]);
349     sl = *++sp;
350     L_ACF[0] += ((int32_t) sl*(int32_t) sp[0]);
351     L_ACF[1] += ((int32_t) sl*(int32_t) sp[-1]);
352     L_ACF[2] += ((int32_t) sl*(int32_t) sp[-2]);
353     L_ACF[3] = ((int32_t) sl*(int32_t) sp[-3]);
354     sl = *++sp;
355     L_ACF[0] += ((int32_t) sl*(int32_t) sp[0]);
356     L_ACF[1] += ((int32_t) sl*(int32_t) sp[-1]);
357     L_ACF[2] += ((int32_t) sl*(int32_t) sp[-2]);
358     L_ACF[3] += ((int32_t) sl*(int32_t) sp[-3]);
359     L_ACF[4] = ((int32_t) sl*(int32_t) sp[-4]);
360     sl = *++sp;
361     L_ACF[0] += ((int32_t) sl*(int32_t) sp[0]);
362     L_ACF[1] += ((int32_t) sl*(int32_t) sp[-1]);
363     L_ACF[2] += ((int32_t) sl*(int32_t) sp[-2]);
364     L_ACF[3] += ((int32_t) sl*(int32_t) sp[-3]);
365     L_ACF[4] += ((int32_t) sl*(int32_t) sp[-4]);
366     L_ACF[5] = ((int32_t) sl*(int32_t) sp[-5]);
367     sl = *++sp;
368     L_ACF[0] += ((int32_t) sl*(int32_t) sp[0]);
369     L_ACF[1] += ((int32_t) sl*(int32_t) sp[-1]);
370     L_ACF[2] += ((int32_t) sl*(int32_t) sp[-2]);
371     L_ACF[3] += ((int32_t) sl*(int32_t) sp[-3]);
372     L_ACF[4] += ((int32_t) sl*(int32_t) sp[-4]);
373     L_ACF[5] += ((int32_t) sl*(int32_t) sp[-5]);
374     L_ACF[6] = ((int32_t) sl*(int32_t) sp[-6]);
375     sl = *++sp;
376     L_ACF[0] += ((int32_t) sl*(int32_t) sp[0]);
377     L_ACF[1] += ((int32_t) sl*(int32_t) sp[-1]);
378     L_ACF[2] += ((int32_t) sl*(int32_t) sp[-2]);
379     L_ACF[3] += ((int32_t) sl*(int32_t) sp[-3]);
380     L_ACF[4] += ((int32_t) sl*(int32_t) sp[-4]);
381     L_ACF[5] += ((int32_t) sl*(int32_t) sp[-5]);
382     L_ACF[6] += ((int32_t) sl*(int32_t) sp[-6]);
383     L_ACF[7] = ((int32_t) sl*(int32_t) sp[-7]);
384     sl = *++sp;
385     L_ACF[0] += ((int32_t) sl*(int32_t) sp[0]);
386     L_ACF[1] += ((int32_t) sl*(int32_t) sp[-1]);
387     L_ACF[2] += ((int32_t) sl*(int32_t) sp[-2]);
388     L_ACF[3] += ((int32_t) sl*(int32_t) sp[-3]);
389     L_ACF[4] += ((int32_t) sl*(int32_t) sp[-4]);
390     L_ACF[5] += ((int32_t) sl*(int32_t) sp[-5]);
391     L_ACF[6] += ((int32_t) sl*(int32_t) sp[-6]);
392     L_ACF[7] += ((int32_t) sl*(int32_t) sp[-7]);
393     L_ACF[8] = ((int32_t) sl*(int32_t) sp[-8]);
394     for (i = 9;  i < GSM0610_FRAME_LEN;  i++)
395     {
396         sl = *++sp;
397         L_ACF[0] += ((int32_t) sl*(int32_t) sp[0]);
398         L_ACF[1] += ((int32_t) sl*(int32_t) sp[-1]);
399         L_ACF[2] += ((int32_t) sl*(int32_t) sp[-2]);
400         L_ACF[3] += ((int32_t) sl*(int32_t) sp[-3]);
401         L_ACF[4] += ((int32_t) sl*(int32_t) sp[-4]);
402         L_ACF[5] += ((int32_t) sl*(int32_t) sp[-5]);
403         L_ACF[6] += ((int32_t) sl*(int32_t) sp[-6]);
404         L_ACF[7] += ((int32_t) sl*(int32_t) sp[-7]);
405         L_ACF[8] += ((int32_t) sl*(int32_t) sp[-8]);
406     }
407     /*endfor*/
408     for (k = 0;  k < 9;  k++)
409         L_ACF[k] <<= 1;
410     /*endfor*/
411 #endif
412     /* Rescaling of the array s[0..159] */
413     if (scalauto > 0)
414     {
415         assert(scalauto <= 4);
416         for (k = 0;  k < GSM0610_FRAME_LEN;  k++)
417             amp[k] <<= scalauto;
418         /*endfor*/
419     }
420     /*endif*/
421 }
422 /*- End of function --------------------------------------------------------*/
423 
424 /* 4.2.5 */
reflection_coefficients(int32_t L_ACF[9],int16_t r[8])425 static void reflection_coefficients(int32_t L_ACF[9], int16_t r[8])
426 {
427     int i;
428     int m;
429     int n;
430     int16_t temp;
431     int16_t ACF[9];
432     int16_t P[9];
433     int16_t K[9];
434 
435     /* Schur recursion with 16 bits arithmetic. */
436     if (L_ACF[0] == 0)
437     {
438         for (i = 8;  i--;  *r++ = 0)
439             ;
440         /*endfor*/
441         return;
442     }
443     /*endif*/
444 
445     assert(L_ACF[0] != 0);
446     temp = gsm0610_norm(L_ACF[0]);
447 
448     assert(temp >= 0  &&  temp < 32);
449 
450     /* ? overflow ? */
451     for (i = 0;  i <= 8;  i++)
452         ACF[i] = (int16_t) ((L_ACF[i] << temp) >> 16);
453     /*endfor*/
454 
455     /* Initialize array P[..] and K[..] for the recursion. */
456     for (i = 1;  i <= 7;  i++)
457         K[i] = ACF[i];
458     /*endfor*/
459     for (i = 0;  i <= 8;  i++)
460         P[i] = ACF[i];
461     /*endfor*/
462     /* Compute reflection coefficients */
463     for (n = 1;  n <= 8;  n++, r++)
464     {
465         temp = P[1];
466         temp = sat_abs16(temp);
467         if (P[0] < temp)
468         {
469             for (i = n;  i <= 8;  i++)
470                 *r++ = 0;
471             /*endfor*/
472             return;
473         }
474         /*endif*/
475 
476         *r = gsm_div(temp, P[0]);
477 
478         assert(*r >= 0);
479         if (P[1] > 0)
480             *r = -*r;     /* r[n] = sub(0, r[n]) */
481         /*endif*/
482         assert(*r != INT16_MIN);
483         if (n == 8)
484             return;
485         /*endif*/
486 
487         /* Schur recursion */
488         temp = gsm_mult_r(P[1], *r);
489         P[0] = sat_add16(P[0], temp);
490 
491         for (m = 1;  m <= 8 - n;  m++)
492         {
493             temp = gsm_mult_r(K[m], *r);
494             P[m] = sat_add16(P[m + 1], temp);
495 
496             temp = gsm_mult_r(P[m + 1], *r);
497             K[m] = sat_add16(K[m], temp);
498         }
499         /*endfor*/
500     }
501     /*endfor*/
502 }
503 /*- End of function --------------------------------------------------------*/
504 
505 /* 4.2.6 */
transform_to_log_area_ratios(int16_t r[8])506 static void transform_to_log_area_ratios(int16_t r[8])
507 {
508     int16_t temp;
509     int i;
510 
511     /* The following scaling for r[..] and LAR[..] has been used:
512 
513        r[..]   = integer (real_r[..]*32768.); -1 <= real_r < 1.
514        LAR[..] = integer (real_LAR[..] * 16384);
515        with -1.625 <= real_LAR <= 1.625
516     */
517 
518     /* Computation of the LAR[0..7] from the r[0..7] */
519     for (i = 1;  i <= 8;  i++, r++)
520     {
521         temp = sat_abs16(*r);
522         assert(temp >= 0);
523 
524         if (temp < 22118)
525         {
526             temp >>= 1;
527         }
528         else if (temp < 31130)
529         {
530             assert(temp >= 11059);
531             temp -= 11059;
532         }
533         else
534         {
535             assert(temp >= 26112);
536             temp -= 26112;
537             temp <<= 2;
538         }
539         /*endif*/
540 
541         *r = (*r < 0)  ?  -temp  :  temp;
542         assert(*r != INT16_MIN);
543     }
544     /*endfor*/
545 }
546 /*- End of function --------------------------------------------------------*/
547 
548 /* 4.2.7 */
quantization_and_coding(int16_t LAR[8])549 static void quantization_and_coding(int16_t LAR[8])
550 {
551     int16_t temp;
552 
553     /* This procedure needs four tables; the following equations
554        give the optimum scaling for the constants:
555 
556        A[0..7] = integer(real_A[0..7] * 1024)
557        B[0..7] = integer(real_B[0..7] *  512)
558        MAC[0..7] = maximum of the LARc[0..7]
559        MIC[0..7] = minimum of the LARc[0..7] */
560 
561 #undef STEP
562 #define STEP(A,B,MAC,MIC)                                       \
563         temp = sat_mul16(A, *LAR);                              \
564         temp = sat_add16(temp, (B + 256));                      \
565         temp >>= 9;                                             \
566         *LAR  = (int16_t) ((temp > MAC)                         \
567                          ?                                      \
568                          MAC - MIC                              \
569                          :                                      \
570                          ((temp < MIC)  ?  0  :  temp - MIC));  \
571         LAR++;
572 
573     STEP(20480,     0,  31, -32);
574     STEP(20480,     0,  31, -32);
575     STEP(20480,  2048,  15, -16);
576     STEP(20480, -2560,  15, -16);
577 
578     STEP(13964,    94,   7,  -8);
579     STEP(15360, -1792,   7,  -8);
580     STEP( 8534,  -341,   3,  -4);
581     STEP( 9036, -1144,   3,  -4);
582 #undef STEP
583 }
584 /*- End of function --------------------------------------------------------*/
585 
gsm0610_lpc_analysis(gsm0610_state_t * s,int16_t amp[GSM0610_FRAME_LEN],int16_t LARc[8])586 void gsm0610_lpc_analysis(gsm0610_state_t *s,
587                           int16_t amp[GSM0610_FRAME_LEN],
588                           int16_t LARc[8])
589 {
590     int32_t L_ACF[9];
591 
592     autocorrelation(amp, L_ACF);
593     reflection_coefficients(L_ACF, LARc);
594     transform_to_log_area_ratios(LARc);
595     quantization_and_coding(LARc);
596 }
597 /*- End of function --------------------------------------------------------*/
598 /*- End of file ------------------------------------------------------------*/
599