1 /* Copyright (c) 2014, Cisco Systems, INC
2    Written by XiangMingZhu WeiZhou MinPeng YanWang
3 
4    Redistribution and use in source and binary forms, with or without
5    modification, are permitted provided that the following conditions
6    are met:
7 
8    - Redistributions of source code must retain the above copyright
9    notice, this list of conditions and the following disclaimer.
10 
11    - Redistributions in binary form must reproduce the above copyright
12    notice, this list of conditions and the following disclaimer in the
13    documentation and/or other materials provided with the distribution.
14 
15    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27 
28 #ifndef MAIN_SSE_H
29 #define MAIN_SSE_H
30 
31 #ifdef HAVE_CONFIG_H
32 #include "config.h"
33 #endif
34 
35 # if defined(OPUS_X86_MAY_HAVE_SSE4_1)
36 
37 #if 0 /* FIXME: SSE disabled until silk_VQ_WMat_EC_sse4_1() gets updated. */
/* SSE4.1 override of silk_VQ_WMat_EC. NOTE: this section sits inside the
   surrounding "#if 0" (see the FIXME on that line), so it is not compiled. */
38 #  define OVERRIDE_silk_VQ_WMat_EC
39 
/* Prototype of the SSE4.1 variant; it is only declared in this header. */
40 void silk_VQ_WMat_EC_sse4_1(
41     opus_int8                   *ind,                           /* O    index of best codebook vector               */
42     opus_int32                  *rate_dist_Q14,                 /* O    best weighted quant error + mu * rate       */
43     opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
44     const opus_int16            *in_Q14,                        /* I    input vector to be quantized                */
45     const opus_int32            *W_Q18,                         /* I    weighting matrix                            */
46     const opus_int8             *cb_Q7,                         /* I    codebook                                    */
47     const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
48     const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
49     const opus_int              mu_Q9,                          /* I    tradeoff between weighted error and rate    */
50     const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
51     opus_int                    L                               /* I    number of vectors in codebook               */
52 );
53 
/* With OPUS_X86_PRESUME_SSE4_1 defined, silk_VQ_WMat_EC() calls the SSE4.1
   variant directly; arch is evaluated, cast to void and otherwise unused. */
54 #if defined OPUS_X86_PRESUME_SSE4_1
55 
56 #define silk_VQ_WMat_EC(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \
57                           mu_Q9, max_gain_Q7, L, arch) \
58     ((void)(arch),silk_VQ_WMat_EC_sse4_1(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \
59                           mu_Q9, max_gain_Q7, L))
60 
61 #else
62 
/* Otherwise the call goes through SILK_VQ_WMAT_EC_IMPL, a const table of
   OPUS_ARCHMASK + 1 function pointers (declared extern here), indexed by
   (arch & OPUS_ARCHMASK). The entries take the same parameters as
   silk_VQ_WMat_EC_sse4_1 above. */
63 extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])(
64     opus_int8                   *ind,                           /* O    index of best codebook vector               */
65     opus_int32                  *rate_dist_Q14,                 /* O    best weighted quant error + mu * rate       */
66     opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
67     const opus_int16            *in_Q14,                        /* I    input vector to be quantized                */
68     const opus_int32            *W_Q18,                         /* I    weighting matrix                            */
69     const opus_int8             *cb_Q7,                         /* I    codebook                                    */
70     const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
71     const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
72     const opus_int              mu_Q9,                          /* I    tradeoff between weighted error and rate    */
73     const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
74     opus_int                    L                               /* I    number of vectors in codebook               */
75 );
76 
77 #  define silk_VQ_WMat_EC(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \
78                           mu_Q9, max_gain_Q7, L, arch) \
79     ((*SILK_VQ_WMAT_EC_IMPL[(arch) & OPUS_ARCHMASK])(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \
80                           mu_Q9, max_gain_Q7, L))
81 
82 #endif
83 #endif
84 
85 #if 0 /* FIXME: SSE disabled until the NSQ code gets updated. */
/* SSE4.1 override of silk_NSQ. NOTE: this section sits inside the
   surrounding "#if 0" (see the FIXME on that line), so it is not compiled. */
86 #  define OVERRIDE_silk_NSQ
87 
/* Prototype of the SSE4.1 variant; it is only declared in this header. */
88 void silk_NSQ_sse4_1(
89     const silk_encoder_state    *psEncC,                                    /* I    Encoder State                   */
90     silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
91     SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
92     const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
93     opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
94     const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */
95     const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */
96     const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs             */
97     const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */
98     const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */
99     const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                 /* I    Low frequency shaping coefs     */
100     const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                  /* I    Quantization step sizes         */
101     const opus_int              pitchL[ MAX_NB_SUBFR ],                     /* I    Pitch lags                      */
102     const opus_int              Lambda_Q10,                                 /* I    Rate/distortion tradeoff        */
103     const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
104 );
105 
/* With OPUS_X86_PRESUME_SSE4_1 defined, silk_NSQ() calls the SSE4.1 variant
   directly; arch is evaluated, cast to void and otherwise unused. */
106 #if defined OPUS_X86_PRESUME_SSE4_1
107 
108 #define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
109                    HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
110     ((void)(arch),silk_NSQ_sse4_1(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
111                    HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
112 
113 #else
114 
/* Otherwise the call goes through SILK_NSQ_IMPL, a const table of
   OPUS_ARCHMASK + 1 function pointers (declared extern here), indexed by
   (arch & OPUS_ARCHMASK). The entries take the same parameters as
   silk_NSQ_sse4_1 above. */
115 extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])(
116     const silk_encoder_state    *psEncC,                                    /* I    Encoder State                   */
117     silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
118     SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
119     const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
120     opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
121     const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */
122     const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */
123     const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs             */
124     const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */
125     const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */
126     const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                 /* I    Low frequency shaping coefs     */
127     const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                  /* I    Quantization step sizes         */
128     const opus_int              pitchL[ MAX_NB_SUBFR ],                     /* I    Pitch lags                      */
129     const opus_int              Lambda_Q10,                                 /* I    Rate/distortion tradeoff        */
130     const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
131 );
132 
133 #  define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
134                    HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
135     ((*SILK_NSQ_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
136                    HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
137 
138 #endif
139 
/* SSE4.1 override of silk_NSQ_del_dec. NOTE: this section is still inside the
   "#if 0" opened before the silk_NSQ override, so it is not compiled. */
140 #  define OVERRIDE_silk_NSQ_del_dec
141 
/* Prototype of the SSE4.1 variant; it is only declared in this header. */
142 void silk_NSQ_del_dec_sse4_1(
143     const silk_encoder_state    *psEncC,                                    /* I    Encoder State                   */
144     silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
145     SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
146     const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
147     opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
148     const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */
149     const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */
150     const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs             */
151     const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */
152     const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */
153     const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                 /* I    Low frequency shaping coefs     */
154     const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                  /* I    Quantization step sizes         */
155     const opus_int              pitchL[ MAX_NB_SUBFR ],                     /* I    Pitch lags                      */
156     const opus_int              Lambda_Q10,                                 /* I    Rate/distortion tradeoff        */
157     const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
158 );
159 
/* With OPUS_X86_PRESUME_SSE4_1 defined, silk_NSQ_del_dec() calls the SSE4.1
   variant directly; arch is evaluated, cast to void and otherwise unused. */
160 #if defined OPUS_X86_PRESUME_SSE4_1
161 
162 #define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
163                            HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
164     ((void)(arch),silk_NSQ_del_dec_sse4_1(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
165                            HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
166 
167 #else
168 
/* Otherwise the call goes through SILK_NSQ_DEL_DEC_IMPL, a const table of
   OPUS_ARCHMASK + 1 function pointers (declared extern here), indexed by
   (arch & OPUS_ARCHMASK). The entries take the same parameters as
   silk_NSQ_del_dec_sse4_1 above. */
169 extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
170     const silk_encoder_state    *psEncC,                                    /* I    Encoder State                   */
171     silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
172     SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
173     const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
174     opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
175     const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */
176     const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */
177     const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs             */
178     const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */
179     const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */
180     const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                 /* I    Low frequency shaping coefs     */
181     const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                  /* I    Quantization step sizes         */
182     const opus_int              pitchL[ MAX_NB_SUBFR ],                     /* I    Pitch lags                      */
183     const opus_int              Lambda_Q10,                                 /* I    Rate/distortion tradeoff        */
184     const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
185 );
186 
187 #  define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
188                            HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
189     ((*SILK_NSQ_DEL_DEC_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
190                            HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
191 
192 #endif
193 #endif
194 
/* Prototype of silk_noise_shape_quantizer(); it is only declared in this
   header. Unlike the *_sse4_1 prototypes in this file it takes an explicit
   arch argument and scalar (not per-subframe array) shaping parameters.
   NOTE(review): several parameters had no description here; the ones marked
   "presumably" are inferred from the same-named silk_NSQ_sse4_1 parameters
   and should be confirmed against the definition. */
195 void silk_noise_shape_quantizer(
196     silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
197     opus_int            signalType,             /* I    Signal type                     */
198     const opus_int32    x_sc_Q10[],             /* I    (undocumented; TODO confirm)    */
199     opus_int8           pulses[],               /* O    presumably quantized pulses     */
200     opus_int16          xq[],                   /* O    (undocumented; TODO confirm)    */
201     opus_int32          sLTP_Q15[],             /* I/O  LTP state                       */
202     const opus_int16    a_Q12[],                /* I    Short term prediction coefs     */
203     const opus_int16    b_Q14[],                /* I    Long term prediction coefs      */
204     const opus_int16    AR_shp_Q13[],           /* I    Noise shaping AR coefs          */
205     opus_int            lag,                    /* I    Pitch lag                       */
206     opus_int32          HarmShapeFIRPacked_Q14, /* I    (undocumented; TODO confirm)    */
207     opus_int            Tilt_Q14,               /* I    Spectral tilt                   */
208     opus_int32          LF_shp_Q14,             /* I    presumably LF shaping coef      */
209     opus_int32          Gain_Q16,               /* I    presumably quantization step    */
210     opus_int            Lambda_Q10,             /* I    presumably rate/dist tradeoff   */
211     opus_int            offset_Q10,             /* I    (undocumented; TODO confirm)    */
212     opus_int            length,                 /* I    Input length                    */
213     opus_int            shapingLPCOrder,        /* I    Noise shaping AR filter order   */
214     opus_int            predictLPCOrder,        /* I    Prediction filter order         */
215     int                 arch                    /* I    Architecture                    */
216 );
217 
218 /**************************/
219 /* Noise level estimation */
220 /**************************/
/* Prototype only; the function is not defined in this header. It takes the
   VAD_N_BANDS subband energies and a pointer to the VAD state, which is
   marked as both read and written. */
221 void silk_VAD_GetNoiseLevels(
222     const opus_int32            pX[ VAD_N_BANDS ],  /* I    subband energies                            */
223     silk_VAD_state              *psSilk_VAD         /* I/O  Pointer to Silk VAD state                   */
224 );
225 
/* SSE4.1 override of silk_VAD_GetSA_Q8. Unlike the overrides earlier in this
   header, this section is not wrapped in "#if 0"; it is compiled whenever the
   enclosing OPUS_X86_MAY_HAVE_SSE4_1 guard is satisfied. */
226 #  define OVERRIDE_silk_VAD_GetSA_Q8
227 
/* Prototype of the SSE4.1 variant; it is only declared in this header.
   Takes the encoder state and an input sample array, returns an opus_int. */
228 opus_int silk_VAD_GetSA_Q8_sse4_1(
229     silk_encoder_state *psEnC,
230     const opus_int16   pIn[]
231 );
232 
/* With OPUS_X86_PRESUME_SSE4_1 defined, silk_VAD_GetSA_Q8() calls the SSE4.1
   variant directly; arch is evaluated, cast to void and otherwise unused. */
233 #if defined(OPUS_X86_PRESUME_SSE4_1)
234 #define silk_VAD_GetSA_Q8(psEnC, pIn, arch) ((void)(arch),silk_VAD_GetSA_Q8_sse4_1(psEnC, pIn))
235 
236 #else
237 
/* Otherwise the call goes through SILK_VAD_GETSA_Q8_IMPL, indexed by
   (arch & OPUS_ARCHMASK). The macro names the table before its extern
   declaration below; that is fine because the macro body is only expanded
   at the point of use. */
238 #  define silk_VAD_GetSA_Q8(psEnC, pIn, arch) \
239      ((*SILK_VAD_GETSA_Q8_IMPL[(arch) & OPUS_ARCHMASK])(psEnC, pIn))
240 
/* Const table of OPUS_ARCHMASK + 1 function pointers, declared extern here. */
241 extern opus_int (*const SILK_VAD_GETSA_Q8_IMPL[OPUS_ARCHMASK + 1])(
242      silk_encoder_state *psEnC,
243      const opus_int16   pIn[]);
244 
245 #endif
246 
247 # endif
248 #endif
249