1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
3 *
4 * $Date:        20. October 2015
5 * $Revision:    V1.4.5 b
6 *
7 * Project:      CMSIS DSP Library
8 * Title:        arm_math.h
9 *
10 * Description:  Public header file for CMSIS DSP Library
11 *
12 * Target Processor: Cortex-M7/Cortex-M4/Cortex-M3/Cortex-M0
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 *   - Redistributions of source code must retain the above copyright
18 *     notice, this list of conditions and the following disclaimer.
19 *   - Redistributions in binary form must reproduce the above copyright
20 *     notice, this list of conditions and the following disclaimer in
21 *     the documentation and/or other materials provided with the
22 *     distribution.
23 *   - Neither the name of ARM LIMITED nor the names of its contributors
24 *     may be used to endorse or promote products derived from this
25 *     software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39  * -------------------------------------------------------------------- */
40 
41 /**
42    \mainpage CMSIS DSP Software Library
43    *
44    * Introduction
45    * ------------
46    *
47    * This user manual describes the CMSIS DSP software library,
48    * a suite of common signal processing functions for use on Cortex-M processor based devices.
49    *
50    * The library is divided into a number of functions each covering a specific category:
51    * - Basic math functions
52    * - Fast math functions
53    * - Complex math functions
54    * - Filters
55    * - Matrix functions
56    * - Transforms
57    * - Motor control functions
58    * - Statistical functions
59    * - Support functions
60    * - Interpolation functions
61    *
   * The library has separate functions for operating on 8-bit integers, 16-bit integers,
   * 32-bit integers and 32-bit floating-point values.
64    *
65    * Using the Library
66    * ------------
67    *
68    * The library installer contains prebuilt versions of the libraries in the <code>Lib</code> folder.
69    * - arm_cortexM7lfdp_math.lib (Little endian and Double Precision Floating Point Unit on Cortex-M7)
70    * - arm_cortexM7bfdp_math.lib (Big endian and Double Precision Floating Point Unit on Cortex-M7)
71    * - arm_cortexM7lfsp_math.lib (Little endian and Single Precision Floating Point Unit on Cortex-M7)
72    * - arm_cortexM7bfsp_math.lib (Big endian and Single Precision Floating Point Unit on Cortex-M7)
73    * - arm_cortexM7l_math.lib (Little endian on Cortex-M7)
74    * - arm_cortexM7b_math.lib (Big endian on Cortex-M7)
75    * - arm_cortexM4lf_math.lib (Little endian and Floating Point Unit on Cortex-M4)
76    * - arm_cortexM4bf_math.lib (Big endian and Floating Point Unit on Cortex-M4)
77    * - arm_cortexM4l_math.lib (Little endian on Cortex-M4)
78    * - arm_cortexM4b_math.lib (Big endian on Cortex-M4)
79    * - arm_cortexM3l_math.lib (Little endian on Cortex-M3)
80    * - arm_cortexM3b_math.lib (Big endian on Cortex-M3)
81    * - arm_cortexM0l_math.lib (Little endian on Cortex-M0 / CortexM0+)
82    * - arm_cortexM0b_math.lib (Big endian on Cortex-M0 / CortexM0+)
83    *
   * The library functions are declared in the public file <code>arm_math.h</code> which is placed in the <code>Include</code> folder.
   * Simply include this file, link the appropriate library in the application, and begin calling the library functions. The library provides a single
   * public header file <code>arm_math.h</code> for Cortex-M7/M4/M3/M0/M0+ in both little endian and big endian configurations. The same header file is used for the floating point unit (FPU) variants.
   * Define the appropriate preprocessor macro ARM_MATH_CM7, ARM_MATH_CM4, ARM_MATH_CM3,
   * ARM_MATH_CM0 or ARM_MATH_CM0PLUS in the application, depending on the target processor.
89    *
90    * Examples
91    * --------
92    *
93    * The library ships with a number of examples which demonstrate how to use the library functions.
94    *
95    * Toolchain Support
96    * ------------
97    *
   * The library has been developed and tested with MDK-ARM version 5.14.0.0.
   * The library is being tested with the GCC and IAR toolchains, and updates on this activity will be made available shortly.
100    *
101    * Building the Library
102    * ------------
103    *
   * The library installer contains a project file to rebuild the libraries with the MDK-ARM toolchain in the <code>CMSIS\\DSP_Lib\\Source\\ARM</code> folder.
105    * - arm_cortexM_math.uvprojx
106    *
107    *
108    * The libraries can be built by opening the arm_cortexM_math.uvprojx project in MDK-ARM, selecting a specific target, and defining the optional pre processor MACROs detailed above.
109    *
110    * Pre-processor Macros
111    * ------------
112    *
   * Each library project has different pre-processor macros.
114    *
115    * - UNALIGNED_SUPPORT_DISABLE:
116    *
   * Define macro UNALIGNED_SUPPORT_DISABLE if the silicon does not support unaligned memory access.
118    *
119    * - ARM_MATH_BIG_ENDIAN:
120    *
121    * Define macro ARM_MATH_BIG_ENDIAN to build the library for big endian targets. By default library builds for little endian targets.
122    *
123    * - ARM_MATH_MATRIX_CHECK:
124    *
125    * Define macro ARM_MATH_MATRIX_CHECK for checking on the input and output sizes of matrices
126    *
127    * - ARM_MATH_ROUNDING:
128    *
129    * Define macro ARM_MATH_ROUNDING for rounding on support functions
130    *
131    * - ARM_MATH_CMx:
132    *
   * Define macro ARM_MATH_CM7 for building the library on a Cortex-M7 target, ARM_MATH_CM4 for a Cortex-M4 target,
   * ARM_MATH_CM3 for a Cortex-M3 target, ARM_MATH_CM0 for a Cortex-M0 target, and
   * ARM_MATH_CM0PLUS for a Cortex-M0+ target.
136    *
137    * - __FPU_PRESENT:
138    *
139    * Initialize macro __FPU_PRESENT = 1 when building on FPU supported Targets. Enable this macro for M4bf and M4lf libraries
140    *
141    * <hr>
142    * CMSIS-DSP in ARM::CMSIS Pack
143    * -----------------------------
144    *
145    * The following files relevant to CMSIS-DSP are present in the <b>ARM::CMSIS</b> Pack directories:
146    * |File/Folder                   |Content                                                                 |
147    * |------------------------------|------------------------------------------------------------------------|
148    * |\b CMSIS\\Documentation\\DSP  | This documentation                                                     |
149    * |\b CMSIS\\DSP_Lib             | Software license agreement (license.txt)                               |
150    * |\b CMSIS\\DSP_Lib\\Examples   | Example projects demonstrating the usage of the library functions      |
151    * |\b CMSIS\\DSP_Lib\\Source     | Source files for rebuilding the library                                |
152    *
153    * <hr>
154    * Revision History of CMSIS-DSP
155    * ------------
156    * Please refer to \ref ChangeLog_pg.
157    *
158    * Copyright Notice
159    * ------------
160    *
161    * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
162    */
163 
164 
165 /**
166  * @defgroup groupMath Basic Math Functions
167  */
168 
169 /**
170  * @defgroup groupFastMath Fast Math Functions
171  * This set of functions provides a fast approximation to sine, cosine, and square root.
172  * As compared to most of the other functions in the CMSIS math library, the fast math functions
173  * operate on individual values and not arrays.
174  * There are separate functions for Q15, Q31, and floating-point data.
175  *
176  */
177 
178 /**
179  * @defgroup groupCmplxMath Complex Math Functions
180  * This set of functions operates on complex data vectors.
181  * The data in the complex arrays is stored in an interleaved fashion
182  * (real, imag, real, imag, ...).
183  * In the API functions, the number of samples in a complex array refers
184  * to the number of complex values; the array contains twice this number of
185  * real values.
186  */
187 
188 /**
189  * @defgroup groupFilters Filtering Functions
190  */
191 
192 /**
193  * @defgroup groupMatrix Matrix Functions
194  *
195  * This set of functions provides basic matrix math operations.
196  * The functions operate on matrix data structures.  For example,
197  * the type
198  * definition for the floating-point matrix structure is shown
199  * below:
200  * <pre>
201  *     typedef struct
202  *     {
203  *       uint16_t numRows;     // number of rows of the matrix.
204  *       uint16_t numCols;     // number of columns of the matrix.
205  *       float32_t *pData;     // points to the data of the matrix.
206  *     } arm_matrix_instance_f32;
207  * </pre>
208  * There are similar definitions for Q15 and Q31 data types.
209  *
210  * The structure specifies the size of the matrix and then points to
211  * an array of data.  The array is of size <code>numRows X numCols</code>
212  * and the values are arranged in row order.  That is, the
213  * matrix element (i, j) is stored at:
214  * <pre>
215  *     pData[i*numCols + j]
216  * </pre>
217  *
218  * \par Init Functions
219  * There is an associated initialization function for each type of matrix
220  * data structure.
221  * The initialization function sets the values of the internal structure fields.
222  * Refer to the function <code>arm_mat_init_f32()</code>, <code>arm_mat_init_q31()</code>
223  * and <code>arm_mat_init_q15()</code> for floating-point, Q31 and Q15 types,  respectively.
224  *
225  * \par
 * Use of the initialization function is optional. However, if the initialization function is used,
 * then the instance structure cannot be placed into a const data section.
228  * To place the instance structure in a const data
229  * section, manually initialize the data structure.  For example:
230  * <pre>
231  * <code>arm_matrix_instance_f32 S = {nRows, nColumns, pData};</code>
232  * <code>arm_matrix_instance_q31 S = {nRows, nColumns, pData};</code>
233  * <code>arm_matrix_instance_q15 S = {nRows, nColumns, pData};</code>
234  * </pre>
235  * where <code>nRows</code> specifies the number of rows, <code>nColumns</code>
236  * specifies the number of columns, and <code>pData</code> points to the
237  * data array.
238  *
239  * \par Size Checking
240  * By default all of the matrix functions perform size checking on the input and
241  * output matrices.  For example, the matrix addition function verifies that the
242  * two input matrices and the output matrix all have the same number of rows and
243  * columns.  If the size check fails the functions return:
244  * <pre>
245  *     ARM_MATH_SIZE_MISMATCH
246  * </pre>
247  * Otherwise the functions return
248  * <pre>
249  *     ARM_MATH_SUCCESS
250  * </pre>
251  * There is some overhead associated with this matrix size checking.
252  * The matrix size checking is enabled via the \#define
253  * <pre>
254  *     ARM_MATH_MATRIX_CHECK
255  * </pre>
256  * within the library project settings.  By default this macro is defined
257  * and size checking is enabled.  By changing the project settings and
258  * undefining this macro size checking is eliminated and the functions
259  * run a bit faster.  With size checking disabled the functions always
260  * return <code>ARM_MATH_SUCCESS</code>.
261  */
262 
263 /**
264  * @defgroup groupTransforms Transform Functions
265  */
266 
267 /**
268  * @defgroup groupController Controller Functions
269  */
270 
271 /**
272  * @defgroup groupStats Statistics Functions
273  */
274 /**
275  * @defgroup groupSupport Support Functions
276  */
277 
278 /**
279  * @defgroup groupInterpolation Interpolation Functions
280  * These functions perform 1- and 2-dimensional interpolation of data.
281  * Linear interpolation is used for 1-dimensional data and
282  * bilinear interpolation is used for 2-dimensional data.
283  */
284 
285 /**
286  * @defgroup groupExamples Examples
287  */
288 #ifndef _ARM_MATH_H
289 #define _ARM_MATH_H
290 
291 /* ignore some GCC warnings */
292 #if defined ( __GNUC__ )
293 #pragma GCC diagnostic push
294 #pragma GCC diagnostic ignored "-Wsign-conversion"
295 #pragma GCC diagnostic ignored "-Wconversion"
296 #pragma GCC diagnostic ignored "-Wunused-parameter"
297 #endif
298 
299 #define __CMSIS_GENERIC         /* disable NVIC and Systick functions */
300 
301 #if defined(ARM_MATH_CM7)
302   #include "core_cm7.h"
303 #elif defined (ARM_MATH_CM4)
304   #include "core_cm4.h"
305 #elif defined (ARM_MATH_CM3)
306   #include "core_cm3.h"
307 #elif defined (ARM_MATH_CM0)
308   #include "core_cm0.h"
309   #define ARM_MATH_CM0_FAMILY
310 #elif defined (ARM_MATH_CM0PLUS)
311   #include "core_cm0plus.h"
312   #define ARM_MATH_CM0_FAMILY
313 #else
314   #error "Define according the used Cortex core ARM_MATH_CM7, ARM_MATH_CM4, ARM_MATH_CM3, ARM_MATH_CM0PLUS or ARM_MATH_CM0"
315 #endif
316 
317 #undef  __CMSIS_GENERIC         /* enable NVIC and Systick functions */
318 #include "string.h"
319 #include "math.h"
320 #ifdef   __cplusplus
321 extern "C"
322 {
323 #endif
324 
325 
/**
 * @brief Macros required for reciprocal calculation in Normalized LMS
 *
 * INDEX_MASK keeps the low six bits of a value; arm_recip_q31() and
 * arm_recip_q15() apply it to the index used for the reciprocal table lookup.
 */
#define INDEX_MASK         (0x0000003F)
#define DELTA_Q15          (0x5)
#define DELTA_Q31          (0x100)

/* Single-precision pi; only defined here when no earlier definition exists. */
#if !defined(PI)
  #define PI               3.14159265358979f
#endif
336 
/**
 * @brief Macros required for SINE and COSINE Fast math approximations
 */
#define TABLE_SIZE            256
#define FAST_MATH_TABLE_SIZE  512
#define FAST_MATH_Q15_SHIFT   (16 - 10)
#define FAST_MATH_Q31_SHIFT   (32 - 10)
#define CONTROLLER_Q31_SHIFT  (32 - 9)
#define TABLE_SPACING_Q15     0x80
#define TABLE_SPACING_Q31     0x400000

/**
 * @brief Macros required for SINE and COSINE Controller functions
 *
 * INPUT_SPACING is the fixed value 2/360 in 1.31 (q31) format: the range
 * -1 to +1 is divided into 360 values, so the spacing between them is 2/360.
 */
#define INPUT_SPACING         0xB60B61
355 
/**
 * @brief Macro for Unaligned Support
 *
 * ALIGN4 expands to nothing unless UNALIGNED_SUPPORT_DISABLE is defined.
 * When it is defined, ALIGN4 becomes a 4-byte alignment specifier, written
 * with the GNU attribute syntax on GNU-compatible compilers and with
 * __align(4) on every other compiler.
 */
#ifdef UNALIGNED_SUPPORT_DISABLE
  #if defined  (__GNUC__)
    #define ALIGN4 __attribute__((aligned(4)))
  #else
    #define ALIGN4 __align(4)
  #endif
#else
    #define ALIGN4
#endif   /* #ifdef UNALIGNED_SUPPORT_DISABLE */
368 
/**
 * @brief Error status returned by some functions in the library.
 *
 * Success is zero; every error code is a distinct negative value.
 */
typedef enum
{
  /** No error */
  ARM_MATH_SUCCESS        =  0,
  /** One or more arguments are incorrect */
  ARM_MATH_ARGUMENT_ERROR = -1,
  /** Length of data buffer is incorrect */
  ARM_MATH_LENGTH_ERROR   = -2,
  /** Size of matrices is not compatible with the operation. */
  ARM_MATH_SIZE_MISMATCH  = -3,
  /** Not-a-number (NaN) or infinity is generated */
  ARM_MATH_NANINF         = -4,
  /** Generated by matrix inversion if the input matrix is singular and cannot be inverted. */
  ARM_MATH_SINGULAR       = -5,
  /** Test Failed */
  ARM_MATH_TEST_FAILURE   = -6
} arm_status;
383 
/*
 * Scalar data types used throughout the library: signed fixed-point
 * (fractional) types of 8, 16, 32 and 64 bits, plus the two floating-point
 * types.
 */
typedef int8_t  q7_t;        /**< 8-bit fractional data type in 1.7 format. */
typedef int16_t q15_t;       /**< 16-bit fractional data type in 1.15 format. */
typedef int32_t q31_t;       /**< 32-bit fractional data type in 1.31 format. */
typedef int64_t q63_t;       /**< 64-bit fractional data type in 1.63 format. */
typedef float   float32_t;   /**< 32-bit floating-point type definition. */
typedef double  float64_t;   /**< 64-bit floating-point type definition. */
413 
/**
 * @brief definition to read/write two 16 bit values.
 *
 * Selects, per compiler, the 32-bit type used for SIMD-style word accesses
 * (__SIMD32_TYPE) and the attribute that marks a symbol as intentionally
 * unused (CMSIS_UNUSED). Compilation stops with an error on a compiler that
 * is not listed.
 */
#if defined(__CC_ARM)
  #define __SIMD32_TYPE int32_t __packed
  #define CMSIS_UNUSED __attribute__((unused))

#elif (defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)) || defined(__GNUC__)
  #define __SIMD32_TYPE int32_t
  #define CMSIS_UNUSED __attribute__((unused))

#elif defined(__ICCARM__)
  #define __SIMD32_TYPE int32_t __packed
  #define CMSIS_UNUSED

#elif defined(__CSMC__)
  #define __SIMD32_TYPE int32_t
  #define CMSIS_UNUSED

#elif defined(__TASKING__)
  #define __SIMD32_TYPE __unaligned int32_t
  #define CMSIS_UNUSED

#else
  #error Unknown compiler
#endif

/* Reinterpret the pointer variable addr as a pointer to __SIMD32_TYPE (usable as an lvalue). */
#define __SIMD32(addr)        (*(__SIMD32_TYPE **) & (addr))
/* Convert the pointer value addr to a pointer to __SIMD32_TYPE. */
#define __SIMD32_CONST(addr)  ((__SIMD32_TYPE *)(addr))
/* Access the __SIMD32_TYPE object located at addr. */
#define _SIMD32_OFFSET(addr)  (*(__SIMD32_TYPE *)  (addr))
/* Reinterpret the pointer variable addr as a pointer to int64_t (usable as an lvalue). */
#define __SIMD64(addr)        (*(int64_t **) & (addr))
450 #if defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0_FAMILY)
/**
 * @brief definition to pack two 16 bit values.
 *
 * __PKHBT keeps the bottom halfword of ARG1 and combines it with the top
 * halfword of (ARG2 << ARG3).
 * __PKHTB keeps the top halfword of ARG1 and combines it with the bottom
 * halfword of (ARG2 >> ARG3).
 *
 * Every macro argument, including the shift count ARG3, is parenthesised so
 * that an expression passed as an argument is evaluated as a unit.
 */
#define __PKHBT(ARG1, ARG2, ARG3)      ( (((int32_t)(ARG1) <<  0) & (int32_t)0x0000FFFF) | \
                                         (((int32_t)(ARG2) << (ARG3)) & (int32_t)0xFFFF0000)  )
#define __PKHTB(ARG1, ARG2, ARG3)      ( (((int32_t)(ARG1) <<  0) & (int32_t)0xFFFF0000) | \
                                         (((int32_t)(ARG2) >> (ARG3)) & (int32_t)0x0000FFFF)  )
458 
459 #endif
460 
461 
/**
 * @brief definition to pack four 8 bit values.
 *
 * Packs the low byte of each argument into one 32-bit word. On little endian
 * builds v0 lands in bits 7..0 and v3 in bits 31..24; defining
 * ARM_MATH_BIG_ENDIAN reverses the order so that v0 lands in bits 31..24.
 */
#ifdef ARM_MATH_BIG_ENDIAN

#define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v3) <<  0) & (int32_t)0x000000FF) | \
                                (((int32_t)(v2) <<  8) & (int32_t)0x0000FF00) | \
                                (((int32_t)(v1) << 16) & (int32_t)0x00FF0000) | \
                                (((int32_t)(v0) << 24) & (int32_t)0xFF000000)  )

#else

#define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) <<  0) & (int32_t)0x000000FF) | \
                                (((int32_t)(v1) <<  8) & (int32_t)0x0000FF00) | \
                                (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | \
                                (((int32_t)(v3) << 24) & (int32_t)0xFF000000)  )

#endif
479 
480 
481   /**
482    * @brief Clips Q63 to Q31 values.
483    */
clip_q63_to_q31(q63_t x)484   static __INLINE q31_t clip_q63_to_q31(
485   q63_t x)
486   {
487     return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
488       ((0x7FFFFFFF ^ ((q31_t) (x >> 63)))) : (q31_t) x;
489   }
490 
491   /**
492    * @brief Clips Q63 to Q15 values.
493    */
clip_q63_to_q15(q63_t x)494   static __INLINE q15_t clip_q63_to_q15(
495   q63_t x)
496   {
497     return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
498       ((0x7FFF ^ ((q15_t) (x >> 63)))) : (q15_t) (x >> 15);
499   }
500 
501   /**
502    * @brief Clips Q31 to Q7 values.
503    */
clip_q31_to_q7(q31_t x)504   static __INLINE q7_t clip_q31_to_q7(
505   q31_t x)
506   {
507     return ((q31_t) (x >> 24) != ((q31_t) x >> 23)) ?
508       ((0x7F ^ ((q7_t) (x >> 31)))) : (q7_t) x;
509   }
510 
511   /**
512    * @brief Clips Q31 to Q15 values.
513    */
clip_q31_to_q15(q31_t x)514   static __INLINE q15_t clip_q31_to_q15(
515   q31_t x)
516   {
517     return ((q31_t) (x >> 16) != ((q31_t) x >> 15)) ?
518       ((0x7FFF ^ ((q15_t) (x >> 31)))) : (q15_t) x;
519   }
520 
521   /**
522    * @brief Multiplies 32 X 64 and returns 32 bit result in 2.30 format.
523    */
524 
mult32x64(q63_t x,q31_t y)525   static __INLINE q63_t mult32x64(
526   q63_t x,
527   q31_t y)
528   {
529     return ((((q63_t) (x & 0x00000000FFFFFFFF) * y) >> 32) +
530             (((q63_t) (x >> 32) * y)));
531   }
532 
533 /*
534   #if defined (ARM_MATH_CM0_FAMILY) && defined ( __CC_ARM   )
535   #define __CLZ __clz
536   #endif
537  */
/* note: function can be removed when all toolchain support __CLZ for Cortex-M0 */
#if defined (ARM_MATH_CM0_FAMILY) && ((defined (__ICCARM__))  )
/**
 * @brief Software count-leading-zeros, compiled only for the Cortex-M0 family with IAR.
 * @param[in] data  value to examine
 * @return number of zero bits above the most significant set bit of data;
 *         32 when data is 0.
 */
static __INLINE uint32_t __CLZ(
  q31_t data);

static __INLINE uint32_t __CLZ(
  q31_t data)
{
  uint32_t count = 0u;
  uint32_t mask = 0x80000000u;

  /* The mask test ends the scan after 32 bits; without it a zero input
     would never leave the loop. */
  while ((mask != 0u) && (((uint32_t) data & mask) == 0u))
  {
    count += 1u;
    mask = mask >> 1u;
  }

  return (count);
}
#endif
558 
559   /**
560    * @brief Function to Calculates 1/in (reciprocal) value of Q31 Data type.
561    */
562 
arm_recip_q31(q31_t in,q31_t * dst,q31_t * pRecipTable)563   static __INLINE uint32_t arm_recip_q31(
564   q31_t in,
565   q31_t * dst,
566   q31_t * pRecipTable)
567   {
568     q31_t out;
569     uint32_t tempVal;
570     uint32_t index, i;
571     uint32_t signBits;
572 
573     if(in > 0)
574     {
575       signBits = ((uint32_t) (__CLZ( in) - 1));
576     }
577     else
578     {
579       signBits = ((uint32_t) (__CLZ(-in) - 1));
580     }
581 
582     /* Convert input sample to 1.31 format */
583     in = (in << signBits);
584 
585     /* calculation of index for initial approximated Val */
586     index = (uint32_t)(in >> 24);
587     index = (index & INDEX_MASK);
588 
589     /* 1.31 with exp 1 */
590     out = pRecipTable[index];
591 
592     /* calculation of reciprocal value */
593     /* running approximation for two iterations */
594     for (i = 0u; i < 2u; i++)
595     {
596       tempVal = (uint32_t) (((q63_t) in * out) >> 31);
597       tempVal = 0x7FFFFFFFu - tempVal;
598       /*      1.31 with exp 1 */
599       /* out = (q31_t) (((q63_t) out * tempVal) >> 30); */
600       out = clip_q63_to_q31(((q63_t) out * tempVal) >> 30);
601     }
602 
603     /* write output */
604     *dst = out;
605 
606     /* return num of signbits of out = 1/in value */
607     return (signBits + 1u);
608   }
609 
610 
611   /**
612    * @brief Function to Calculates 1/in (reciprocal) value of Q15 Data type.
613    */
arm_recip_q15(q15_t in,q15_t * dst,q15_t * pRecipTable)614   static __INLINE uint32_t arm_recip_q15(
615   q15_t in,
616   q15_t * dst,
617   q15_t * pRecipTable)
618   {
619     q15_t out = 0;
620     uint32_t tempVal = 0;
621     uint32_t index = 0, i = 0;
622     uint32_t signBits = 0;
623 
624     if(in > 0)
625     {
626       signBits = ((uint32_t)(__CLZ( in) - 17));
627     }
628     else
629     {
630       signBits = ((uint32_t)(__CLZ(-in) - 17));
631     }
632 
633     /* Convert input sample to 1.15 format */
634     in = (in << signBits);
635 
636     /* calculation of index for initial approximated Val */
637     index = (uint32_t)(in >>  8);
638     index = (index & INDEX_MASK);
639 
640     /*      1.15 with exp 1  */
641     out = pRecipTable[index];
642 
643     /* calculation of reciprocal value */
644     /* running approximation for two iterations */
645     for (i = 0u; i < 2u; i++)
646     {
647       tempVal = (uint32_t) (((q31_t) in * out) >> 15);
648       tempVal = 0x7FFFu - tempVal;
649       /*      1.15 with exp 1 */
650       out = (q15_t) (((q31_t) out * tempVal) >> 14);
651       /* out = clip_q31_to_q15(((q31_t) out * tempVal) >> 14); */
652     }
653 
654     /* write output */
655     *dst = out;
656 
657     /* return num of signbits of out = 1/in value */
658     return (signBits + 1);
659   }
660 
661 
/*
 * @brief C custom defined intrinsic function for only M0 processors
 */
#if defined(ARM_MATH_CM0_FAMILY)
/**
 * @brief Software signed saturation.
 * @param[in] x  value to saturate
 * @param[in] y  width in bits of the saturated result, 1..32
 * @return x limited to the range [-2^(y-1), 2^(y-1) - 1]. When y is outside
 *         1..32 no saturation is applied and x is returned unchanged.
 *
 * The limit is built with an unsigned shift, so y == 32 yields the full
 * 32-bit range without overflowing a signed intermediate.
 */
static __INLINE q31_t __SSAT(
  q31_t x,
  uint32_t y)
{
  q31_t posMax, negMin;

  if ((y < 1u) || (y > 32u))
  {
    return (x);
  }

  posMax = (q31_t) ((1u << (y - 1u)) - 1u);
  negMin = -posMax - 1;

  if (x > posMax)
  {
    x = posMax;
  }
  else if (x < negMin)
  {
    x = negMin;
  }
  return (x);
}
#endif /* end of ARM_MATH_CM0_FAMILY */
700 
701 
702   /*
703    * @brief C custom defined intrinsic function for M3 and M0 processors
704    */
705 #if defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0_FAMILY)
706 
707   /*
708    * @brief C custom defined QADD8 for M3 and M0 processors
709    */
__QADD8(uint32_t x,uint32_t y)710   static __INLINE uint32_t __QADD8(
711   uint32_t x,
712   uint32_t y)
713   {
714     q31_t r, s, t, u;
715 
716     r = __SSAT(((((q31_t)x << 24) >> 24) + (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF;
717     s = __SSAT(((((q31_t)x << 16) >> 24) + (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF;
718     t = __SSAT(((((q31_t)x <<  8) >> 24) + (((q31_t)y <<  8) >> 24)), 8) & (int32_t)0x000000FF;
719     u = __SSAT(((((q31_t)x      ) >> 24) + (((q31_t)y      ) >> 24)), 8) & (int32_t)0x000000FF;
720 
721     return ((uint32_t)((u << 24) | (t << 16) | (s <<  8) | (r      )));
722   }
723 
724 
725   /*
726    * @brief C custom defined QSUB8 for M3 and M0 processors
727    */
__QSUB8(uint32_t x,uint32_t y)728   static __INLINE uint32_t __QSUB8(
729   uint32_t x,
730   uint32_t y)
731   {
732     q31_t r, s, t, u;
733 
734     r = __SSAT(((((q31_t)x << 24) >> 24) - (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF;
735     s = __SSAT(((((q31_t)x << 16) >> 24) - (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF;
736     t = __SSAT(((((q31_t)x <<  8) >> 24) - (((q31_t)y <<  8) >> 24)), 8) & (int32_t)0x000000FF;
737     u = __SSAT(((((q31_t)x      ) >> 24) - (((q31_t)y      ) >> 24)), 8) & (int32_t)0x000000FF;
738 
739     return ((uint32_t)((u << 24) | (t << 16) | (s <<  8) | (r      )));
740   }
741 
742 
743   /*
744    * @brief C custom defined QADD16 for M3 and M0 processors
745    */
__QADD16(uint32_t x,uint32_t y)746   static __INLINE uint32_t __QADD16(
747   uint32_t x,
748   uint32_t y)
749   {
750 /*  q31_t r,     s;  without initialisation 'arm_offset_q15 test' fails  but 'intrinsic' tests pass! for armCC */
751     q31_t r = 0, s = 0;
752 
753     r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
754     s = __SSAT(((((q31_t)x      ) >> 16) + (((q31_t)y      ) >> 16)), 16) & (int32_t)0x0000FFFF;
755 
756     return ((uint32_t)((s << 16) | (r      )));
757   }
758 
759 
760   /*
761    * @brief C custom defined SHADD16 for M3 and M0 processors
762    */
__SHADD16(uint32_t x,uint32_t y)763   static __INLINE uint32_t __SHADD16(
764   uint32_t x,
765   uint32_t y)
766   {
767     q31_t r, s;
768 
769     r = (((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
770     s = (((((q31_t)x      ) >> 16) + (((q31_t)y      ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
771 
772     return ((uint32_t)((s << 16) | (r      )));
773   }
774 
775 
776   /*
777    * @brief C custom defined QSUB16 for M3 and M0 processors
778    */
__QSUB16(uint32_t x,uint32_t y)779   static __INLINE uint32_t __QSUB16(
780   uint32_t x,
781   uint32_t y)
782   {
783     q31_t r, s;
784 
785     r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
786     s = __SSAT(((((q31_t)x      ) >> 16) - (((q31_t)y      ) >> 16)), 16) & (int32_t)0x0000FFFF;
787 
788     return ((uint32_t)((s << 16) | (r      )));
789   }
790 
791 
792   /*
793    * @brief C custom defined SHSUB16 for M3 and M0 processors
794    */
__SHSUB16(uint32_t x,uint32_t y)795   static __INLINE uint32_t __SHSUB16(
796   uint32_t x,
797   uint32_t y)
798   {
799     q31_t r, s;
800 
801     r = (((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
802     s = (((((q31_t)x      ) >> 16) - (((q31_t)y      ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
803 
804     return ((uint32_t)((s << 16) | (r      )));
805   }
806 
807 
808   /*
809    * @brief C custom defined QASX for M3 and M0 processors
810    */
__QASX(uint32_t x,uint32_t y)811   static __INLINE uint32_t __QASX(
812   uint32_t x,
813   uint32_t y)
814   {
815     q31_t r, s;
816 
817     r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y      ) >> 16)), 16) & (int32_t)0x0000FFFF;
818     s = __SSAT(((((q31_t)x      ) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
819 
820     return ((uint32_t)((s << 16) | (r      )));
821   }
822 
823 
824   /*
825    * @brief C custom defined SHASX for M3 and M0 processors
826    */
__SHASX(uint32_t x,uint32_t y)827   static __INLINE uint32_t __SHASX(
828   uint32_t x,
829   uint32_t y)
830   {
831     q31_t r, s;
832 
833     r = (((((q31_t)x << 16) >> 16) - (((q31_t)y      ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
834     s = (((((q31_t)x      ) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
835 
836     return ((uint32_t)((s << 16) | (r      )));
837   }
838 
839 
840   /*
841    * @brief C custom defined QSAX for M3 and M0 processors
842    */
__QSAX(uint32_t x,uint32_t y)843   static __INLINE uint32_t __QSAX(
844   uint32_t x,
845   uint32_t y)
846   {
847     q31_t r, s;
848 
849     r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y      ) >> 16)), 16) & (int32_t)0x0000FFFF;
850     s = __SSAT(((((q31_t)x      ) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
851 
852     return ((uint32_t)((s << 16) | (r      )));
853   }
854 
855 
856   /*
857    * @brief C custom defined SHSAX for M3 and M0 processors
858    */
__SHSAX(uint32_t x,uint32_t y)859   static __INLINE uint32_t __SHSAX(
860   uint32_t x,
861   uint32_t y)
862   {
863     q31_t r, s;
864 
865     r = (((((q31_t)x << 16) >> 16) + (((q31_t)y      ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
866     s = (((((q31_t)x      ) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
867 
868     return ((uint32_t)((s << 16) | (r      )));
869   }
870 
871 
872   /*
873    * @brief C custom defined SMUSDX for M3 and M0 processors
874    */
__SMUSDX(uint32_t x,uint32_t y)875   static __INLINE uint32_t __SMUSDX(
876   uint32_t x,
877   uint32_t y)
878   {
879     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) -
880                        ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16))   ));
881   }
882 
883   /*
884    * @brief C custom defined SMUADX for M3 and M0 processors
885    */
__SMUADX(uint32_t x,uint32_t y)886   static __INLINE uint32_t __SMUADX(
887   uint32_t x,
888   uint32_t y)
889   {
890     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) +
891                        ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16))   ));
892   }
893 
894 
895   /*
896    * @brief C custom defined QADD for M3 and M0 processors
897    */
__QADD(int32_t x,int32_t y)898   static __INLINE int32_t __QADD(
899   int32_t x,
900   int32_t y)
901   {
902     return ((int32_t)(clip_q63_to_q31((q63_t)x + (q31_t)y)));
903   }
904 
905 
906   /*
907    * @brief C custom defined QSUB for M3 and M0 processors
908    */
__QSUB(int32_t x,int32_t y)909   static __INLINE int32_t __QSUB(
910   int32_t x,
911   int32_t y)
912   {
913     return ((int32_t)(clip_q63_to_q31((q63_t)x - (q31_t)y)));
914   }
915 
916 
917   /*
918    * @brief C custom defined SMLAD for M3 and M0 processors
919    */
__SMLAD(uint32_t x,uint32_t y,uint32_t sum)920   static __INLINE uint32_t __SMLAD(
921   uint32_t x,
922   uint32_t y,
923   uint32_t sum)
924   {
925     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
926                        ((((q31_t)x      ) >> 16) * (((q31_t)y      ) >> 16)) +
927                        ( ((q31_t)sum    )                                  )   ));
928   }
929 
930 
931   /*
932    * @brief C custom defined SMLADX for M3 and M0 processors
933    */
__SMLADX(uint32_t x,uint32_t y,uint32_t sum)934   static __INLINE uint32_t __SMLADX(
935   uint32_t x,
936   uint32_t y,
937   uint32_t sum)
938   {
939     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) +
940                        ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16)) +
941                        ( ((q31_t)sum    )                                  )   ));
942   }
943 
944 
945   /*
946    * @brief C custom defined SMLSDX for M3 and M0 processors
947    */
__SMLSDX(uint32_t x,uint32_t y,uint32_t sum)948   static __INLINE uint32_t __SMLSDX(
949   uint32_t x,
950   uint32_t y,
951   uint32_t sum)
952   {
953     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) -
954                        ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16)) +
955                        ( ((q31_t)sum    )                                  )   ));
956   }
957 
958 
959   /*
960    * @brief C custom defined SMLALD for M3 and M0 processors
961    */
__SMLALD(uint32_t x,uint32_t y,uint64_t sum)962   static __INLINE uint64_t __SMLALD(
963   uint32_t x,
964   uint32_t y,
965   uint64_t sum)
966   {
967 /*  return (sum + ((q15_t) (x >> 16) * (q15_t) (y >> 16)) + ((q15_t) x * (q15_t) y)); */
968     return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
969                        ((((q31_t)x      ) >> 16) * (((q31_t)y      ) >> 16)) +
970                        ( ((q63_t)sum    )                                  )   ));
971   }
972 
973 
974   /*
975    * @brief C custom defined SMLALDX for M3 and M0 processors
976    */
__SMLALDX(uint32_t x,uint32_t y,uint64_t sum)977   static __INLINE uint64_t __SMLALDX(
978   uint32_t x,
979   uint32_t y,
980   uint64_t sum)
981   {
982 /*  return (sum + ((q15_t) (x >> 16) * (q15_t) y)) + ((q15_t) x * (q15_t) (y >> 16)); */
983     return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) +
984                        ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16)) +
985                        ( ((q63_t)sum    )                                  )   ));
986   }
987 
988 
989   /*
990    * @brief C custom defined SMUAD for M3 and M0 processors
991    */
__SMUAD(uint32_t x,uint32_t y)992   static __INLINE uint32_t __SMUAD(
993   uint32_t x,
994   uint32_t y)
995   {
996     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
997                        ((((q31_t)x      ) >> 16) * (((q31_t)y      ) >> 16))   ));
998   }
999 
1000 
1001   /*
1002    * @brief C custom defined SMUSD for M3 and M0 processors
1003    */
__SMUSD(uint32_t x,uint32_t y)1004   static __INLINE uint32_t __SMUSD(
1005   uint32_t x,
1006   uint32_t y)
1007   {
1008     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) -
1009                        ((((q31_t)x      ) >> 16) * (((q31_t)y      ) >> 16))   ));
1010   }
1011 
1012 
1013   /*
1014    * @brief C custom defined SXTB16 for M3 and M0 processors
1015    */
__SXTB16(uint32_t x)1016   static __INLINE uint32_t __SXTB16(
1017   uint32_t x)
1018   {
1019     return ((uint32_t)(((((q31_t)x << 24) >> 24) & (q31_t)0x0000FFFF) |
1020                        ((((q31_t)x <<  8) >>  8) & (q31_t)0xFFFF0000)  ));
1021   }
1022 
1023 #endif /* defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0_FAMILY) */
1024 
1025 
1026   /**
1027    * @brief Instance structure for the Q7 FIR filter.
1028    */
1029   typedef struct
1030   {
1031     uint16_t numTaps;        /**< number of filter coefficients in the filter. */
1032     q7_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
1033     q7_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
1034   } arm_fir_instance_q7;
1035 
1036   /**
1037    * @brief Instance structure for the Q15 FIR filter.
1038    */
1039   typedef struct
1040   {
1041     uint16_t numTaps;         /**< number of filter coefficients in the filter. */
1042     q15_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
1043     q15_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
1044   } arm_fir_instance_q15;
1045 
1046   /**
1047    * @brief Instance structure for the Q31 FIR filter.
1048    */
1049   typedef struct
1050   {
1051     uint16_t numTaps;         /**< number of filter coefficients in the filter. */
1052     q31_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
1053     q31_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps. */
1054   } arm_fir_instance_q31;
1055 
1056   /**
1057    * @brief Instance structure for the floating-point FIR filter.
1058    */
1059   typedef struct
1060   {
1061     uint16_t numTaps;     /**< number of filter coefficients in the filter. */
1062     float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
1063     float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
1064   } arm_fir_instance_f32;
1065 
1066 
1067   /**
1068    * @brief Processing function for the Q7 FIR filter.
1069    * @param[in]  S          points to an instance of the Q7 FIR filter structure.
1070    * @param[in]  pSrc       points to the block of input data.
1071    * @param[out] pDst       points to the block of output data.
1072    * @param[in]  blockSize  number of samples to process.
1073    */
1074   void arm_fir_q7(
1075   const arm_fir_instance_q7 * S,
1076   q7_t * pSrc,
1077   q7_t * pDst,
1078   uint32_t blockSize);
1079 
1080 
1081   /**
1082    * @brief  Initialization function for the Q7 FIR filter.
1083    * @param[in,out] S          points to an instance of the Q7 FIR structure.
1084    * @param[in]     numTaps    Number of filter coefficients in the filter.
1085    * @param[in]     pCoeffs    points to the filter coefficients.
1086    * @param[in]     pState     points to the state buffer.
1087    * @param[in]     blockSize  number of samples that are processed.
1088    */
1089   void arm_fir_init_q7(
1090   arm_fir_instance_q7 * S,
1091   uint16_t numTaps,
1092   q7_t * pCoeffs,
1093   q7_t * pState,
1094   uint32_t blockSize);
1095 
1096 
1097   /**
1098    * @brief Processing function for the Q15 FIR filter.
1099    * @param[in]  S          points to an instance of the Q15 FIR structure.
1100    * @param[in]  pSrc       points to the block of input data.
1101    * @param[out] pDst       points to the block of output data.
1102    * @param[in]  blockSize  number of samples to process.
1103    */
1104   void arm_fir_q15(
1105   const arm_fir_instance_q15 * S,
1106   q15_t * pSrc,
1107   q15_t * pDst,
1108   uint32_t blockSize);
1109 
1110 
1111   /**
1112    * @brief Processing function for the fast Q15 FIR filter for Cortex-M3 and Cortex-M4.
1113    * @param[in]  S          points to an instance of the Q15 FIR filter structure.
1114    * @param[in]  pSrc       points to the block of input data.
1115    * @param[out] pDst       points to the block of output data.
1116    * @param[in]  blockSize  number of samples to process.
1117    */
1118   void arm_fir_fast_q15(
1119   const arm_fir_instance_q15 * S,
1120   q15_t * pSrc,
1121   q15_t * pDst,
1122   uint32_t blockSize);
1123 
1124 
1125   /**
1126    * @brief  Initialization function for the Q15 FIR filter.
1127    * @param[in,out] S          points to an instance of the Q15 FIR filter structure.
1128    * @param[in]     numTaps    Number of filter coefficients in the filter. Must be even and greater than or equal to 4.
1129    * @param[in]     pCoeffs    points to the filter coefficients.
1130    * @param[in]     pState     points to the state buffer.
1131    * @param[in]     blockSize  number of samples that are processed at a time.
1132    * @return The function returns ARM_MATH_SUCCESS if initialization was successful or ARM_MATH_ARGUMENT_ERROR if
1133    * <code>numTaps</code> is not a supported value.
1134    */
1135   arm_status arm_fir_init_q15(
1136   arm_fir_instance_q15 * S,
1137   uint16_t numTaps,
1138   q15_t * pCoeffs,
1139   q15_t * pState,
1140   uint32_t blockSize);
1141 
1142 
1143   /**
1144    * @brief Processing function for the Q31 FIR filter.
1145    * @param[in]  S          points to an instance of the Q31 FIR filter structure.
1146    * @param[in]  pSrc       points to the block of input data.
1147    * @param[out] pDst       points to the block of output data.
1148    * @param[in]  blockSize  number of samples to process.
1149    */
1150   void arm_fir_q31(
1151   const arm_fir_instance_q31 * S,
1152   q31_t * pSrc,
1153   q31_t * pDst,
1154   uint32_t blockSize);
1155 
1156 
1157   /**
1158    * @brief Processing function for the fast Q31 FIR filter for Cortex-M3 and Cortex-M4.
1159    * @param[in]  S          points to an instance of the Q31 FIR structure.
1160    * @param[in]  pSrc       points to the block of input data.
1161    * @param[out] pDst       points to the block of output data.
1162    * @param[in]  blockSize  number of samples to process.
1163    */
1164   void arm_fir_fast_q31(
1165   const arm_fir_instance_q31 * S,
1166   q31_t * pSrc,
1167   q31_t * pDst,
1168   uint32_t blockSize);
1169 
1170 
1171   /**
1172    * @brief  Initialization function for the Q31 FIR filter.
1173    * @param[in,out] S          points to an instance of the Q31 FIR structure.
1174    * @param[in]     numTaps    Number of filter coefficients in the filter.
1175    * @param[in]     pCoeffs    points to the filter coefficients.
1176    * @param[in]     pState     points to the state buffer.
1177    * @param[in]     blockSize  number of samples that are processed at a time.
1178    */
1179   void arm_fir_init_q31(
1180   arm_fir_instance_q31 * S,
1181   uint16_t numTaps,
1182   q31_t * pCoeffs,
1183   q31_t * pState,
1184   uint32_t blockSize);
1185 
1186 
1187   /**
1188    * @brief Processing function for the floating-point FIR filter.
1189    * @param[in]  S          points to an instance of the floating-point FIR structure.
1190    * @param[in]  pSrc       points to the block of input data.
1191    * @param[out] pDst       points to the block of output data.
1192    * @param[in]  blockSize  number of samples to process.
1193    */
1194   void arm_fir_f32(
1195   const arm_fir_instance_f32 * S,
1196   float32_t * pSrc,
1197   float32_t * pDst,
1198   uint32_t blockSize);
1199 
1200 
1201   /**
1202    * @brief  Initialization function for the floating-point FIR filter.
1203    * @param[in,out] S          points to an instance of the floating-point FIR filter structure.
1204    * @param[in]     numTaps    Number of filter coefficients in the filter.
1205    * @param[in]     pCoeffs    points to the filter coefficients.
1206    * @param[in]     pState     points to the state buffer.
1207    * @param[in]     blockSize  number of samples that are processed at a time.
1208    */
1209   void arm_fir_init_f32(
1210   arm_fir_instance_f32 * S,
1211   uint16_t numTaps,
1212   float32_t * pCoeffs,
1213   float32_t * pState,
1214   uint32_t blockSize);
1215 
1216 
1217   /**
1218    * @brief Instance structure for the Q15 Biquad cascade filter.
1219    */
1220   typedef struct
1221   {
1222     int8_t numStages;        /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
1223     q15_t *pState;           /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
1224     q15_t *pCoeffs;          /**< Points to the array of coefficients.  The array is of length 5*numStages. */
1225     int8_t postShift;        /**< Additional shift, in bits, applied to each output sample. */
1226   } arm_biquad_casd_df1_inst_q15;
1227 
1228   /**
1229    * @brief Instance structure for the Q31 Biquad cascade filter.
1230    */
1231   typedef struct
1232   {
1233     uint32_t numStages;      /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
1234     q31_t *pState;           /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
1235     q31_t *pCoeffs;          /**< Points to the array of coefficients.  The array is of length 5*numStages. */
1236     uint8_t postShift;       /**< Additional shift, in bits, applied to each output sample. */
1237   } arm_biquad_casd_df1_inst_q31;
1238 
1239   /**
1240    * @brief Instance structure for the floating-point Biquad cascade filter.
1241    */
1242   typedef struct
1243   {
1244     uint32_t numStages;      /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
1245     float32_t *pState;       /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
1246     float32_t *pCoeffs;      /**< Points to the array of coefficients.  The array is of length 5*numStages. */
1247   } arm_biquad_casd_df1_inst_f32;
1248 
1249 
1250   /**
1251    * @brief Processing function for the Q15 Biquad cascade filter.
1252    * @param[in]  S          points to an instance of the Q15 Biquad cascade structure.
1253    * @param[in]  pSrc       points to the block of input data.
1254    * @param[out] pDst       points to the block of output data.
1255    * @param[in]  blockSize  number of samples to process.
1256    */
1257   void arm_biquad_cascade_df1_q15(
1258   const arm_biquad_casd_df1_inst_q15 * S,
1259   q15_t * pSrc,
1260   q15_t * pDst,
1261   uint32_t blockSize);
1262 
1263 
1264   /**
1265    * @brief  Initialization function for the Q15 Biquad cascade filter.
1266    * @param[in,out] S          points to an instance of the Q15 Biquad cascade structure.
1267    * @param[in]     numStages  number of 2nd order stages in the filter.
1268    * @param[in]     pCoeffs    points to the filter coefficients.
1269    * @param[in]     pState     points to the state buffer.
1270    * @param[in]     postShift  Shift to be applied to the output. Varies according to the coefficients format
1271    */
1272   void arm_biquad_cascade_df1_init_q15(
1273   arm_biquad_casd_df1_inst_q15 * S,
1274   uint8_t numStages,
1275   q15_t * pCoeffs,
1276   q15_t * pState,
1277   int8_t postShift);
1278 
1279 
1280   /**
1281    * @brief Fast but less precise processing function for the Q15 Biquad cascade filter for Cortex-M3 and Cortex-M4.
1282    * @param[in]  S          points to an instance of the Q15 Biquad cascade structure.
1283    * @param[in]  pSrc       points to the block of input data.
1284    * @param[out] pDst       points to the block of output data.
1285    * @param[in]  blockSize  number of samples to process.
1286    */
1287   void arm_biquad_cascade_df1_fast_q15(
1288   const arm_biquad_casd_df1_inst_q15 * S,
1289   q15_t * pSrc,
1290   q15_t * pDst,
1291   uint32_t blockSize);
1292 
1293 
1294   /**
1295    * @brief Processing function for the Q31 Biquad cascade filter
1296    * @param[in]  S          points to an instance of the Q31 Biquad cascade structure.
1297    * @param[in]  pSrc       points to the block of input data.
1298    * @param[out] pDst       points to the block of output data.
1299    * @param[in]  blockSize  number of samples to process.
1300    */
1301   void arm_biquad_cascade_df1_q31(
1302   const arm_biquad_casd_df1_inst_q31 * S,
1303   q31_t * pSrc,
1304   q31_t * pDst,
1305   uint32_t blockSize);
1306 
1307 
1308   /**
1309    * @brief Fast but less precise processing function for the Q31 Biquad cascade filter for Cortex-M3 and Cortex-M4.
1310    * @param[in]  S          points to an instance of the Q31 Biquad cascade structure.
1311    * @param[in]  pSrc       points to the block of input data.
1312    * @param[out] pDst       points to the block of output data.
1313    * @param[in]  blockSize  number of samples to process.
1314    */
1315   void arm_biquad_cascade_df1_fast_q31(
1316   const arm_biquad_casd_df1_inst_q31 * S,
1317   q31_t * pSrc,
1318   q31_t * pDst,
1319   uint32_t blockSize);
1320 
1321 
1322   /**
1323    * @brief  Initialization function for the Q31 Biquad cascade filter.
1324    * @param[in,out] S          points to an instance of the Q31 Biquad cascade structure.
1325    * @param[in]     numStages  number of 2nd order stages in the filter.
1326    * @param[in]     pCoeffs    points to the filter coefficients.
1327    * @param[in]     pState     points to the state buffer.
1328    * @param[in]     postShift  Shift to be applied to the output. Varies according to the coefficients format
1329    */
1330   void arm_biquad_cascade_df1_init_q31(
1331   arm_biquad_casd_df1_inst_q31 * S,
1332   uint8_t numStages,
1333   q31_t * pCoeffs,
1334   q31_t * pState,
1335   int8_t postShift);
1336 
1337 
1338   /**
1339    * @brief Processing function for the floating-point Biquad cascade filter.
1340    * @param[in]  S          points to an instance of the floating-point Biquad cascade structure.
1341    * @param[in]  pSrc       points to the block of input data.
1342    * @param[out] pDst       points to the block of output data.
1343    * @param[in]  blockSize  number of samples to process.
1344    */
1345   void arm_biquad_cascade_df1_f32(
1346   const arm_biquad_casd_df1_inst_f32 * S,
1347   float32_t * pSrc,
1348   float32_t * pDst,
1349   uint32_t blockSize);
1350 
1351 
1352   /**
1353    * @brief  Initialization function for the floating-point Biquad cascade filter.
1354    * @param[in,out] S          points to an instance of the floating-point Biquad cascade structure.
1355    * @param[in]     numStages  number of 2nd order stages in the filter.
1356    * @param[in]     pCoeffs    points to the filter coefficients.
1357    * @param[in]     pState     points to the state buffer.
1358    */
1359   void arm_biquad_cascade_df1_init_f32(
1360   arm_biquad_casd_df1_inst_f32 * S,
1361   uint8_t numStages,
1362   float32_t * pCoeffs,
1363   float32_t * pState);
1364 
1365 
1366   /**
1367    * @brief Instance structure for the floating-point matrix structure.
1368    */
1369   typedef struct
1370   {
1371     uint16_t numRows;     /**< number of rows of the matrix.     */
1372     uint16_t numCols;     /**< number of columns of the matrix.  */
1373     float32_t *pData;     /**< points to the data of the matrix. */
1374   } arm_matrix_instance_f32;
1375 
1376 
1377   /**
1378    * @brief Instance structure for the floating-point matrix structure.
1379    */
1380   typedef struct
1381   {
1382     uint16_t numRows;     /**< number of rows of the matrix.     */
1383     uint16_t numCols;     /**< number of columns of the matrix.  */
1384     float64_t *pData;     /**< points to the data of the matrix. */
1385   } arm_matrix_instance_f64;
1386 
1387   /**
1388    * @brief Instance structure for the Q15 matrix structure.
1389    */
1390   typedef struct
1391   {
1392     uint16_t numRows;     /**< number of rows of the matrix.     */
1393     uint16_t numCols;     /**< number of columns of the matrix.  */
1394     q15_t *pData;         /**< points to the data of the matrix. */
1395   } arm_matrix_instance_q15;
1396 
1397   /**
1398    * @brief Instance structure for the Q31 matrix structure.
1399    */
1400   typedef struct
1401   {
1402     uint16_t numRows;     /**< number of rows of the matrix.     */
1403     uint16_t numCols;     /**< number of columns of the matrix.  */
1404     q31_t *pData;         /**< points to the data of the matrix. */
1405   } arm_matrix_instance_q31;
1406 
1407 
1408   /**
1409    * @brief Floating-point matrix addition.
1410    * @param[in]  pSrcA  points to the first input matrix structure
1411    * @param[in]  pSrcB  points to the second input matrix structure
1412    * @param[out] pDst   points to output matrix structure
1413    * @return     The function returns either
1414    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1415    */
1416   arm_status arm_mat_add_f32(
1417   const arm_matrix_instance_f32 * pSrcA,
1418   const arm_matrix_instance_f32 * pSrcB,
1419   arm_matrix_instance_f32 * pDst);
1420 
1421 
1422   /**
1423    * @brief Q15 matrix addition.
1424    * @param[in]   pSrcA  points to the first input matrix structure
1425    * @param[in]   pSrcB  points to the second input matrix structure
1426    * @param[out]  pDst   points to output matrix structure
1427    * @return     The function returns either
1428    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1429    */
1430   arm_status arm_mat_add_q15(
1431   const arm_matrix_instance_q15 * pSrcA,
1432   const arm_matrix_instance_q15 * pSrcB,
1433   arm_matrix_instance_q15 * pDst);
1434 
1435 
1436   /**
1437    * @brief Q31 matrix addition.
1438    * @param[in]  pSrcA  points to the first input matrix structure
1439    * @param[in]  pSrcB  points to the second input matrix structure
1440    * @param[out] pDst   points to output matrix structure
1441    * @return     The function returns either
1442    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1443    */
1444   arm_status arm_mat_add_q31(
1445   const arm_matrix_instance_q31 * pSrcA,
1446   const arm_matrix_instance_q31 * pSrcB,
1447   arm_matrix_instance_q31 * pDst);
1448 
1449 
1450   /**
1451    * @brief Floating-point, complex, matrix multiplication.
1452    * @param[in]  pSrcA  points to the first input matrix structure
1453    * @param[in]  pSrcB  points to the second input matrix structure
1454    * @param[out] pDst   points to output matrix structure
1455    * @return     The function returns either
1456    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1457    */
1458   arm_status arm_mat_cmplx_mult_f32(
1459   const arm_matrix_instance_f32 * pSrcA,
1460   const arm_matrix_instance_f32 * pSrcB,
1461   arm_matrix_instance_f32 * pDst);
1462 
1463 
1464   /**
1465    * @brief Q15, complex,  matrix multiplication.
1466    * @param[in]  pSrcA  points to the first input matrix structure
1467    * @param[in]  pSrcB  points to the second input matrix structure
1468    * @param[out] pDst   points to output matrix structure
1469    * @return     The function returns either
1470    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1471    */
1472   arm_status arm_mat_cmplx_mult_q15(
1473   const arm_matrix_instance_q15 * pSrcA,
1474   const arm_matrix_instance_q15 * pSrcB,
1475   arm_matrix_instance_q15 * pDst,
1476   q15_t * pScratch);
1477 
1478 
1479   /**
1480    * @brief Q31, complex, matrix multiplication.
1481    * @param[in]  pSrcA  points to the first input matrix structure
1482    * @param[in]  pSrcB  points to the second input matrix structure
1483    * @param[out] pDst   points to output matrix structure
1484    * @return     The function returns either
1485    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1486    */
1487   arm_status arm_mat_cmplx_mult_q31(
1488   const arm_matrix_instance_q31 * pSrcA,
1489   const arm_matrix_instance_q31 * pSrcB,
1490   arm_matrix_instance_q31 * pDst);
1491 
1492 
1493   /**
1494    * @brief Floating-point matrix transpose.
1495    * @param[in]  pSrc  points to the input matrix
1496    * @param[out] pDst  points to the output matrix
1497    * @return    The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
1498    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1499    */
1500   arm_status arm_mat_trans_f32(
1501   const arm_matrix_instance_f32 * pSrc,
1502   arm_matrix_instance_f32 * pDst);
1503 
1504 
1505   /**
1506    * @brief Q15 matrix transpose.
1507    * @param[in]  pSrc  points to the input matrix
1508    * @param[out] pDst  points to the output matrix
1509    * @return    The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
1510    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1511    */
1512   arm_status arm_mat_trans_q15(
1513   const arm_matrix_instance_q15 * pSrc,
1514   arm_matrix_instance_q15 * pDst);
1515 
1516 
1517   /**
1518    * @brief Q31 matrix transpose.
1519    * @param[in]  pSrc  points to the input matrix
1520    * @param[out] pDst  points to the output matrix
1521    * @return    The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
1522    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1523    */
1524   arm_status arm_mat_trans_q31(
1525   const arm_matrix_instance_q31 * pSrc,
1526   arm_matrix_instance_q31 * pDst);
1527 
1528 
1529   /**
1530    * @brief Floating-point matrix multiplication
1531    * @param[in]  pSrcA  points to the first input matrix structure
1532    * @param[in]  pSrcB  points to the second input matrix structure
1533    * @param[out] pDst   points to output matrix structure
1534    * @return     The function returns either
1535    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1536    */
1537   arm_status arm_mat_mult_f32(
1538   const arm_matrix_instance_f32 * pSrcA,
1539   const arm_matrix_instance_f32 * pSrcB,
1540   arm_matrix_instance_f32 * pDst);
1541 
1542 
1543   /**
1544    * @brief Q15 matrix multiplication
1545    * @param[in]  pSrcA   points to the first input matrix structure
1546    * @param[in]  pSrcB   points to the second input matrix structure
1547    * @param[out] pDst    points to output matrix structure
1548    * @param[in]  pState  points to the array for storing intermediate results
1549    * @return     The function returns either
1550    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1551    */
1552   arm_status arm_mat_mult_q15(
1553   const arm_matrix_instance_q15 * pSrcA,
1554   const arm_matrix_instance_q15 * pSrcB,
1555   arm_matrix_instance_q15 * pDst,
1556   q15_t * pState);
1557 
1558 
1559   /**
1560    * @brief Q15 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4
1561    * @param[in]  pSrcA   points to the first input matrix structure
1562    * @param[in]  pSrcB   points to the second input matrix structure
1563    * @param[out] pDst    points to output matrix structure
1564    * @param[in]  pState  points to the array for storing intermediate results
1565    * @return     The function returns either
1566    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1567    */
1568   arm_status arm_mat_mult_fast_q15(
1569   const arm_matrix_instance_q15 * pSrcA,
1570   const arm_matrix_instance_q15 * pSrcB,
1571   arm_matrix_instance_q15 * pDst,
1572   q15_t * pState);
1573 
1574 
1575   /**
1576    * @brief Q31 matrix multiplication
1577    * @param[in]  pSrcA  points to the first input matrix structure
1578    * @param[in]  pSrcB  points to the second input matrix structure
1579    * @param[out] pDst   points to output matrix structure
1580    * @return     The function returns either
1581    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1582    */
1583   arm_status arm_mat_mult_q31(
1584   const arm_matrix_instance_q31 * pSrcA,
1585   const arm_matrix_instance_q31 * pSrcB,
1586   arm_matrix_instance_q31 * pDst);
1587 
1588 
1589   /**
1590    * @brief Q31 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4
1591    * @param[in]  pSrcA  points to the first input matrix structure
1592    * @param[in]  pSrcB  points to the second input matrix structure
1593    * @param[out] pDst   points to output matrix structure
1594    * @return     The function returns either
1595    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1596    */
1597   arm_status arm_mat_mult_fast_q31(
1598   const arm_matrix_instance_q31 * pSrcA,
1599   const arm_matrix_instance_q31 * pSrcB,
1600   arm_matrix_instance_q31 * pDst);
1601 
1602 
1603   /**
1604    * @brief Floating-point matrix subtraction
1605    * @param[in]  pSrcA  points to the first input matrix structure
1606    * @param[in]  pSrcB  points to the second input matrix structure
1607    * @param[out] pDst   points to output matrix structure
1608    * @return     The function returns either
1609    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1610    */
1611   arm_status arm_mat_sub_f32(
1612   const arm_matrix_instance_f32 * pSrcA,
1613   const arm_matrix_instance_f32 * pSrcB,
1614   arm_matrix_instance_f32 * pDst);
1615 
1616 
1617   /**
1618    * @brief Q15 matrix subtraction
1619    * @param[in]  pSrcA  points to the first input matrix structure
1620    * @param[in]  pSrcB  points to the second input matrix structure
1621    * @param[out] pDst   points to output matrix structure
1622    * @return     The function returns either
1623    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1624    */
1625   arm_status arm_mat_sub_q15(
1626   const arm_matrix_instance_q15 * pSrcA,
1627   const arm_matrix_instance_q15 * pSrcB,
1628   arm_matrix_instance_q15 * pDst);
1629 
1630 
1631   /**
1632    * @brief Q31 matrix subtraction
1633    * @param[in]  pSrcA  points to the first input matrix structure
1634    * @param[in]  pSrcB  points to the second input matrix structure
1635    * @param[out] pDst   points to output matrix structure
1636    * @return     The function returns either
1637    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1638    */
1639   arm_status arm_mat_sub_q31(
1640   const arm_matrix_instance_q31 * pSrcA,
1641   const arm_matrix_instance_q31 * pSrcB,
1642   arm_matrix_instance_q31 * pDst);
1643 
1644 
1645   /**
1646    * @brief Floating-point matrix scaling.
1647    * @param[in]  pSrc   points to the input matrix
1648    * @param[in]  scale  scale factor
1649    * @param[out] pDst   points to the output matrix
1650    * @return     The function returns either
1651    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1652    */
1653   arm_status arm_mat_scale_f32(
1654   const arm_matrix_instance_f32 * pSrc,
1655   float32_t scale,
1656   arm_matrix_instance_f32 * pDst);
1657 
1658 
1659   /**
1660    * @brief Q15 matrix scaling.
1661    * @param[in]  pSrc        points to the input matrix structure
1662    * @param[in]  scaleFract  fractional portion of the scale factor
1663    * @param[in]  shift       number of bits to shift the result by
1664    * @param[out] pDst        points to the output matrix structure
1665    * @return     The function returns either
1666    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1667    */
1668   arm_status arm_mat_scale_q15(
1669   const arm_matrix_instance_q15 * pSrc,
1670   q15_t scaleFract,
1671   int32_t shift,
1672   arm_matrix_instance_q15 * pDst);
1673 
1674 
1675   /**
1676    * @brief Q31 matrix scaling.
1677    * @param[in]  pSrc        points to the input matrix structure
1678    * @param[in]  scaleFract  fractional portion of the scale factor
1679    * @param[in]  shift       number of bits to shift the result by
1680    * @param[out] pDst        points to the output matrix structure
1681    * @return     The function returns either
1682    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1683    */
1684   arm_status arm_mat_scale_q31(
1685   const arm_matrix_instance_q31 * pSrc,
1686   q31_t scaleFract,
1687   int32_t shift,
1688   arm_matrix_instance_q31 * pDst);
1689 
1690 
1691   /**
1692    * @brief  Q31 matrix initialization.
1693    * @param[in,out] S         points to an instance of the Q31 matrix structure.
1694    * @param[in]     nRows     number of rows in the matrix.
1695    * @param[in]     nColumns  number of columns in the matrix.
1696    * @param[in]     pData     points to the matrix data array of q31_t values.
1697    */
1698   void arm_mat_init_q31(
1699   arm_matrix_instance_q31 * S,
1700   uint16_t nRows,
1701   uint16_t nColumns,
1702   q31_t * pData);
1703 
1704 
1705   /**
1706    * @brief  Q15 matrix initialization.
1707    * @param[in,out] S         points to an instance of the Q15 matrix structure.
1708    * @param[in]     nRows     number of rows in the matrix.
1709    * @param[in]     nColumns  number of columns in the matrix.
1710    * @param[in]     pData     points to the matrix data array of q15_t values.
1711    */
1712   void arm_mat_init_q15(
1713   arm_matrix_instance_q15 * S,
1714   uint16_t nRows,
1715   uint16_t nColumns,
1716   q15_t * pData);
1717 
1718 
1719   /**
1720    * @brief  Floating-point matrix initialization.
1721    * @param[in,out] S         points to an instance of the floating-point matrix structure.
1722    * @param[in]     nRows     number of rows in the matrix.
1723    * @param[in]     nColumns  number of columns in the matrix.
1724    * @param[in]     pData     points to the matrix data array of float32_t values.
1725    */
1726   void arm_mat_init_f32(
1727   arm_matrix_instance_f32 * S,
1728   uint16_t nRows,
1729   uint16_t nColumns,
1730   float32_t * pData);
1731 
1732 
1733 
1734   /**
1735    * @brief Instance structure for the Q15 PID Control. The layout of the derived gains after A0 depends on ARM_MATH_CM0_FAMILY.
1736    */
1737   typedef struct
1738   {
1739     q15_t A0;           /**< The derived gain, A0 = Kp + Ki + Kd. */
1740 #ifdef ARM_MATH_CM0_FAMILY
1741     q15_t A1;           /**< The derived gain; presumably A1 = -Kp - 2Kd, as in the Q31 and floating-point instances - confirm. */
1742     q15_t A2;           /**< The derived gain; presumably A2 = Kd, as in the Q31 and floating-point instances - confirm. */
1743 #else
1744     q31_t A1;           /**< The derived gains -Kp - 2Kd and Kd held together in one q31_t; presumably packed as two 16-bit halves - confirm order. */
1745 #endif
1746     q15_t state[3];     /**< The state array of length 3. */
1747     q15_t Kp;           /**< The proportional gain. */
1748     q15_t Ki;           /**< The integral gain. */
1749     q15_t Kd;           /**< The derivative gain. */
1750   } arm_pid_instance_q15;
1751 
1752   /**
1753    * @brief Instance structure for the Q31 PID Control.
1754    */
1755   typedef struct
1756   {
1757     q31_t A0;            /**< The derived gain, A0 = Kp + Ki + Kd. */
1758     q31_t A1;            /**< The derived gain, A1 = -Kp - 2Kd. */
1759     q31_t A2;            /**< The derived gain, A2 = Kd. */
1760     q31_t state[3];      /**< The state array of length 3. */
1761     q31_t Kp;            /**< The proportional gain. */
1762     q31_t Ki;            /**< The integral gain. */
1763     q31_t Kd;            /**< The derivative gain. */
1764   } arm_pid_instance_q31;
1765 
1766   /**
1767    * @brief Instance structure for the floating-point PID Control.
1768    */
1769   typedef struct
1770   {
1771     float32_t A0;          /**< The derived gain, A0 = Kp + Ki + Kd. */
1772     float32_t A1;          /**< The derived gain, A1 = -Kp - 2Kd. */
1773     float32_t A2;          /**< The derived gain, A2 = Kd. */
1774     float32_t state[3];    /**< The state array of length 3. */
1775     float32_t Kp;          /**< The proportional gain. */
1776     float32_t Ki;          /**< The integral gain. */
1777     float32_t Kd;          /**< The derivative gain. */
1778   } arm_pid_instance_f32;
1779 
1780 
1781 
1782   /**
1783    * @brief  Initialization function for the floating-point PID Control.
1784    * @param[in,out] S               points to an instance of the floating-point PID structure.
1785    * @param[in]     resetStateFlag  flag to reset the state: 0 = no change in state, 1 = reset the state.
1786    */
1787   void arm_pid_init_f32(
1788   arm_pid_instance_f32 * S,
1789   int32_t resetStateFlag);
1790 
1791 
1792   /**
1793    * @brief  Reset function for the floating-point PID Control.
1794    * @param[in,out] S  points to an instance of the floating-point PID Control structure
1795    */
1796   void arm_pid_reset_f32(
1797   arm_pid_instance_f32 * S);
1798 
1799 
1800   /**
1801    * @brief  Initialization function for the Q31 PID Control.
1802    * @param[in,out] S               points to an instance of the Q31 PID structure.
1803    * @param[in]     resetStateFlag  flag to reset the state: 0 = no change in state, 1 = reset the state.
1804    */
1805   void arm_pid_init_q31(
1806   arm_pid_instance_q31 * S,
1807   int32_t resetStateFlag);
1808 
1809 
1810   /**
1811    * @brief  Reset function for the Q31 PID Control.
1812    * @param[in,out] S  points to an instance of the Q31 PID Control structure
1813    */
1814 
1815   void arm_pid_reset_q31(
1816   arm_pid_instance_q31 * S);
1817 
1818 
1819   /**
1820    * @brief  Initialization function for the Q15 PID Control.
1821    * @param[in,out] S               points to an instance of the Q15 PID structure.
1822    * @param[in]     resetStateFlag  flag to reset the state: 0 = no change in state, 1 = reset the state.
1823    */
1824   void arm_pid_init_q15(
1825   arm_pid_instance_q15 * S,
1826   int32_t resetStateFlag);
1827 
1828 
1829   /**
1830    * @brief  Reset function for the Q15 PID Control.
1831    * @param[in,out] S  points to an instance of the Q15 PID Control structure
1832    */
1833   void arm_pid_reset_q15(
1834   arm_pid_instance_q15 * S);
1835 
1836 
1837   /**
1838    * @brief Instance structure for the floating-point Linear Interpolate function.
1839    */
1840   typedef struct
1841   {
1842     uint32_t nValues;           /**< nValues; presumably the number of entries in the pYData table - confirm */
1843     float32_t x1;               /**< x1; presumably the x value paired with the first table entry - confirm */
1844     float32_t xSpacing;         /**< xSpacing; presumably the x distance between consecutive table entries - confirm */
1845     float32_t *pYData;          /**< pointer to the table of Y values */
1846   } arm_linear_interp_instance_f32;
1847 
1848   /**
1849    * @brief Instance structure for the floating-point bilinear interpolation function.
1850    */
1851   typedef struct
1852   {
1853     uint16_t numRows;   /**< number of rows in the data table. */
1854     uint16_t numCols;   /**< number of columns in the data table. */
1855     float32_t *pData;   /**< points to the data table of float32_t values. */
1856   } arm_bilinear_interp_instance_f32;
1857 
1858    /**
1859    * @brief Instance structure for the Q31 bilinear interpolation function.
1860    */
1861   typedef struct
1862   {
1863     uint16_t numRows;   /**< number of rows in the data table. */
1864     uint16_t numCols;   /**< number of columns in the data table. */
1865     q31_t *pData;       /**< points to the data table of q31_t values. */
1866   } arm_bilinear_interp_instance_q31;
1867 
1868    /**
1869    * @brief Instance structure for the Q15 bilinear interpolation function.
1870    */
1871   typedef struct
1872   {
1873     uint16_t numRows;   /**< number of rows in the data table. */
1874     uint16_t numCols;   /**< number of columns in the data table. */
1875     q15_t *pData;       /**< points to the data table of q15_t values. */
1876   } arm_bilinear_interp_instance_q15;
1877 
1878    /**
1879    * @brief Instance structure for the Q7 bilinear interpolation function.
1880    */
1881   typedef struct
1882   {
1883     uint16_t numRows;   /**< number of rows in the data table. */
1884     uint16_t numCols;   /**< number of columns in the data table. */
1885     q7_t *pData;        /**< points to the data table of q7_t values. */
1886   } arm_bilinear_interp_instance_q7;
1887 
1888 
1889   /**
1890    * @brief Q7 vector multiplication.
1891    * @param[in]  pSrcA      points to the first input vector (documented as input only; not const-qualified in this prototype)
1892    * @param[in]  pSrcB      points to the second input vector (documented as input only; not const-qualified in this prototype)
1893    * @param[out] pDst       points to the output vector
1894    * @param[in]  blockSize  number of samples in each vector
1895    */
1896   void arm_mult_q7(
1897   q7_t * pSrcA,
1898   q7_t * pSrcB,
1899   q7_t * pDst,
1900   uint32_t blockSize);
1901 
1902 
1903   /**
1904    * @brief Q15 vector multiplication.
1905    * @param[in]  pSrcA      points to the first input vector (documented as input only; not const-qualified in this prototype)
1906    * @param[in]  pSrcB      points to the second input vector (documented as input only; not const-qualified in this prototype)
1907    * @param[out] pDst       points to the output vector
1908    * @param[in]  blockSize  number of samples in each vector
1909    */
1910   void arm_mult_q15(
1911   q15_t * pSrcA,
1912   q15_t * pSrcB,
1913   q15_t * pDst,
1914   uint32_t blockSize);
1915 
1916 
1917   /**
1918    * @brief Q31 vector multiplication.
1919    * @param[in]  pSrcA      points to the first input vector (documented as input only; not const-qualified in this prototype)
1920    * @param[in]  pSrcB      points to the second input vector (documented as input only; not const-qualified in this prototype)
1921    * @param[out] pDst       points to the output vector
1922    * @param[in]  blockSize  number of samples in each vector
1923    */
1924   void arm_mult_q31(
1925   q31_t * pSrcA,
1926   q31_t * pSrcB,
1927   q31_t * pDst,
1928   uint32_t blockSize);
1929 
1930 
1931   /**
1932    * @brief Floating-point vector multiplication.
1933    * @param[in]  pSrcA      points to the first input vector (documented as input only; not const-qualified in this prototype)
1934    * @param[in]  pSrcB      points to the second input vector (documented as input only; not const-qualified in this prototype)
1935    * @param[out] pDst       points to the output vector
1936    * @param[in]  blockSize  number of samples in each vector
1937    */
1938   void arm_mult_f32(
1939   float32_t * pSrcA,
1940   float32_t * pSrcB,
1941   float32_t * pDst,
1942   uint32_t blockSize);
1943 
1944 
1945   /**
1946    * @brief Instance structure for the Radix-2 Q15 CFFT/CIFFT function.
1947    */
1948   typedef struct
1949   {
1950     uint16_t fftLen;                 /**< length of the FFT. */
1951     uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
1952     uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
1953     q15_t *pTwiddle;                 /**< points to the twiddle factor table. */
1954     uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
1955     uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1956     uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
1957   } arm_cfft_radix2_instance_q15;
1958 
1959 /* Deprecated. Initialization prototype for the Radix-2 Q15 CFFT/CIFFT instance; fftLen, ifftFlag and bitReverseFlag share their names with fields of that instance structure. */
1960   arm_status arm_cfft_radix2_init_q15(
1961   arm_cfft_radix2_instance_q15 * S,
1962   uint16_t fftLen,
1963   uint8_t ifftFlag,
1964   uint8_t bitReverseFlag);
1965 
1966 /* Deprecated. Processing prototype for the Radix-2 Q15 CFFT/CIFFT; takes a read-only instance S and a q15_t buffer pSrc (buffer layout not documented here). */
1967   void arm_cfft_radix2_q15(
1968   const arm_cfft_radix2_instance_q15 * S,
1969   q15_t * pSrc);
1970 
1971 
1972   /**
1973    * @brief Instance structure for the Radix-4 Q15 CFFT/CIFFT function.
1974    */
1975   typedef struct
1976   {
1977     uint16_t fftLen;                 /**< length of the FFT. */
1978     uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
1979     uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
1980     q15_t *pTwiddle;                 /**< points to the twiddle factor table. */
1981     uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
1982     uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1983     uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
1984   } arm_cfft_radix4_instance_q15;
1985 
1986 /* Deprecated. Initialization prototype for the Radix-4 Q15 CFFT/CIFFT instance; fftLen, ifftFlag and bitReverseFlag share their names with fields of that instance structure. */
1987   arm_status arm_cfft_radix4_init_q15(
1988   arm_cfft_radix4_instance_q15 * S,
1989   uint16_t fftLen,
1990   uint8_t ifftFlag,
1991   uint8_t bitReverseFlag);
1992 
1993 /* Deprecated. Processing prototype for the Radix-4 Q15 CFFT/CIFFT; takes a read-only instance S and a q15_t buffer pSrc (buffer layout not documented here). */
1994   void arm_cfft_radix4_q15(
1995   const arm_cfft_radix4_instance_q15 * S,
1996   q15_t * pSrc);
1997 
1998   /**
1999    * @brief Instance structure for the Radix-2 Q31 CFFT/CIFFT function.
2000    */
2001   typedef struct
2002   {
2003     uint16_t fftLen;                 /**< length of the FFT. */
2004     uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2005     uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2006     q31_t *pTwiddle;                 /**< points to the twiddle factor table. */
2007     uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
2008     uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2009     uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2010   } arm_cfft_radix2_instance_q31;
2011 
2012 /* Deprecated. Initialization prototype for the Radix-2 Q31 CFFT/CIFFT instance; fftLen, ifftFlag and bitReverseFlag share their names with fields of that instance structure. */
2013   arm_status arm_cfft_radix2_init_q31(
2014   arm_cfft_radix2_instance_q31 * S,
2015   uint16_t fftLen,
2016   uint8_t ifftFlag,
2017   uint8_t bitReverseFlag);
2018 
2019 /* Deprecated. Processing prototype for the Radix-2 Q31 CFFT/CIFFT; takes a read-only instance S and a q31_t buffer pSrc (buffer layout not documented here). */
2020   void arm_cfft_radix2_q31(
2021   const arm_cfft_radix2_instance_q31 * S,
2022   q31_t * pSrc);
2023 
2024   /**
2025    * @brief Instance structure for the Radix-4 Q31 CFFT/CIFFT function.
2026    */
2027   typedef struct
2028   {
2029     uint16_t fftLen;                 /**< length of the FFT. */
2030     uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2031     uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2032     q31_t *pTwiddle;                 /**< points to the twiddle factor table. */
2033     uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
2034     uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2035     uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2036   } arm_cfft_radix4_instance_q31;
2037 
2038 /* Deprecated. Processing prototype for the Radix-4 Q31 CFFT/CIFFT; takes a read-only instance S and a q31_t buffer pSrc (buffer layout not documented here). */
2039   void arm_cfft_radix4_q31(
2040   const arm_cfft_radix4_instance_q31 * S,
2041   q31_t * pSrc);
2042 
2043 /* Deprecated. Initialization prototype for the Radix-4 Q31 CFFT/CIFFT instance; fftLen, ifftFlag and bitReverseFlag share their names with fields of that instance structure. */
2044   arm_status arm_cfft_radix4_init_q31(
2045   arm_cfft_radix4_instance_q31 * S,
2046   uint16_t fftLen,
2047   uint8_t ifftFlag,
2048   uint8_t bitReverseFlag);
2049 
2050   /**
2051    * @brief Instance structure for the Radix-2 floating-point CFFT/CIFFT function.
2052    */
2053   typedef struct
2054   {
2055     uint16_t fftLen;                   /**< length of the FFT. */
2056     uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2057     uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2058     float32_t *pTwiddle;               /**< points to the twiddle factor table. */
2059     uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
2060     uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2061     uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2062     float32_t onebyfftLen;             /**< value of 1/fftLen. */
2063   } arm_cfft_radix2_instance_f32;
2064 
2065 /* Deprecated. Initialization prototype for the Radix-2 floating-point CFFT/CIFFT instance; fftLen, ifftFlag and bitReverseFlag share their names with fields of that instance structure. */
2066   arm_status arm_cfft_radix2_init_f32(
2067   arm_cfft_radix2_instance_f32 * S,
2068   uint16_t fftLen,
2069   uint8_t ifftFlag,
2070   uint8_t bitReverseFlag);
2071 
2072 /* Deprecated. Processing prototype for the Radix-2 floating-point CFFT/CIFFT; takes a read-only instance S and a float32_t buffer pSrc (buffer layout not documented here). */
2073   void arm_cfft_radix2_f32(
2074   const arm_cfft_radix2_instance_f32 * S,
2075   float32_t * pSrc);
2076 
2077   /**
2078    * @brief Instance structure for the Radix-4 floating-point CFFT/CIFFT function.
2079    */
2080   typedef struct
2081   {
2082     uint16_t fftLen;                   /**< length of the FFT. */
2083     uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2084     uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2085     float32_t *pTwiddle;               /**< points to the twiddle factor table. */
2086     uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
2087     uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2088     uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2089     float32_t onebyfftLen;             /**< value of 1/fftLen. */
2090   } arm_cfft_radix4_instance_f32;
2091 
2092 /* Deprecated. Initialization prototype for the Radix-4 floating-point CFFT/CIFFT instance; fftLen, ifftFlag and bitReverseFlag share their names with fields of that instance structure. */
2093   arm_status arm_cfft_radix4_init_f32(
2094   arm_cfft_radix4_instance_f32 * S,
2095   uint16_t fftLen,
2096   uint8_t ifftFlag,
2097   uint8_t bitReverseFlag);
2098 
2099 /* Deprecated. Processing prototype for the Radix-4 floating-point CFFT/CIFFT; takes a read-only instance S and a float32_t buffer pSrc (buffer layout not documented here). */
2100   void arm_cfft_radix4_f32(
2101   const arm_cfft_radix4_instance_f32 * S,
2102   float32_t * pSrc);
2103 
2104   /**
2105    * @brief Instance structure for the Q15 fixed-point CFFT/CIFFT function.
2106    */
2107   typedef struct
2108   {
2109     uint16_t fftLen;                   /**< length of the FFT. */
2110     const q15_t *pTwiddle;             /**< points to the twiddle factor table. */
2111     const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
2112     uint16_t bitRevLength;             /**< bit reversal table length. */
2113   } arm_cfft_instance_q15;
2114 
2115 void arm_cfft_q15(
2116     const arm_cfft_instance_q15 * S,   /* points to the Q15 CFFT/CIFFT instance structure (read-only) */
2117     q15_t * p1,                        /* q15_t data buffer; NOTE(review): layout and in-place use are not documented here - confirm */
2118     uint8_t ifftFlag,                  /* presumably 0 = forward, 1 = inverse transform, as for the radix-2/4 instance field of the same name - confirm */
2119     uint8_t bitReverseFlag);           /* presumably 1 enables, 0 disables bit reversal of output, as for the radix-2/4 instance field of the same name - confirm */
2120 
2121   /**
2122    * @brief Instance structure for the Q31 fixed-point CFFT/CIFFT function.
2123    */
2124   typedef struct
2125   {
2126     uint16_t fftLen;                   /**< length of the FFT. */
2127     const q31_t *pTwiddle;             /**< points to the twiddle factor table. */
2128     const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
2129     uint16_t bitRevLength;             /**< bit reversal table length. */
2130   } arm_cfft_instance_q31;
2131 
2132 void arm_cfft_q31(
2133     const arm_cfft_instance_q31 * S,   /* points to the Q31 CFFT/CIFFT instance structure (read-only) */
2134     q31_t * p1,                        /* q31_t data buffer; NOTE(review): layout and in-place use are not documented here - confirm */
2135     uint8_t ifftFlag,                  /* presumably 0 = forward, 1 = inverse transform, as for the radix-2/4 instance field of the same name - confirm */
2136     uint8_t bitReverseFlag);           /* presumably 1 enables, 0 disables bit reversal of output, as for the radix-2/4 instance field of the same name - confirm */
2137 
2138   /**
2139    * @brief Instance structure for the floating-point CFFT/CIFFT function.
2140    */
2141   typedef struct
2142   {
2143     uint16_t fftLen;                   /**< length of the FFT. */
2144     const float32_t *pTwiddle;         /**< points to the twiddle factor table. */
2145     const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
2146     uint16_t bitRevLength;             /**< bit reversal table length. */
2147   } arm_cfft_instance_f32;
2148 
2149   void arm_cfft_f32(
2150   const arm_cfft_instance_f32 * S,   /* points to the floating-point CFFT/CIFFT instance structure (read-only) */
2151   float32_t * p1,                    /* float32_t data buffer; NOTE(review): layout and in-place use are not documented here - confirm */
2152   uint8_t ifftFlag,                  /* presumably 0 = forward, 1 = inverse transform, as for the radix-2/4 instance field of the same name - confirm */
2153   uint8_t bitReverseFlag);           /* presumably 1 enables, 0 disables bit reversal of output, as for the radix-2/4 instance field of the same name - confirm */
2154 
2155   /**
2156    * @brief Instance structure for the Q15 RFFT/RIFFT function.
2157    */
2158   typedef struct
2159   {
2160     uint32_t fftLenReal;                      /**< length of the real FFT. */
2161     uint8_t ifftFlagR;                        /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
2162     uint8_t bitReverseFlagR;                  /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
2163     uint32_t twidCoefRModifier;               /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2164     q15_t *pTwiddleAReal;                     /**< points to the real twiddle factor table. */
2165     q15_t *pTwiddleBReal;                     /**< points to the imaginary twiddle factor table. */
2166     const arm_cfft_instance_q15 *pCfft;       /**< points to the Q15 complex FFT instance. */
2167   } arm_rfft_instance_q15;
2168 
2169   arm_status arm_rfft_init_q15(
2170   arm_rfft_instance_q15 * S,   /* points to the Q15 RFFT/RIFFT instance structure */
2171   uint32_t fftLenReal,         /* shares its name with the instance field documented as the length of the real FFT */
2172   uint32_t ifftFlagR,          /* shares its name with the instance field documented as 0 = forward, 1 = inverse transform */
2173   uint32_t bitReverseFlag);    /* presumably corresponds to the instance field bitReverseFlagR (1 = enable, 0 = disable bit reversal of output) - confirm */
2174 
2175   void arm_rfft_q15(
2176   const arm_rfft_instance_q15 * S,   /* points to the Q15 RFFT/RIFFT instance structure (read-only) */
2177   q15_t * pSrc,                      /* presumably the input buffer - confirm; required size is not documented here */
2178   q15_t * pDst);                     /* presumably the output buffer - confirm; required size is not documented here */
2179 
2180   /**
2181    * @brief Instance structure for the Q31 RFFT/RIFFT function.
2182    */
2183   typedef struct
2184   {
2185     uint32_t fftLenReal;                        /**< length of the real FFT. */
2186     uint8_t ifftFlagR;                          /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
2187     uint8_t bitReverseFlagR;                    /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
2188     uint32_t twidCoefRModifier;                 /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2189     q31_t *pTwiddleAReal;                       /**< points to the real twiddle factor table. */
2190     q31_t *pTwiddleBReal;                       /**< points to the imaginary twiddle factor table. */
2191     const arm_cfft_instance_q31 *pCfft;         /**< points to the Q31 complex FFT instance. */
2192   } arm_rfft_instance_q31;
2193 
2194   arm_status arm_rfft_init_q31(
2195   arm_rfft_instance_q31 * S,   /* points to the Q31 RFFT/RIFFT instance structure */
2196   uint32_t fftLenReal,         /* shares its name with the instance field documented as the length of the real FFT */
2197   uint32_t ifftFlagR,          /* shares its name with the instance field documented as 0 = forward, 1 = inverse transform */
2198   uint32_t bitReverseFlag);    /* presumably corresponds to the instance field bitReverseFlagR (1 = enable, 0 = disable bit reversal of output) - confirm */
2199 
2200   void arm_rfft_q31(
2201   const arm_rfft_instance_q31 * S,   /* points to the Q31 RFFT/RIFFT instance structure (read-only) */
2202   q31_t * pSrc,                      /* presumably the input buffer - confirm; required size is not documented here */
2203   q31_t * pDst);                     /* presumably the output buffer - confirm; required size is not documented here */
2204 
2205   /**
2206    * @brief Instance structure for the floating-point RFFT/RIFFT function.
2207    */
2208   typedef struct
2209   {
2210     uint32_t fftLenReal;                        /**< length of the real FFT. */
2211     uint16_t fftLenBy2;                         /**< length of the complex FFT. */
2212     uint8_t ifftFlagR;                          /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
2213     uint8_t bitReverseFlagR;                    /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
2214     uint32_t twidCoefRModifier;                 /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2215     float32_t *pTwiddleAReal;                   /**< points to the real twiddle factor table. */
2216     float32_t *pTwiddleBReal;                   /**< points to the imaginary twiddle factor table. */
2217     arm_cfft_radix4_instance_f32 *pCfft;        /**< points to the radix-4 complex FFT instance. */
2218   } arm_rfft_instance_f32;
2219 
2220   arm_status arm_rfft_init_f32(
2221   arm_rfft_instance_f32 * S,               /* points to the floating-point RFFT/RIFFT instance structure */
2222   arm_cfft_radix4_instance_f32 * S_CFFT,   /* points to a radix-4 floating-point CFFT/CIFFT instance; presumably stored in S->pCfft - confirm */
2223   uint32_t fftLenReal,                     /* shares its name with the instance field documented as the length of the real FFT */
2224   uint32_t ifftFlagR,                      /* shares its name with the instance field documented as 0 = forward, 1 = inverse transform */
2225   uint32_t bitReverseFlag);                /* presumably corresponds to the instance field bitReverseFlagR (1 = enable, 0 = disable bit reversal of output) - confirm */
2226 
2227   void arm_rfft_f32(
2228   const arm_rfft_instance_f32 * S,   /* points to the floating-point RFFT/RIFFT instance structure (read-only) */
2229   float32_t * pSrc,                  /* presumably the input buffer - confirm; required size is not documented here */
2230   float32_t * pDst);                 /* presumably the output buffer - confirm; required size is not documented here */
2231 
2232   /**
2233    * @brief Instance structure for the floating-point RFFT/RIFFT function arm_rfft_fast_f32 (distinct from arm_rfft_instance_f32).
2234    */
2235 typedef struct
2236   {
2237     arm_cfft_instance_f32 Sint;      /**< Internal CFFT structure (embedded by value). */
2238     uint16_t fftLenRFFT;             /**< length of the real sequence */
2239     float32_t * pTwiddleRFFT;        /**< Twiddle factors real stage  */
2240   } arm_rfft_fast_instance_f32 ;
2241 
2242 arm_status arm_rfft_fast_init_f32 (
2243    arm_rfft_fast_instance_f32 * S,   /* points to the arm_rfft_fast_instance_f32 structure */
2244    uint16_t fftLen);                 /* presumably the length of the real sequence (cf. the fftLenRFFT field) - confirm */
2245 
2246 void arm_rfft_fast_f32(
2247   arm_rfft_fast_instance_f32 * S,     /* points to the arm_rfft_fast_instance_f32 structure (not const-qualified in this prototype) */
2248   float32_t * p, float32_t * pOut,    /* presumably p is the input buffer and pOut the output buffer - confirm */
2249   uint8_t ifftFlag);                  /* presumably 0 = forward, 1 = inverse transform, as for the ifftFlag instance fields in this header - confirm */
2250 
2251   /**
2252    * @brief Instance structure for the floating-point DCT4/IDCT4 function.
2253    */
2254   typedef struct
2255   {
2256     uint16_t N;                          /**< length of the DCT4. */
2257     uint16_t Nby2;                       /**< half of the length of the DCT4. */
2258     float32_t normalize;                 /**< normalizing factor. */
2259     float32_t *pTwiddle;                 /**< points to the twiddle factor table. */
2260     float32_t *pCosFactor;               /**< points to the cosFactor table. */
2261     arm_rfft_instance_f32 *pRfft;        /**< points to the real FFT instance. */
2262     arm_cfft_radix4_instance_f32 *pCfft; /**< points to the radix-4 complex FFT instance. */
2263   } arm_dct4_instance_f32;
2264 
2265 
2266   /**
2267    * @brief  Initialization function for the floating-point DCT4/IDCT4.
2268    * @param[in,out] S          points to an instance of floating-point DCT4/IDCT4 structure.
2269    * @param[in]     S_RFFT     points to an instance of floating-point RFFT/RIFFT structure.
2270    * @param[in]     S_CFFT     points to an instance of floating-point CFFT/CIFFT structure.
2271    * @param[in]     N          length of the DCT4.
2272    * @param[in]     Nby2       half of the length of the DCT4.
2273    * @param[in]     normalize  normalizing factor.
2274    * @return      arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
2275    */
2276   arm_status arm_dct4_init_f32(
2277   arm_dct4_instance_f32 * S,
2278   arm_rfft_instance_f32 * S_RFFT,
2279   arm_cfft_radix4_instance_f32 * S_CFFT,
2280   uint16_t N,
2281   uint16_t Nby2,
2282   float32_t normalize);
2283 
2284 
2285   /**
2286    * @brief Processing function for the floating-point DCT4/IDCT4.
2287    * @param[in]     S              points to an instance of the floating-point DCT4/IDCT4 structure.
2288    * @param[in]     pState         points to the state buffer (documented as input; not const-qualified in this prototype).
2289    * @param[in,out] pInlineBuffer  points to the in-place input and output buffer.
2290    */
2291   void arm_dct4_f32(
2292   const arm_dct4_instance_f32 * S,
2293   float32_t * pState,
2294   float32_t * pInlineBuffer);
2295 
2296 
2297   /**
2298    * @brief Instance structure for the Q31 DCT4/IDCT4 function.
2299    */
2300   typedef struct
2301   {
2302     uint16_t N;                          /**< length of the DCT4. */
2303     uint16_t Nby2;                       /**< half of the length of the DCT4. */
2304     q31_t normalize;                     /**< normalizing factor. */
2305     q31_t *pTwiddle;                     /**< points to the twiddle factor table. */
2306     q31_t *pCosFactor;                   /**< points to the cosFactor table. */
2307     arm_rfft_instance_q31 *pRfft;        /**< points to the real FFT instance. */
2308     arm_cfft_radix4_instance_q31 *pCfft; /**< points to the radix-4 complex FFT instance. */
2309   } arm_dct4_instance_q31;
2310 
2311 
2312   /**
2313    * @brief  Initialization function for the Q31 DCT4/IDCT4.
2314    * @param[in,out] S          points to an instance of Q31 DCT4/IDCT4 structure.
2315    * @param[in]     S_RFFT     points to an instance of Q31 RFFT/RIFFT structure.
2316    * @param[in]     S_CFFT     points to an instance of radix-4 Q31 CFFT/CIFFT structure.
2317    * @param[in]     N          length of the DCT4.
2318    * @param[in]     Nby2       half of the length of the DCT4.
2319    * @param[in]     normalize  normalizing factor.
2320    * @return      arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
2321    */
2322   arm_status arm_dct4_init_q31(
2323   arm_dct4_instance_q31 * S,
2324   arm_rfft_instance_q31 * S_RFFT,
2325   arm_cfft_radix4_instance_q31 * S_CFFT,
2326   uint16_t N,
2327   uint16_t Nby2,
2328   q31_t normalize);
2329 
2330 
2331   /**
2332    * @brief Processing function for the Q31 DCT4/IDCT4.
2333    * @param[in]     S              points to an instance of the Q31 DCT4 structure.
2334    * @param[in]     pState         points to the state buffer (documented as input; not const-qualified in this prototype).
2335    * @param[in,out] pInlineBuffer  points to the in-place input and output buffer.
2336    */
2337   void arm_dct4_q31(
2338   const arm_dct4_instance_q31 * S,
2339   q31_t * pState,
2340   q31_t * pInlineBuffer);
2341 
2342 
2343   /**
2344    * @brief Instance structure for the Q15 DCT4/IDCT4 function.
2345    */
2346   typedef struct
2347   {
2348     uint16_t N;                          /**< length of the DCT4. */
2349     uint16_t Nby2;                       /**< half of the length of the DCT4. */
2350     q15_t normalize;                     /**< normalizing factor. */
2351     q15_t *pTwiddle;                     /**< points to the twiddle factor table. */
2352     q15_t *pCosFactor;                   /**< points to the cosFactor table. */
2353     arm_rfft_instance_q15 *pRfft;        /**< points to the real FFT instance. */
2354     arm_cfft_radix4_instance_q15 *pCfft; /**< points to the radix-4 complex FFT instance. */
2355   } arm_dct4_instance_q15;
2356 
2357 
2358   /**
2359    * @brief  Initialization function for the Q15 DCT4/IDCT4.
2360    * @param[in,out] S          points to an instance of Q15 DCT4/IDCT4 structure.
2361    * @param[in]     S_RFFT     points to an instance of Q15 RFFT/RIFFT structure.
2362    * @param[in]     S_CFFT     points to an instance of radix-4 Q15 CFFT/CIFFT structure.
2363    * @param[in]     N          length of the DCT4.
2364    * @param[in]     Nby2       half of the length of the DCT4.
2365    * @param[in]     normalize  normalizing factor.
2366    * @return      arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
2367    */
2368   arm_status arm_dct4_init_q15(
2369   arm_dct4_instance_q15 * S,
2370   arm_rfft_instance_q15 * S_RFFT,
2371   arm_cfft_radix4_instance_q15 * S_CFFT,
2372   uint16_t N,
2373   uint16_t Nby2,
2374   q15_t normalize);
2375 
2376 
2377   /**
2378    * @brief Processing function for the Q15 DCT4/IDCT4.
2379    * @param[in]     S              points to an instance of the Q15 DCT4 structure.
2380    * @param[in]     pState         points to the state buffer (documented as input; not const-qualified in this prototype).
2381    * @param[in,out] pInlineBuffer  points to the in-place input and output buffer.
2382    */
2383   void arm_dct4_q15(
2384   const arm_dct4_instance_q15 * S,
2385   q15_t * pState,
2386   q15_t * pInlineBuffer);
2387 
2388 
2389   /**
2390    * @brief Floating-point vector addition.
2391    * @param[in]  pSrcA      points to the first input vector (documented as input only; not const-qualified in this prototype)
2392    * @param[in]  pSrcB      points to the second input vector (documented as input only; not const-qualified in this prototype)
2393    * @param[out] pDst       points to the output vector
2394    * @param[in]  blockSize  number of samples in each vector
2395    */
2396   void arm_add_f32(
2397   float32_t * pSrcA,
2398   float32_t * pSrcB,
2399   float32_t * pDst,
2400   uint32_t blockSize);
2401 
2402 
2403   /**
2404    * @brief Q7 vector addition.
2405    * @param[in]  pSrcA      points to the first input vector (documented as input only; not const-qualified in this prototype)
2406    * @param[in]  pSrcB      points to the second input vector (documented as input only; not const-qualified in this prototype)
2407    * @param[out] pDst       points to the output vector
2408    * @param[in]  blockSize  number of samples in each vector
2409    */
2410   void arm_add_q7(
2411   q7_t * pSrcA,
2412   q7_t * pSrcB,
2413   q7_t * pDst,
2414   uint32_t blockSize);
2415 
2416 
2417   /**
2418    * @brief Q15 vector addition.
2419    * @param[in]  pSrcA      points to the first input vector (documented as input only; not const-qualified in this prototype)
2420    * @param[in]  pSrcB      points to the second input vector (documented as input only; not const-qualified in this prototype)
2421    * @param[out] pDst       points to the output vector
2422    * @param[in]  blockSize  number of samples in each vector
2423    */
2424   void arm_add_q15(
2425   q15_t * pSrcA,
2426   q15_t * pSrcB,
2427   q15_t * pDst,
2428   uint32_t blockSize);
2429 
2430 
2431   /**
2432    * @brief Q31 vector addition.
2433    * @param[in]  pSrcA      points to the first input vector
2434    * @param[in]  pSrcB      points to the second input vector
2435    * @param[out] pDst       points to the output vector
2436    * @param[in]  blockSize  number of samples in each vector
2437    */
2438   void arm_add_q31(
2439   q31_t * pSrcA,
2440   q31_t * pSrcB,
2441   q31_t * pDst,
2442   uint32_t blockSize);
2443 
2444 
2445   /**
2446    * @brief Floating-point vector subtraction.
2447    * @param[in]  pSrcA      points to the first input vector
2448    * @param[in]  pSrcB      points to the second input vector
2449    * @param[out] pDst       points to the output vector
2450    * @param[in]  blockSize  number of samples in each vector
2451    */
2452   void arm_sub_f32(
2453   float32_t * pSrcA,
2454   float32_t * pSrcB,
2455   float32_t * pDst,
2456   uint32_t blockSize);
2457 
2458 
2459   /**
2460    * @brief Q7 vector subtraction.
2461    * @param[in]  pSrcA      points to the first input vector
2462    * @param[in]  pSrcB      points to the second input vector
2463    * @param[out] pDst       points to the output vector
2464    * @param[in]  blockSize  number of samples in each vector
2465    */
2466   void arm_sub_q7(
2467   q7_t * pSrcA,
2468   q7_t * pSrcB,
2469   q7_t * pDst,
2470   uint32_t blockSize);
2471 
2472 
2473   /**
2474    * @brief Q15 vector subtraction.
2475    * @param[in]  pSrcA      points to the first input vector
2476    * @param[in]  pSrcB      points to the second input vector
2477    * @param[out] pDst       points to the output vector
2478    * @param[in]  blockSize  number of samples in each vector
2479    */
2480   void arm_sub_q15(
2481   q15_t * pSrcA,
2482   q15_t * pSrcB,
2483   q15_t * pDst,
2484   uint32_t blockSize);
2485 
2486 
2487   /**
2488    * @brief Q31 vector subtraction.
2489    * @param[in]  pSrcA      points to the first input vector
2490    * @param[in]  pSrcB      points to the second input vector
2491    * @param[out] pDst       points to the output vector
2492    * @param[in]  blockSize  number of samples in each vector
2493    */
2494   void arm_sub_q31(
2495   q31_t * pSrcA,
2496   q31_t * pSrcB,
2497   q31_t * pDst,
2498   uint32_t blockSize);
2499 
2500 
2501   /**
2502    * @brief Multiplies a floating-point vector by a scalar.
2503    * @param[in]  pSrc       points to the input vector
2504    * @param[in]  scale      scale factor to be applied
2505    * @param[out] pDst       points to the output vector
2506    * @param[in]  blockSize  number of samples in the vector
2507    */
2508   void arm_scale_f32(
2509   float32_t * pSrc,
2510   float32_t scale,
2511   float32_t * pDst,
2512   uint32_t blockSize);
2513 
2514 
2515   /**
2516    * @brief Multiplies a Q7 vector by a scalar.
2517    * @param[in]  pSrc        points to the input vector
2518    * @param[in]  scaleFract  fractional portion of the scale value
2519    * @param[in]  shift       number of bits to shift the result by
2520    * @param[out] pDst        points to the output vector
2521    * @param[in]  blockSize   number of samples in the vector
2522    */
2523   void arm_scale_q7(
2524   q7_t * pSrc,
2525   q7_t scaleFract,
2526   int8_t shift,
2527   q7_t * pDst,
2528   uint32_t blockSize);
2529 
2530 
2531   /**
2532    * @brief Multiplies a Q15 vector by a scalar.
2533    * @param[in]  pSrc        points to the input vector
2534    * @param[in]  scaleFract  fractional portion of the scale value
2535    * @param[in]  shift       number of bits to shift the result by
2536    * @param[out] pDst        points to the output vector
2537    * @param[in]  blockSize   number of samples in the vector
2538    */
2539   void arm_scale_q15(
2540   q15_t * pSrc,
2541   q15_t scaleFract,
2542   int8_t shift,
2543   q15_t * pDst,
2544   uint32_t blockSize);
2545 
2546 
2547   /**
2548    * @brief Multiplies a Q31 vector by a scalar.
2549    * @param[in]  pSrc        points to the input vector
2550    * @param[in]  scaleFract  fractional portion of the scale value
2551    * @param[in]  shift       number of bits to shift the result by
2552    * @param[out] pDst        points to the output vector
2553    * @param[in]  blockSize   number of samples in the vector
2554    */
2555   void arm_scale_q31(
2556   q31_t * pSrc,
2557   q31_t scaleFract,
2558   int8_t shift,
2559   q31_t * pDst,
2560   uint32_t blockSize);
2561 
2562 
2563   /**
2564    * @brief Q7 vector absolute value.
2565    * @param[in]  pSrc       points to the input buffer
2566    * @param[out] pDst       points to the output buffer
2567    * @param[in]  blockSize  number of samples in each vector
2568    */
2569   void arm_abs_q7(
2570   q7_t * pSrc,
2571   q7_t * pDst,
2572   uint32_t blockSize);
2573 
2574 
2575   /**
2576    * @brief Floating-point vector absolute value.
2577    * @param[in]  pSrc       points to the input buffer
2578    * @param[out] pDst       points to the output buffer
2579    * @param[in]  blockSize  number of samples in each vector
2580    */
2581   void arm_abs_f32(
2582   float32_t * pSrc,
2583   float32_t * pDst,
2584   uint32_t blockSize);
2585 
2586 
2587   /**
2588    * @brief Q15 vector absolute value.
2589    * @param[in]  pSrc       points to the input buffer
2590    * @param[out] pDst       points to the output buffer
2591    * @param[in]  blockSize  number of samples in each vector
2592    */
2593   void arm_abs_q15(
2594   q15_t * pSrc,
2595   q15_t * pDst,
2596   uint32_t blockSize);
2597 
2598 
2599   /**
2600    * @brief Q31 vector absolute value.
2601    * @param[in]  pSrc       points to the input buffer
2602    * @param[out] pDst       points to the output buffer
2603    * @param[in]  blockSize  number of samples in each vector
2604    */
2605   void arm_abs_q31(
2606   q31_t * pSrc,
2607   q31_t * pDst,
2608   uint32_t blockSize);
2609 
2610 
2611   /**
2612    * @brief Dot product of floating-point vectors.
2613    * @param[in]  pSrcA      points to the first input vector
2614    * @param[in]  pSrcB      points to the second input vector
2615    * @param[in]  blockSize  number of samples in each vector
2616    * @param[out] result     output result returned here
2617    */
2618   void arm_dot_prod_f32(
2619   float32_t * pSrcA,
2620   float32_t * pSrcB,
2621   uint32_t blockSize,
2622   float32_t * result);
2623 
2624 
2625   /**
2626    * @brief Dot product of Q7 vectors.
2627    * @param[in]  pSrcA      points to the first input vector
2628    * @param[in]  pSrcB      points to the second input vector
2629    * @param[in]  blockSize  number of samples in each vector
2630    * @param[out] result     output result returned here
2631    */
2632   void arm_dot_prod_q7(
2633   q7_t * pSrcA,
2634   q7_t * pSrcB,
2635   uint32_t blockSize,
2636   q31_t * result);
2637 
2638 
2639   /**
2640    * @brief Dot product of Q15 vectors.
2641    * @param[in]  pSrcA      points to the first input vector
2642    * @param[in]  pSrcB      points to the second input vector
2643    * @param[in]  blockSize  number of samples in each vector
2644    * @param[out] result     output result returned here
2645    */
2646   void arm_dot_prod_q15(
2647   q15_t * pSrcA,
2648   q15_t * pSrcB,
2649   uint32_t blockSize,
2650   q63_t * result);
2651 
2652 
2653   /**
2654    * @brief Dot product of Q31 vectors.
2655    * @param[in]  pSrcA      points to the first input vector
2656    * @param[in]  pSrcB      points to the second input vector
2657    * @param[in]  blockSize  number of samples in each vector
2658    * @param[out] result     output result returned here
2659    */
2660   void arm_dot_prod_q31(
2661   q31_t * pSrcA,
2662   q31_t * pSrcB,
2663   uint32_t blockSize,
2664   q63_t * result);
2665 
2666 
2667   /**
2668    * @brief  Shifts the elements of a Q7 vector a specified number of bits.
2669    * @param[in]  pSrc       points to the input vector
2670    * @param[in]  shiftBits  number of bits to shift.  A positive value shifts left; a negative value shifts right.
2671    * @param[out] pDst       points to the output vector
2672    * @param[in]  blockSize  number of samples in the vector
2673    */
2674   void arm_shift_q7(
2675   q7_t * pSrc,
2676   int8_t shiftBits,
2677   q7_t * pDst,
2678   uint32_t blockSize);
2679 
2680 
2681   /**
2682    * @brief  Shifts the elements of a Q15 vector a specified number of bits.
2683    * @param[in]  pSrc       points to the input vector
2684    * @param[in]  shiftBits  number of bits to shift.  A positive value shifts left; a negative value shifts right.
2685    * @param[out] pDst       points to the output vector
2686    * @param[in]  blockSize  number of samples in the vector
2687    */
2688   void arm_shift_q15(
2689   q15_t * pSrc,
2690   int8_t shiftBits,
2691   q15_t * pDst,
2692   uint32_t blockSize);
2693 
2694 
2695   /**
2696    * @brief  Shifts the elements of a Q31 vector a specified number of bits.
2697    * @param[in]  pSrc       points to the input vector
2698    * @param[in]  shiftBits  number of bits to shift.  A positive value shifts left; a negative value shifts right.
2699    * @param[out] pDst       points to the output vector
2700    * @param[in]  blockSize  number of samples in the vector
2701    */
2702   void arm_shift_q31(
2703   q31_t * pSrc,
2704   int8_t shiftBits,
2705   q31_t * pDst,
2706   uint32_t blockSize);
2707 
2708 
2709   /**
2710    * @brief  Adds a constant offset to a floating-point vector.
2711    * @param[in]  pSrc       points to the input vector
2712    * @param[in]  offset     is the offset to be added
2713    * @param[out] pDst       points to the output vector
2714    * @param[in]  blockSize  number of samples in the vector
2715    */
2716   void arm_offset_f32(
2717   float32_t * pSrc,
2718   float32_t offset,
2719   float32_t * pDst,
2720   uint32_t blockSize);
2721 
2722 
2723   /**
2724    * @brief  Adds a constant offset to a Q7 vector.
2725    * @param[in]  pSrc       points to the input vector
2726    * @param[in]  offset     is the offset to be added
2727    * @param[out] pDst       points to the output vector
2728    * @param[in]  blockSize  number of samples in the vector
2729    */
2730   void arm_offset_q7(
2731   q7_t * pSrc,
2732   q7_t offset,
2733   q7_t * pDst,
2734   uint32_t blockSize);
2735 
2736 
2737   /**
2738    * @brief  Adds a constant offset to a Q15 vector.
2739    * @param[in]  pSrc       points to the input vector
2740    * @param[in]  offset     is the offset to be added
2741    * @param[out] pDst       points to the output vector
2742    * @param[in]  blockSize  number of samples in the vector
2743    */
2744   void arm_offset_q15(
2745   q15_t * pSrc,
2746   q15_t offset,
2747   q15_t * pDst,
2748   uint32_t blockSize);
2749 
2750 
2751   /**
2752    * @brief  Adds a constant offset to a Q31 vector.
2753    * @param[in]  pSrc       points to the input vector
2754    * @param[in]  offset     is the offset to be added
2755    * @param[out] pDst       points to the output vector
2756    * @param[in]  blockSize  number of samples in the vector
2757    */
2758   void arm_offset_q31(
2759   q31_t * pSrc,
2760   q31_t offset,
2761   q31_t * pDst,
2762   uint32_t blockSize);
2763 
2764 
2765   /**
2766    * @brief  Negates the elements of a floating-point vector.
2767    * @param[in]  pSrc       points to the input vector
2768    * @param[out] pDst       points to the output vector
2769    * @param[in]  blockSize  number of samples in the vector
2770    */
2771   void arm_negate_f32(
2772   float32_t * pSrc,
2773   float32_t * pDst,
2774   uint32_t blockSize);
2775 
2776 
2777   /**
2778    * @brief  Negates the elements of a Q7 vector.
2779    * @param[in]  pSrc       points to the input vector
2780    * @param[out] pDst       points to the output vector
2781    * @param[in]  blockSize  number of samples in the vector
2782    */
2783   void arm_negate_q7(
2784   q7_t * pSrc,
2785   q7_t * pDst,
2786   uint32_t blockSize);
2787 
2788 
2789   /**
2790    * @brief  Negates the elements of a Q15 vector.
2791    * @param[in]  pSrc       points to the input vector
2792    * @param[out] pDst       points to the output vector
2793    * @param[in]  blockSize  number of samples in the vector
2794    */
2795   void arm_negate_q15(
2796   q15_t * pSrc,
2797   q15_t * pDst,
2798   uint32_t blockSize);
2799 
2800 
2801   /**
2802    * @brief  Negates the elements of a Q31 vector.
2803    * @param[in]  pSrc       points to the input vector
2804    * @param[out] pDst       points to the output vector
2805    * @param[in]  blockSize  number of samples in the vector
2806    */
2807   void arm_negate_q31(
2808   q31_t * pSrc,
2809   q31_t * pDst,
2810   uint32_t blockSize);
2811 
2812 
2813   /**
2814    * @brief  Copies the elements of a floating-point vector.
2815    * @param[in]  pSrc       input pointer
2816    * @param[out] pDst       output pointer
2817    * @param[in]  blockSize  number of samples to process
2818    */
2819   void arm_copy_f32(
2820   float32_t * pSrc,
2821   float32_t * pDst,
2822   uint32_t blockSize);
2823 
2824 
2825   /**
2826    * @brief  Copies the elements of a Q7 vector.
2827    * @param[in]  pSrc       input pointer
2828    * @param[out] pDst       output pointer
2829    * @param[in]  blockSize  number of samples to process
2830    */
2831   void arm_copy_q7(
2832   q7_t * pSrc,
2833   q7_t * pDst,
2834   uint32_t blockSize);
2835 
2836 
2837   /**
2838    * @brief  Copies the elements of a Q15 vector.
2839    * @param[in]  pSrc       input pointer
2840    * @param[out] pDst       output pointer
2841    * @param[in]  blockSize  number of samples to process
2842    */
2843   void arm_copy_q15(
2844   q15_t * pSrc,
2845   q15_t * pDst,
2846   uint32_t blockSize);
2847 
2848 
2849   /**
2850    * @brief  Copies the elements of a Q31 vector.
2851    * @param[in]  pSrc       input pointer
2852    * @param[out] pDst       output pointer
2853    * @param[in]  blockSize  number of samples to process
2854    */
2855   void arm_copy_q31(
2856   q31_t * pSrc,
2857   q31_t * pDst,
2858   uint32_t blockSize);
2859 
2860 
2861   /**
2862    * @brief  Fills a constant value into a floating-point vector.
2863    * @param[in]  value      input value to be filled
2864    * @param[out] pDst       output pointer
2865    * @param[in]  blockSize  number of samples to process
2866    */
2867   void arm_fill_f32(
2868   float32_t value,
2869   float32_t * pDst,
2870   uint32_t blockSize);
2871 
2872 
2873   /**
2874    * @brief  Fills a constant value into a Q7 vector.
2875    * @param[in]  value      input value to be filled
2876    * @param[out] pDst       output pointer
2877    * @param[in]  blockSize  number of samples to process
2878    */
2879   void arm_fill_q7(
2880   q7_t value,
2881   q7_t * pDst,
2882   uint32_t blockSize);
2883 
2884 
2885   /**
2886    * @brief  Fills a constant value into a Q15 vector.
2887    * @param[in]  value      input value to be filled
2888    * @param[out] pDst       output pointer
2889    * @param[in]  blockSize  number of samples to process
2890    */
2891   void arm_fill_q15(
2892   q15_t value,
2893   q15_t * pDst,
2894   uint32_t blockSize);
2895 
2896 
2897   /**
2898    * @brief  Fills a constant value into a Q31 vector.
2899    * @param[in]  value      input value to be filled
2900    * @param[out] pDst       output pointer
2901    * @param[in]  blockSize  number of samples to process
2902    */
2903   void arm_fill_q31(
2904   q31_t value,
2905   q31_t * pDst,
2906   uint32_t blockSize);
2907 
2908 
2909 /**
2910  * @brief Convolution of floating-point sequences.
2911  * @param[in]  pSrcA    points to the first input sequence.
2912  * @param[in]  srcALen  length of the first input sequence.
2913  * @param[in]  pSrcB    points to the second input sequence.
2914  * @param[in]  srcBLen  length of the second input sequence.
2915  * @param[out] pDst     points to the location where the output result is written.  Length srcALen+srcBLen-1.
2916  */
2917   void arm_conv_f32(
2918   float32_t * pSrcA,
2919   uint32_t srcALen,
2920   float32_t * pSrcB,
2921   uint32_t srcBLen,
2922   float32_t * pDst);
2923 
2924 
2925   /**
2926    * @brief Convolution of Q15 sequences.
2927    * @param[in]  pSrcA      points to the first input sequence.
2928    * @param[in]  srcALen    length of the first input sequence.
2929    * @param[in]  pSrcB      points to the second input sequence.
2930    * @param[in]  srcBLen    length of the second input sequence.
2931    * @param[out] pDst       points to the block of output data  Length srcALen+srcBLen-1.
2932    * @param[in]  pScratch1  points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
2933    * @param[in]  pScratch2  points to scratch buffer of size min(srcALen, srcBLen).
2934    */
2935   void arm_conv_opt_q15(
2936   q15_t * pSrcA,
2937   uint32_t srcALen,
2938   q15_t * pSrcB,
2939   uint32_t srcBLen,
2940   q15_t * pDst,
2941   q15_t * pScratch1,
2942   q15_t * pScratch2);
2943 
2944 
2945 /**
2946  * @brief Convolution of Q15 sequences.
2947  * @param[in]  pSrcA    points to the first input sequence.
2948  * @param[in]  srcALen  length of the first input sequence.
2949  * @param[in]  pSrcB    points to the second input sequence.
2950  * @param[in]  srcBLen  length of the second input sequence.
2951  * @param[out] pDst     points to the location where the output result is written.  Length srcALen+srcBLen-1.
2952  */
2953   void arm_conv_q15(
2954   q15_t * pSrcA,
2955   uint32_t srcALen,
2956   q15_t * pSrcB,
2957   uint32_t srcBLen,
2958   q15_t * pDst);
2959 
2960 
2961   /**
2962    * @brief Convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4
2963    * @param[in]  pSrcA    points to the first input sequence.
2964    * @param[in]  srcALen  length of the first input sequence.
2965    * @param[in]  pSrcB    points to the second input sequence.
2966    * @param[in]  srcBLen  length of the second input sequence.
2967    * @param[out] pDst     points to the block of output data  Length srcALen+srcBLen-1.
2968    */
2969   void arm_conv_fast_q15(
2970           q15_t * pSrcA,
2971           uint32_t srcALen,
2972           q15_t * pSrcB,
2973           uint32_t srcBLen,
2974           q15_t * pDst);
2975 
2976 
2977   /**
2978    * @brief Convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4
2979    * @param[in]  pSrcA      points to the first input sequence.
2980    * @param[in]  srcALen    length of the first input sequence.
2981    * @param[in]  pSrcB      points to the second input sequence.
2982    * @param[in]  srcBLen    length of the second input sequence.
2983    * @param[out] pDst       points to the block of output data  Length srcALen+srcBLen-1.
2984    * @param[in]  pScratch1  points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
2985    * @param[in]  pScratch2  points to scratch buffer of size min(srcALen, srcBLen).
2986    */
2987   void arm_conv_fast_opt_q15(
2988   q15_t * pSrcA,
2989   uint32_t srcALen,
2990   q15_t * pSrcB,
2991   uint32_t srcBLen,
2992   q15_t * pDst,
2993   q15_t * pScratch1,
2994   q15_t * pScratch2);
2995 
2996 
2997   /**
2998    * @brief Convolution of Q31 sequences.
2999    * @param[in]  pSrcA    points to the first input sequence.
3000    * @param[in]  srcALen  length of the first input sequence.
3001    * @param[in]  pSrcB    points to the second input sequence.
3002    * @param[in]  srcBLen  length of the second input sequence.
3003    * @param[out] pDst     points to the block of output data  Length srcALen+srcBLen-1.
3004    */
3005   void arm_conv_q31(
3006   q31_t * pSrcA,
3007   uint32_t srcALen,
3008   q31_t * pSrcB,
3009   uint32_t srcBLen,
3010   q31_t * pDst);
3011 
3012 
3013   /**
3014    * @brief Convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4
3015    * @param[in]  pSrcA    points to the first input sequence.
3016    * @param[in]  srcALen  length of the first input sequence.
3017    * @param[in]  pSrcB    points to the second input sequence.
3018    * @param[in]  srcBLen  length of the second input sequence.
3019    * @param[out] pDst     points to the block of output data  Length srcALen+srcBLen-1.
3020    */
3021   void arm_conv_fast_q31(
3022   q31_t * pSrcA,
3023   uint32_t srcALen,
3024   q31_t * pSrcB,
3025   uint32_t srcBLen,
3026   q31_t * pDst);
3027 
3028 
3029     /**
3030    * @brief Convolution of Q7 sequences.
3031    * @param[in]  pSrcA      points to the first input sequence.
3032    * @param[in]  srcALen    length of the first input sequence.
3033    * @param[in]  pSrcB      points to the second input sequence.
3034    * @param[in]  srcBLen    length of the second input sequence.
3035    * @param[out] pDst       points to the block of output data  Length srcALen+srcBLen-1.
3036    * @param[in]  pScratch1  points to scratch buffer(of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
3037    * @param[in]  pScratch2  points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
3038    */
3039   void arm_conv_opt_q7(
3040   q7_t * pSrcA,
3041   uint32_t srcALen,
3042   q7_t * pSrcB,
3043   uint32_t srcBLen,
3044   q7_t * pDst,
3045   q15_t * pScratch1,
3046   q15_t * pScratch2);
3047 
3048 
3049   /**
3050    * @brief Convolution of Q7 sequences.
3051    * @param[in]  pSrcA    points to the first input sequence.
3052    * @param[in]  srcALen  length of the first input sequence.
3053    * @param[in]  pSrcB    points to the second input sequence.
3054    * @param[in]  srcBLen  length of the second input sequence.
3055    * @param[out] pDst     points to the block of output data  Length srcALen+srcBLen-1.
3056    */
3057   void arm_conv_q7(
3058   q7_t * pSrcA,
3059   uint32_t srcALen,
3060   q7_t * pSrcB,
3061   uint32_t srcBLen,
3062   q7_t * pDst);
3063 
3064 
3065   /**
3066    * @brief Partial convolution of floating-point sequences.
3067    * @param[in]  pSrcA       points to the first input sequence.
3068    * @param[in]  srcALen     length of the first input sequence.
3069    * @param[in]  pSrcB       points to the second input sequence.
3070    * @param[in]  srcBLen     length of the second input sequence.
3071    * @param[out] pDst        points to the block of output data
3072    * @param[in]  firstIndex  is the first output sample to start with.
3073    * @param[in]  numPoints   is the number of output points to be computed.
3074    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
3075    */
3076   arm_status arm_conv_partial_f32(
3077   float32_t * pSrcA,
3078   uint32_t srcALen,
3079   float32_t * pSrcB,
3080   uint32_t srcBLen,
3081   float32_t * pDst,
3082   uint32_t firstIndex,
3083   uint32_t numPoints);
3084 
3085 
3086   /**
3087    * @brief Partial convolution of Q15 sequences.
3088    * @param[in]  pSrcA       points to the first input sequence.
3089    * @param[in]  srcALen     length of the first input sequence.
3090    * @param[in]  pSrcB       points to the second input sequence.
3091    * @param[in]  srcBLen     length of the second input sequence.
3092    * @param[out] pDst        points to the block of output data
3093    * @param[in]  firstIndex  is the first output sample to start with.
3094    * @param[in]  numPoints   is the number of output points to be computed.
3095    * @param[in]  pScratch1   points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
3096    * @param[in]  pScratch2   points to scratch buffer of size min(srcALen, srcBLen).
3097    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
3098    */
3099   arm_status arm_conv_partial_opt_q15(
3100   q15_t * pSrcA,
3101   uint32_t srcALen,
3102   q15_t * pSrcB,
3103   uint32_t srcBLen,
3104   q15_t * pDst,
3105   uint32_t firstIndex,
3106   uint32_t numPoints,
3107   q15_t * pScratch1,
3108   q15_t * pScratch2);
3109 
3110 
3111   /**
3112    * @brief Partial convolution of Q15 sequences.
3113    * @param[in]  pSrcA       points to the first input sequence.
3114    * @param[in]  srcALen     length of the first input sequence.
3115    * @param[in]  pSrcB       points to the second input sequence.
3116    * @param[in]  srcBLen     length of the second input sequence.
3117    * @param[out] pDst        points to the block of output data
3118    * @param[in]  firstIndex  is the first output sample to start with.
3119    * @param[in]  numPoints   is the number of output points to be computed.
3120    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
3121    */
3122   arm_status arm_conv_partial_q15(
3123   q15_t * pSrcA,
3124   uint32_t srcALen,
3125   q15_t * pSrcB,
3126   uint32_t srcBLen,
3127   q15_t * pDst,
3128   uint32_t firstIndex,
3129   uint32_t numPoints);
3130 
3131 
3132   /**
3133    * @brief Partial convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4
3134    * @param[in]  pSrcA       points to the first input sequence.
3135    * @param[in]  srcALen     length of the first input sequence.
3136    * @param[in]  pSrcB       points to the second input sequence.
3137    * @param[in]  srcBLen     length of the second input sequence.
3138    * @param[out] pDst        points to the block of output data
3139    * @param[in]  firstIndex  is the first output sample to start with.
3140    * @param[in]  numPoints   is the number of output points to be computed.
3141    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
3142    */
3143   arm_status arm_conv_partial_fast_q15(
3144   q15_t * pSrcA,
3145   uint32_t srcALen,
3146   q15_t * pSrcB,
3147   uint32_t srcBLen,
3148   q15_t * pDst,
3149   uint32_t firstIndex,
3150   uint32_t numPoints);
3151 
3152 
3153   /**
3154    * @brief Partial convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4
3155    * @param[in]  pSrcA       points to the first input sequence.
3156    * @param[in]  srcALen     length of the first input sequence.
3157    * @param[in]  pSrcB       points to the second input sequence.
3158    * @param[in]  srcBLen     length of the second input sequence.
3159    * @param[out] pDst        points to the block of output data
3160    * @param[in]  firstIndex  is the first output sample to start with.
3161    * @param[in]  numPoints   is the number of output points to be computed.
3162    * @param[in]  pScratch1   points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
3163    * @param[in]  pScratch2   points to scratch buffer of size min(srcALen, srcBLen).
3164    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
3165    */
3166   arm_status arm_conv_partial_fast_opt_q15(
3167   q15_t * pSrcA,
3168   uint32_t srcALen,
3169   q15_t * pSrcB,
3170   uint32_t srcBLen,
3171   q15_t * pDst,
3172   uint32_t firstIndex,
3173   uint32_t numPoints,
3174   q15_t * pScratch1,
3175   q15_t * pScratch2);
3176 
3177 
3178   /**
3179    * @brief Partial convolution of Q31 sequences.
3180    * @param[in]  pSrcA       points to the first input sequence.
3181    * @param[in]  srcALen     length of the first input sequence.
3182    * @param[in]  pSrcB       points to the second input sequence.
3183    * @param[in]  srcBLen     length of the second input sequence.
3184    * @param[out] pDst        points to the block of output data
3185    * @param[in]  firstIndex  is the first output sample to start with.
3186    * @param[in]  numPoints   is the number of output points to be computed.
3187    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
3188    */
3189   arm_status arm_conv_partial_q31(
3190   q31_t * pSrcA,
3191   uint32_t srcALen,
3192   q31_t * pSrcB,
3193   uint32_t srcBLen,
3194   q31_t * pDst,
3195   uint32_t firstIndex,
3196   uint32_t numPoints);
3197 
3198 
3199   /**
3200    * @brief Partial convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4
3201    * @param[in]  pSrcA       points to the first input sequence.
3202    * @param[in]  srcALen     length of the first input sequence.
3203    * @param[in]  pSrcB       points to the second input sequence.
3204    * @param[in]  srcBLen     length of the second input sequence.
3205    * @param[out] pDst        points to the block of output data
3206    * @param[in]  firstIndex  is the first output sample to start with.
3207    * @param[in]  numPoints   is the number of output points to be computed.
3208    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
3209    */
3210   arm_status arm_conv_partial_fast_q31(
3211   q31_t * pSrcA,
3212   uint32_t srcALen,
3213   q31_t * pSrcB,
3214   uint32_t srcBLen,
3215   q31_t * pDst,
3216   uint32_t firstIndex,
3217   uint32_t numPoints);
3218 
3219 
3220   /**
3221    * @brief Partial convolution of Q7 sequences
3222    * @param[in]  pSrcA       points to the first input sequence.
3223    * @param[in]  srcALen     length of the first input sequence.
3224    * @param[in]  pSrcB       points to the second input sequence.
3225    * @param[in]  srcBLen     length of the second input sequence.
3226    * @param[out] pDst        points to the block of output data
3227    * @param[in]  firstIndex  is the first output sample to start with.
3228    * @param[in]  numPoints   is the number of output points to be computed.
3229    * @param[in]  pScratch1   points to scratch buffer(of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
3230    * @param[in]  pScratch2   points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
3231    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
3232    */
3233   arm_status arm_conv_partial_opt_q7(
3234   q7_t * pSrcA,
3235   uint32_t srcALen,
3236   q7_t * pSrcB,
3237   uint32_t srcBLen,
3238   q7_t * pDst,
3239   uint32_t firstIndex,
3240   uint32_t numPoints,
3241   q15_t * pScratch1,
3242   q15_t * pScratch2);
3243 
3244 
3245 /**
3246    * @brief Partial convolution of Q7 sequences.
3247    * @param[in]  pSrcA       points to the first input sequence.
3248    * @param[in]  srcALen     length of the first input sequence.
3249    * @param[in]  pSrcB       points to the second input sequence.
3250    * @param[in]  srcBLen     length of the second input sequence.
3251    * @param[out] pDst        points to the block of output data.
3252    * @param[in]  firstIndex  is the index of the first output sample to compute.
3253    * @param[in]  numPoints   is the number of output points to be computed.
3254    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3255    */
3256   arm_status arm_conv_partial_q7(
3257   q7_t * pSrcA,
3258   uint32_t srcALen,
3259   q7_t * pSrcB,
3260   uint32_t srcBLen,
3261   q7_t * pDst,
3262   uint32_t firstIndex,
3263   uint32_t numPoints);
3264 
3265 
3266   /**
3267    * @brief Instance structure for the Q15 FIR decimator. Taken by arm_fir_decimate_init_q15(), arm_fir_decimate_q15() and arm_fir_decimate_fast_q15().
3268    */
3269   typedef struct
3270   {
3271     uint8_t M;                  /**< decimation factor. */
3272     uint16_t numTaps;           /**< number of coefficients in the filter. */
3273     q15_t *pCoeffs;             /**< points to the coefficient array. The array is of length numTaps. */
3274     q15_t *pState;              /**< points to the state variable array. The array is of length numTaps+blockSize-1, where blockSize is the per-call sample count given to arm_fir_decimate_init_q15(). */
3275   } arm_fir_decimate_instance_q15;
3276 
3277   /**
3278    * @brief Instance structure for the Q31 FIR decimator. Taken by arm_fir_decimate_init_q31(), arm_fir_decimate_q31() and arm_fir_decimate_fast_q31().
3279    */
3280   typedef struct
3281   {
3282     uint8_t M;                  /**< decimation factor. */
3283     uint16_t numTaps;           /**< number of coefficients in the filter. */
3284     q31_t *pCoeffs;             /**< points to the coefficient array. The array is of length numTaps. */
3285     q31_t *pState;              /**< points to the state variable array. The array is of length numTaps+blockSize-1, where blockSize is the per-call sample count given to arm_fir_decimate_init_q31(). */
3286   } arm_fir_decimate_instance_q31;
3287 
3288   /**
3289    * @brief Instance structure for the floating-point FIR decimator. Taken by arm_fir_decimate_init_f32() and arm_fir_decimate_f32().
3290    */
3291   typedef struct
3292   {
3293     uint8_t M;                  /**< decimation factor. */
3294     uint16_t numTaps;           /**< number of coefficients in the filter. */
3295     float32_t *pCoeffs;         /**< points to the coefficient array. The array is of length numTaps. */
3296     float32_t *pState;          /**< points to the state variable array. The array is of length numTaps+blockSize-1, where blockSize is the per-call sample count given to arm_fir_decimate_init_f32(). */
3297   } arm_fir_decimate_instance_f32;
3298 
3299 
3300   /**
3301    * @brief Processing function for the floating-point FIR decimator.
3302    * @param[in]  S          points to an instance of the floating-point FIR decimator structure (arm_fir_decimate_instance_f32).
3303    * @param[in]  pSrc       points to the block of input data.
3304    * @param[out] pDst       points to the block of output data.
3305    * @param[in]  blockSize  number of input samples to process per call.
3306    */
3307   void arm_fir_decimate_f32(
3308   const arm_fir_decimate_instance_f32 * S,
3309   float32_t * pSrc,
3310   float32_t * pDst,
3311   uint32_t blockSize);
3312 
3313 
3314   /**
3315    * @brief  Initialization function for the floating-point FIR decimator.
3316    * @param[in,out] S          points to an instance of the floating-point FIR decimator structure (arm_fir_decimate_instance_f32).
3317    * @param[in]     numTaps    number of coefficients in the filter.
3318    * @param[in]     M          decimation factor.
3319    * @param[in]     pCoeffs    points to the filter coefficients.
3320    * @param[in]     pState     points to the state buffer.
3321    * @param[in]     blockSize  number of input samples to process per call.
3322    * @return    ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
3323    * <code>blockSize</code> is not a multiple of <code>M</code>.
3324    */
3325   arm_status arm_fir_decimate_init_f32(
3326   arm_fir_decimate_instance_f32 * S,
3327   uint16_t numTaps,
3328   uint8_t M,
3329   float32_t * pCoeffs,
3330   float32_t * pState,
3331   uint32_t blockSize);
3332 
3333 
3334   /**
3335    * @brief Processing function for the Q15 FIR decimator.
3336    * @param[in]  S          points to an instance of the Q15 FIR decimator structure (arm_fir_decimate_instance_q15).
3337    * @param[in]  pSrc       points to the block of input data.
3338    * @param[out] pDst       points to the block of output data.
3339    * @param[in]  blockSize  number of input samples to process per call.
3340    */
3341   void arm_fir_decimate_q15(
3342   const arm_fir_decimate_instance_q15 * S,
3343   q15_t * pSrc,
3344   q15_t * pDst,
3345   uint32_t blockSize);
3346 
3347 
3348   /**
3349    * @brief Processing function for the Q15 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.
3350    * @param[in]  S          points to an instance of the Q15 FIR decimator structure (arm_fir_decimate_instance_q15).
3351    * @param[in]  pSrc       points to the block of input data.
3352    * @param[out] pDst       points to the block of output data.
3353    * @param[in]  blockSize  number of input samples to process per call.
3354    */
3355   void arm_fir_decimate_fast_q15(
3356   const arm_fir_decimate_instance_q15 * S,
3357   q15_t * pSrc,
3358   q15_t * pDst,
3359   uint32_t blockSize);
3360 
3361 
3362   /**
3363    * @brief  Initialization function for the Q15 FIR decimator.
3364    * @param[in,out] S          points to an instance of the Q15 FIR decimator structure (arm_fir_decimate_instance_q15).
3365    * @param[in]     numTaps    number of coefficients in the filter.
3366    * @param[in]     M          decimation factor.
3367    * @param[in]     pCoeffs    points to the filter coefficients.
3368    * @param[in]     pState     points to the state buffer.
3369    * @param[in]     blockSize  number of input samples to process per call.
3370    * @return    ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
3371    * <code>blockSize</code> is not a multiple of <code>M</code>.
3372    */
3373   arm_status arm_fir_decimate_init_q15(
3374   arm_fir_decimate_instance_q15 * S,
3375   uint16_t numTaps,
3376   uint8_t M,
3377   q15_t * pCoeffs,
3378   q15_t * pState,
3379   uint32_t blockSize);
3380 
3381 
3382   /**
3383    * @brief Processing function for the Q31 FIR decimator.
3384    * @param[in]  S          points to an instance of the Q31 FIR decimator structure (arm_fir_decimate_instance_q31).
3385    * @param[in]  pSrc       points to the block of input data.
3386    * @param[out] pDst       points to the block of output data.
3387    * @param[in]  blockSize  number of input samples to process per call.
3388    */
3389   void arm_fir_decimate_q31(
3390   const arm_fir_decimate_instance_q31 * S,
3391   q31_t * pSrc,
3392   q31_t * pDst,
3393   uint32_t blockSize);
3394 
3395   /**
3396    * @brief Processing function for the Q31 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.
3397    * @param[in]  S          points to an instance of the Q31 FIR decimator structure (arm_fir_decimate_instance_q31). NOTE(review): S is not const-qualified here, unlike arm_fir_decimate_q31() and arm_fir_decimate_fast_q15() -- confirm whether this is intentional.
3398    * @param[in]  pSrc       points to the block of input data.
3399    * @param[out] pDst       points to the block of output data.
3400    * @param[in]  blockSize  number of input samples to process per call.
3401    */
3402   void arm_fir_decimate_fast_q31(
3403   arm_fir_decimate_instance_q31 * S,
3404   q31_t * pSrc,
3405   q31_t * pDst,
3406   uint32_t blockSize);
3407 
3408 
3409   /**
3410    * @brief  Initialization function for the Q31 FIR decimator.
3411    * @param[in,out] S          points to an instance of the Q31 FIR decimator structure (arm_fir_decimate_instance_q31).
3412    * @param[in]     numTaps    number of coefficients in the filter.
3413    * @param[in]     M          decimation factor.
3414    * @param[in]     pCoeffs    points to the filter coefficients.
3415    * @param[in]     pState     points to the state buffer.
3416    * @param[in]     blockSize  number of input samples to process per call.
3417    * @return    ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
3418    * <code>blockSize</code> is not a multiple of <code>M</code>.
3419    */
3420   arm_status arm_fir_decimate_init_q31(
3421   arm_fir_decimate_instance_q31 * S,
3422   uint16_t numTaps,
3423   uint8_t M,
3424   q31_t * pCoeffs,
3425   q31_t * pState,
3426   uint32_t blockSize);
3427 
3428 
3429   /**
3430    * @brief Instance structure for the Q15 FIR interpolator. Taken by arm_fir_interpolate_init_q15() and arm_fir_interpolate_q15().
3431    */
3432   typedef struct
3433   {
3434     uint8_t L;                      /**< upsample factor. */
3435     uint16_t phaseLength;           /**< length of each polyphase filter component. */
3436     q15_t *pCoeffs;                 /**< points to the coefficient array. The array is of length L*phaseLength. */
3437     q15_t *pState;                  /**< points to the state variable array. The array is of length blockSize+phaseLength-1, where blockSize is the per-call sample count given to arm_fir_interpolate_init_q15(). */
3438   } arm_fir_interpolate_instance_q15;
3439 
3440   /**
3441    * @brief Instance structure for the Q31 FIR interpolator. Taken by arm_fir_interpolate_init_q31() and arm_fir_interpolate_q31().
3442    */
3443   typedef struct
3444   {
3445     uint8_t L;                      /**< upsample factor. */
3446     uint16_t phaseLength;           /**< length of each polyphase filter component. */
3447     q31_t *pCoeffs;                 /**< points to the coefficient array. The array is of length L*phaseLength. */
3448     q31_t *pState;                  /**< points to the state variable array. The array is of length blockSize+phaseLength-1, where blockSize is the per-call sample count given to arm_fir_interpolate_init_q31(). */
3449   } arm_fir_interpolate_instance_q31;
3450 
3451   /**
3452    * @brief Instance structure for the floating-point FIR interpolator. Taken by arm_fir_interpolate_init_f32() and arm_fir_interpolate_f32().
3453    */
3454   typedef struct
3455   {
3456     uint8_t L;                     /**< upsample factor. */
3457     uint16_t phaseLength;          /**< length of each polyphase filter component. */
3458     float32_t *pCoeffs;            /**< points to the coefficient array. The array is of length L*phaseLength. */
3459     float32_t *pState;             /**< points to the state variable array. The array is of length phaseLength+numTaps-1. NOTE(review): the Q15 and Q31 instance structures document blockSize+phaseLength-1, and numTaps is not a field of this structure -- confirm which length is correct. */
3460   } arm_fir_interpolate_instance_f32;
3461 
3462 
3463   /**
3464    * @brief Processing function for the Q15 FIR interpolator.
3465    * @param[in]  S          points to an instance of the Q15 FIR interpolator structure (arm_fir_interpolate_instance_q15).
3466    * @param[in]  pSrc       points to the block of input data.
3467    * @param[out] pDst       points to the block of output data.
3468    * @param[in]  blockSize  number of input samples to process per call.
3469    */
3470   void arm_fir_interpolate_q15(
3471   const arm_fir_interpolate_instance_q15 * S,
3472   q15_t * pSrc,
3473   q15_t * pDst,
3474   uint32_t blockSize);
3475 
3476 
3477   /**
3478    * @brief  Initialization function for the Q15 FIR interpolator.
3479    * @param[in,out] S          points to an instance of the Q15 FIR interpolator structure (arm_fir_interpolate_instance_q15).
3480    * @param[in]     L          upsample factor.
3481    * @param[in]     numTaps    number of coefficients in the filter.
3482    * @param[in]     pCoeffs    points to the filter coefficient buffer.
3483    * @param[in]     pState     points to the state buffer.
3484    * @param[in]     blockSize  number of input samples to process per call.
3485    * @return        ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
3486    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
3487    */
3488   arm_status arm_fir_interpolate_init_q15(
3489   arm_fir_interpolate_instance_q15 * S,
3490   uint8_t L,
3491   uint16_t numTaps,
3492   q15_t * pCoeffs,
3493   q15_t * pState,
3494   uint32_t blockSize);
3495 
3496 
3497   /**
3498    * @brief Processing function for the Q31 FIR interpolator.
3499    * @param[in]  S          points to an instance of the Q31 FIR interpolator structure (arm_fir_interpolate_instance_q31).
3500    * @param[in]  pSrc       points to the block of input data.
3501    * @param[out] pDst       points to the block of output data.
3502    * @param[in]  blockSize  number of input samples to process per call.
3503    */
3504   void arm_fir_interpolate_q31(
3505   const arm_fir_interpolate_instance_q31 * S,
3506   q31_t * pSrc,
3507   q31_t * pDst,
3508   uint32_t blockSize);
3509 
3510 
3511   /**
3512    * @brief  Initialization function for the Q31 FIR interpolator.
3513    * @param[in,out] S          points to an instance of the Q31 FIR interpolator structure (arm_fir_interpolate_instance_q31).
3514    * @param[in]     L          upsample factor.
3515    * @param[in]     numTaps    number of coefficients in the filter.
3516    * @param[in]     pCoeffs    points to the filter coefficient buffer.
3517    * @param[in]     pState     points to the state buffer.
3518    * @param[in]     blockSize  number of input samples to process per call.
3519    * @return        ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
3520    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
3521    */
3522   arm_status arm_fir_interpolate_init_q31(
3523   arm_fir_interpolate_instance_q31 * S,
3524   uint8_t L,
3525   uint16_t numTaps,
3526   q31_t * pCoeffs,
3527   q31_t * pState,
3528   uint32_t blockSize);
3529 
3530 
3531   /**
3532    * @brief Processing function for the floating-point FIR interpolator.
3533    * @param[in]  S          points to an instance of the floating-point FIR interpolator structure (arm_fir_interpolate_instance_f32).
3534    * @param[in]  pSrc       points to the block of input data.
3535    * @param[out] pDst       points to the block of output data.
3536    * @param[in]  blockSize  number of input samples to process per call.
3537    */
3538   void arm_fir_interpolate_f32(
3539   const arm_fir_interpolate_instance_f32 * S,
3540   float32_t * pSrc,
3541   float32_t * pDst,
3542   uint32_t blockSize);
3543 
3544 
3545   /**
3546    * @brief  Initialization function for the floating-point FIR interpolator.
3547    * @param[in,out] S          points to an instance of the floating-point FIR interpolator structure (arm_fir_interpolate_instance_f32).
3548    * @param[in]     L          upsample factor.
3549    * @param[in]     numTaps    number of coefficients in the filter.
3550    * @param[in]     pCoeffs    points to the filter coefficient buffer.
3551    * @param[in]     pState     points to the state buffer.
3552    * @param[in]     blockSize  number of input samples to process per call.
3553    * @return        ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
3554    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
3555    */
3556   arm_status arm_fir_interpolate_init_f32(
3557   arm_fir_interpolate_instance_f32 * S,
3558   uint8_t L,
3559   uint16_t numTaps,
3560   float32_t * pCoeffs,
3561   float32_t * pState,
3562   uint32_t blockSize);
3563 
3564 
3565   /**
3566    * @brief Instance structure for the high precision Q31 Biquad cascade filter. Taken by arm_biquad_cas_df1_32x64_init_q31() and arm_biquad_cas_df1_32x64_q31().
3567    */
3568   typedef struct
3569   {
3570     uint8_t numStages;       /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
3571     q63_t *pState;           /**< points to the array of state coefficients (q63_t elements).  The array is of length 4*numStages. */
3572     q31_t *pCoeffs;          /**< points to the array of coefficients (q31_t elements).  The array is of length 5*numStages. */
3573     uint8_t postShift;       /**< additional shift, in bits, applied to each output sample. */
3574   } arm_biquad_cas_df1_32x64_ins_q31;
3575 
3576 
3577   /** @brief Processing function for the high precision Q31 Biquad cascade filter.
3578    * @param[in]  S          points to an instance of the high precision Q31 Biquad cascade filter structure (arm_biquad_cas_df1_32x64_ins_q31).
3579    * @param[in]  pSrc       points to the block of input data.
3580    * @param[out] pDst       points to the block of output data.
3581    * @param[in]  blockSize  number of samples to process.
3582    */
3583   void arm_biquad_cas_df1_32x64_q31(
3584   const arm_biquad_cas_df1_32x64_ins_q31 * S,
3585   q31_t * pSrc,
3586   q31_t * pDst,
3587   uint32_t blockSize);
3588 
3589 
3590   /** @brief Initialization function for the high precision Q31 Biquad cascade filter.
3591    * @param[in,out] S          points to an instance of the high precision Q31 Biquad cascade filter structure (arm_biquad_cas_df1_32x64_ins_q31).
3592    * @param[in]     numStages  number of 2nd order stages in the filter.
3593    * @param[in]     pCoeffs    points to the filter coefficients.
3594    * @param[in]     pState     points to the state buffer (q63_t elements).
3595    * @param[in]     postShift  shift to be applied to the output. Varies according to the coefficients format.
3596    */
3597   void arm_biquad_cas_df1_32x64_init_q31(
3598   arm_biquad_cas_df1_32x64_ins_q31 * S,
3599   uint8_t numStages,
3600   q31_t * pCoeffs,
3601   q63_t * pState,
3602   uint8_t postShift);
3603 
3604 
3605   /**
3606    * @brief Instance structure for the floating-point (float32_t) transposed direct form II Biquad cascade filter. Taken by arm_biquad_cascade_df2T_init_f32() and arm_biquad_cascade_df2T_f32().
3607    */
3608   typedef struct
3609   {
3610     uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
3611     float32_t *pState;         /**< points to the array of state coefficients.  The array is of length 2*numStages. */
3612     float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
3613   } arm_biquad_cascade_df2T_instance_f32;
3614 
3615   /**
3616    * @brief Instance structure for the 2-channel (stereo) floating-point transposed direct form II Biquad cascade filter. Taken by arm_biquad_cascade_stereo_df2T_init_f32() and arm_biquad_cascade_stereo_df2T_f32().
3617    */
3618   typedef struct
3619   {
3620     uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
3621     float32_t *pState;         /**< points to the array of state coefficients.  The array is of length 4*numStages (twice the length documented for the non-stereo f32 instance). */
3622     float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
3623   } arm_biquad_cascade_stereo_df2T_instance_f32;
3624 
3625   /**
3626    * @brief Instance structure for the float64_t variant of the floating-point transposed direct form II Biquad cascade filter. Taken by arm_biquad_cascade_df2T_init_f64() and arm_biquad_cascade_df2T_f64().
3627    */
3628   typedef struct
3629   {
3630     uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
3631     float64_t *pState;         /**< points to the array of state coefficients.  The array is of length 2*numStages. */
3632     float64_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
3633   } arm_biquad_cascade_df2T_instance_f64;
3634 
3635 
3636   /**
3637    * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter.
3638    * @param[in]  S          points to an instance of the filter data structure (arm_biquad_cascade_df2T_instance_f32).
3639    * @param[in]  pSrc       points to the block of input data.
3640    * @param[out] pDst       points to the block of output data.
3641    * @param[in]  blockSize  number of samples to process.
3642    */
3643   void arm_biquad_cascade_df2T_f32(
3644   const arm_biquad_cascade_df2T_instance_f32 * S,
3645   float32_t * pSrc,
3646   float32_t * pDst,
3647   uint32_t blockSize);
3648 
3649 
3650   /**
3651    * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter, 2 channels.
3652    * @param[in]  S          points to an instance of the filter data structure (arm_biquad_cascade_stereo_df2T_instance_f32).
3653    * @param[in]  pSrc       points to the block of input data.
3654    * @param[out] pDst       points to the block of output data.
3655    * @param[in]  blockSize  number of samples to process.
3656    */
3657   void arm_biquad_cascade_stereo_df2T_f32(
3658   const arm_biquad_cascade_stereo_df2T_instance_f32 * S,
3659   float32_t * pSrc,
3660   float32_t * pDst,
3661   uint32_t blockSize);
3662 
3663 
3664   /**
3665    * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter (float64_t variant).
3666    * @param[in]  S          points to an instance of the filter data structure (arm_biquad_cascade_df2T_instance_f64).
3667    * @param[in]  pSrc       points to the block of input data.
3668    * @param[out] pDst       points to the block of output data.
3669    * @param[in]  blockSize  number of samples to process.
3670    */
3671   void arm_biquad_cascade_df2T_f64(
3672   const arm_biquad_cascade_df2T_instance_f64 * S,
3673   float64_t * pSrc,
3674   float64_t * pDst,
3675   uint32_t blockSize);
3676 
3677 
3678   /**
3679    * @brief  Initialization function for the floating-point transposed direct form II Biquad cascade filter.
3680    * @param[in,out] S          points to an instance of the filter data structure (arm_biquad_cascade_df2T_instance_f32).
3681    * @param[in]     numStages  number of 2nd order stages in the filter.
3682    * @param[in]     pCoeffs    points to the filter coefficients.
3683    * @param[in]     pState     points to the state buffer.
3684    */
3685   void arm_biquad_cascade_df2T_init_f32(
3686   arm_biquad_cascade_df2T_instance_f32 * S,
3687   uint8_t numStages,
3688   float32_t * pCoeffs,
3689   float32_t * pState);
3690 
3691 
3692   /**
3693    * @brief  Initialization function for the floating-point transposed direct form II Biquad cascade filter, 2 channels.
3694    * @param[in,out] S          points to an instance of the filter data structure (arm_biquad_cascade_stereo_df2T_instance_f32).
3695    * @param[in]     numStages  number of 2nd order stages in the filter.
3696    * @param[in]     pCoeffs    points to the filter coefficients.
3697    * @param[in]     pState     points to the state buffer.
3698    */
3699   void arm_biquad_cascade_stereo_df2T_init_f32(
3700   arm_biquad_cascade_stereo_df2T_instance_f32 * S,
3701   uint8_t numStages,
3702   float32_t * pCoeffs,
3703   float32_t * pState);
3704 
3705 
3706   /**
3707    * @brief  Initialization function for the floating-point transposed direct form II Biquad cascade filter (float64_t variant).
3708    * @param[in,out] S          points to an instance of the filter data structure (arm_biquad_cascade_df2T_instance_f64).
3709    * @param[in]     numStages  number of 2nd order stages in the filter.
3710    * @param[in]     pCoeffs    points to the filter coefficients.
3711    * @param[in]     pState     points to the state buffer.
3712    */
3713   void arm_biquad_cascade_df2T_init_f64(
3714   arm_biquad_cascade_df2T_instance_f64 * S,
3715   uint8_t numStages,
3716   float64_t * pCoeffs,
3717   float64_t * pState);
3718 
3719 
3720   /**
3721    * @brief Instance structure for the Q15 FIR lattice filter. Taken by arm_fir_lattice_init_q15() and arm_fir_lattice_q15().
3722    */
3723   typedef struct
3724   {
3725     uint16_t numStages;                  /**< number of filter stages. */
3726     q15_t *pState;                       /**< points to the state variable array (q15_t elements). The array is of length numStages. */
3727     q15_t *pCoeffs;                      /**< points to the coefficient array (q15_t elements). The array is of length numStages. */
3728   } arm_fir_lattice_instance_q15;
3729 
3730   /**
3731    * @brief Instance structure for the Q31 FIR lattice filter. Taken by arm_fir_lattice_init_q31() and arm_fir_lattice_q31().
3732    */
3733   typedef struct
3734   {
3735     uint16_t numStages;                  /**< number of filter stages. */
3736     q31_t *pState;                       /**< points to the state variable array (q31_t elements). The array is of length numStages. */
3737     q31_t *pCoeffs;                      /**< points to the coefficient array (q31_t elements). The array is of length numStages. */
3738   } arm_fir_lattice_instance_q31;
3739 
3740   /**
3741    * @brief Instance structure for the floating-point FIR lattice filter. Taken by arm_fir_lattice_init_f32() and arm_fir_lattice_f32().
3742    */
3743   typedef struct
3744   {
3745     uint16_t numStages;                  /**< number of filter stages. */
3746     float32_t *pState;                   /**< points to the state variable array (float32_t elements). The array is of length numStages. */
3747     float32_t *pCoeffs;                  /**< points to the coefficient array (float32_t elements). The array is of length numStages. */
3748   } arm_fir_lattice_instance_f32;
3749 
3750 
3751   /**
3752    * @brief Initialization function for the Q15 FIR lattice filter.
3753    * @param[in] S          points to an instance of the Q15 FIR lattice structure (arm_fir_lattice_instance_q15).
3754    * @param[in] numStages  number of filter stages.
3755    * @param[in] pCoeffs    points to the coefficient buffer.  The array is of length numStages.
3756    * @param[in] pState     points to the state buffer.  The array is of length numStages.
3757    */
3758   void arm_fir_lattice_init_q15(
3759   arm_fir_lattice_instance_q15 * S,
3760   uint16_t numStages,
3761   q15_t * pCoeffs,
3762   q15_t * pState);
3763 
3764 
3765   /**
3766    * @brief Processing function for the Q15 FIR lattice filter.
3767    * @param[in]  S          points to an instance of the Q15 FIR lattice structure (arm_fir_lattice_instance_q15).
3768    * @param[in]  pSrc       points to the block of input data.
3769    * @param[out] pDst       points to the block of output data.
3770    * @param[in]  blockSize  number of samples to process.
3771    */
3772   void arm_fir_lattice_q15(
3773   const arm_fir_lattice_instance_q15 * S,
3774   q15_t * pSrc,
3775   q15_t * pDst,
3776   uint32_t blockSize);
3777 
3778 
3779   /**
3780    * @brief Initialization function for the Q31 FIR lattice filter.
3781    * @param[in] S          points to an instance of the Q31 FIR lattice structure (arm_fir_lattice_instance_q31).
3782    * @param[in] numStages  number of filter stages.
3783    * @param[in] pCoeffs    points to the coefficient buffer.  The array is of length numStages.
3784    * @param[in] pState     points to the state buffer.  The array is of length numStages.
3785    */
3786   void arm_fir_lattice_init_q31(
3787   arm_fir_lattice_instance_q31 * S,
3788   uint16_t numStages,
3789   q31_t * pCoeffs,
3790   q31_t * pState);
3791 
3792 
3793   /**
3794    * @brief Processing function for the Q31 FIR lattice filter.
3795    * @param[in]  S          points to an instance of the Q31 FIR lattice structure (arm_fir_lattice_instance_q31).
3796    * @param[in]  pSrc       points to the block of input data.
3797    * @param[out] pDst       points to the block of output data.
3798    * @param[in]  blockSize  number of samples to process.
3799    */
3800   void arm_fir_lattice_q31(
3801   const arm_fir_lattice_instance_q31 * S,
3802   q31_t * pSrc,
3803   q31_t * pDst,
3804   uint32_t blockSize);
3805 
3806 
3807 /**
3808  * @brief Initialization function for the floating-point FIR lattice filter.
3809  * @param[in] S          points to an instance of the floating-point FIR lattice structure (arm_fir_lattice_instance_f32).
3810  * @param[in] numStages  number of filter stages.
3811  * @param[in] pCoeffs    points to the coefficient buffer.  The array is of length numStages.
3812  * @param[in] pState     points to the state buffer.  The array is of length numStages.
3813  */
3814   void arm_fir_lattice_init_f32(
3815   arm_fir_lattice_instance_f32 * S,
3816   uint16_t numStages,
3817   float32_t * pCoeffs,
3818   float32_t * pState);
3819 
3820 
3821   /**
3822    * @brief Processing function for the floating-point FIR lattice filter.
3823    * @param[in]  S          points to an instance of the floating-point FIR lattice structure (arm_fir_lattice_instance_f32).
3824    * @param[in]  pSrc       points to the block of input data.
3825    * @param[out] pDst       points to the block of output data.
3826    * @param[in]  blockSize  number of samples to process.
3827    */
3828   void arm_fir_lattice_f32(
3829   const arm_fir_lattice_instance_f32 * S,
3830   float32_t * pSrc,
3831   float32_t * pDst,
3832   uint32_t blockSize);
3833 
3834 
3835   /**
3836    * @brief Instance structure for the Q15 IIR lattice filter. Taken by arm_iir_lattice_init_q15() and arm_iir_lattice_q15().
3837    */
3838   typedef struct
3839   {
3840     uint16_t numStages;                  /**< number of stages in the filter. */
3841     q15_t *pState;                       /**< points to the state variable array. The array is of length numStages+blockSize, where blockSize is the value given to arm_iir_lattice_init_q15(). */
3842     q15_t *pkCoeffs;                     /**< points to the reflection coefficient array. The array is of length numStages. */
3843     q15_t *pvCoeffs;                     /**< points to the ladder coefficient array. The array is of length numStages+1. */
3844   } arm_iir_lattice_instance_q15;
3845 
3846   /**
3847    * @brief Instance structure for the Q31 IIR lattice filter. Taken by arm_iir_lattice_init_q31() and arm_iir_lattice_q31().
3848    */
3849   typedef struct
3850   {
3851     uint16_t numStages;                  /**< number of stages in the filter. */
3852     q31_t *pState;                       /**< points to the state variable array. The array is of length numStages+blockSize, where blockSize is the value given to arm_iir_lattice_init_q31(). */
3853     q31_t *pkCoeffs;                     /**< points to the reflection coefficient array. The array is of length numStages. */
3854     q31_t *pvCoeffs;                     /**< points to the ladder coefficient array. The array is of length numStages+1. */
3855   } arm_iir_lattice_instance_q31;
3856 
3857   /**
3858    * @brief Instance structure for the floating-point IIR lattice filter. Taken by arm_iir_lattice_init_f32() and arm_iir_lattice_f32().
3859    */
3860   typedef struct
3861   {
3862     uint16_t numStages;                  /**< number of stages in the filter. */
3863     float32_t *pState;                   /**< points to the state variable array. The array is of length numStages+blockSize, where blockSize is the value given to arm_iir_lattice_init_f32(). */
3864     float32_t *pkCoeffs;                 /**< points to the reflection coefficient array. The array is of length numStages. */
3865     float32_t *pvCoeffs;                 /**< points to the ladder coefficient array. The array is of length numStages+1. */
3866   } arm_iir_lattice_instance_f32;
3867 
3868 
3869   /**
3870    * @brief Processing function for the floating-point IIR lattice filter.
3871    * @param[in]  S          points to an instance of the floating-point IIR lattice structure (arm_iir_lattice_instance_f32).
3872    * @param[in]  pSrc       points to the block of input data.
3873    * @param[out] pDst       points to the block of output data.
3874    * @param[in]  blockSize  number of samples to process.
3875    */
3876   void arm_iir_lattice_f32(
3877   const arm_iir_lattice_instance_f32 * S,
3878   float32_t * pSrc,
3879   float32_t * pDst,
3880   uint32_t blockSize);
3881 
3882 
3883   /**
3884    * @brief Initialization function for the floating-point IIR lattice filter.
3885    * @param[in] S          points to an instance of the floating-point IIR lattice structure (arm_iir_lattice_instance_f32).
3886    * @param[in] numStages  number of stages in the filter.
3887    * @param[in] pkCoeffs   points to the reflection coefficient buffer.  The array is of length numStages.
3888    * @param[in] pvCoeffs   points to the ladder coefficient buffer.  The array is of length numStages+1.
3889    * @param[in] pState     points to the state buffer.  The array is of length numStages+blockSize-1. NOTE(review): arm_iir_lattice_instance_f32 and the Q31/Q15 init functions document numStages+blockSize -- confirm which length is correct.
3890    * @param[in] blockSize  number of samples to process.
3891    */
3892   void arm_iir_lattice_init_f32(
3893   arm_iir_lattice_instance_f32 * S,
3894   uint16_t numStages,
3895   float32_t * pkCoeffs,
3896   float32_t * pvCoeffs,
3897   float32_t * pState,
3898   uint32_t blockSize);
3899 
3900 
3901   /**
3902    * @brief Processing function for the Q31 IIR lattice filter.
3903    * @param[in]  S          points to an instance of the Q31 IIR lattice structure (arm_iir_lattice_instance_q31).
3904    * @param[in]  pSrc       points to the block of input data.
3905    * @param[out] pDst       points to the block of output data.
3906    * @param[in]  blockSize  number of samples to process.
3907    */
3908   void arm_iir_lattice_q31(
3909   const arm_iir_lattice_instance_q31 * S,
3910   q31_t * pSrc,
3911   q31_t * pDst,
3912   uint32_t blockSize);
3913 
3914 
3915   /**
3916    * @brief Initialization function for the Q31 IIR lattice filter.
3917    * @param[in] S          points to an instance of the Q31 IIR lattice structure (arm_iir_lattice_instance_q31).
3918    * @param[in] numStages  number of stages in the filter.
3919    * @param[in] pkCoeffs   points to the reflection coefficient buffer.  The array is of length numStages.
3920    * @param[in] pvCoeffs   points to the ladder coefficient buffer.  The array is of length numStages+1.
3921    * @param[in] pState     points to the state buffer.  The array is of length numStages+blockSize.
3922    * @param[in] blockSize  number of samples to process.
3923    */
3924   void arm_iir_lattice_init_q31(
3925   arm_iir_lattice_instance_q31 * S,
3926   uint16_t numStages,
3927   q31_t * pkCoeffs,
3928   q31_t * pvCoeffs,
3929   q31_t * pState,
3930   uint32_t blockSize);
3931 
3932 
3933   /**
3934    * @brief Processing function for the Q15 IIR lattice filter.
3935    * @param[in]  S          points to an instance of the Q15 IIR lattice structure (arm_iir_lattice_instance_q15).
3936    * @param[in]  pSrc       points to the block of input data.
3937    * @param[out] pDst       points to the block of output data.
3938    * @param[in]  blockSize  number of samples to process.
3939    */
3940   void arm_iir_lattice_q15(
3941   const arm_iir_lattice_instance_q15 * S,
3942   q15_t * pSrc,
3943   q15_t * pDst,
3944   uint32_t blockSize);
3945 
3946 
3947 /**
3948  * @brief Initialization function for the Q15 IIR lattice filter.
3949  * @param[in] S          points to an instance of the fixed-point Q15 IIR lattice structure (arm_iir_lattice_instance_q15).
3950  * @param[in] numStages  number of stages in the filter.
3951  * @param[in] pkCoeffs   points to the reflection coefficient buffer.  The array is of length numStages.
3952  * @param[in] pvCoeffs   points to the ladder coefficient buffer.  The array is of length numStages+1.
3953  * @param[in] pState     points to the state buffer.  The array is of length numStages+blockSize.
3954  * @param[in] blockSize  number of samples to process per call.
3955  */
3956   void arm_iir_lattice_init_q15(
3957   arm_iir_lattice_instance_q15 * S,
3958   uint16_t numStages,
3959   q15_t * pkCoeffs,
3960   q15_t * pvCoeffs,
3961   q15_t * pState,
3962   uint32_t blockSize);
3963 
3964 
3965   /**
3966    * @brief Instance structure for the floating-point LMS filter; initialize it with arm_lms_init_f32().
3967    */
3968   typedef struct
3969   {
3970     uint16_t numTaps;    /**< number of coefficients in the filter. */
3971     float32_t *pState;   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
3972     float32_t *pCoeffs;  /**< points to the coefficient array. The array is of length numTaps. */
3973     float32_t mu;        /**< step size that controls filter coefficient updates. */
3974   } arm_lms_instance_f32;
3975 
3976 
3977   /**
3978    * @brief Processing function for floating-point LMS filter.
3979    * @param[in]  S          points to an instance of the floating-point LMS filter structure.
3980    * @param[in]  pSrc       points to the block of input data (blockSize samples).
3981    * @param[in]  pRef       points to the block of reference data (blockSize samples).
3982    * @param[out] pOut       points to the block of output data (blockSize samples).
3983    * @param[out] pErr       points to the block of error data (blockSize samples).
3984    * @param[in]  blockSize  number of samples to process.
3985    */
3986   void arm_lms_f32(
3987   const arm_lms_instance_f32 * S,
3988   float32_t * pSrc,
3989   float32_t * pRef,
3990   float32_t * pOut,
3991   float32_t * pErr,
3992   uint32_t blockSize);
3993 
3994 
3995   /**
3996    * @brief Initialization function for floating-point LMS filter.
3997    * @param[in,out] S          points to an instance of the floating-point LMS filter structure.
3998    * @param[in]     numTaps    number of filter coefficients.
3999    * @param[in]     pCoeffs    points to the coefficient buffer.  The array is of length numTaps.
4000    * @param[in]     pState     points to the state buffer.  The array is of length numTaps+blockSize-1.
4001    * @param[in]     mu         step size that controls filter coefficient updates.
4002    * @param[in]     blockSize  number of samples to process.
4003    */
4004   void arm_lms_init_f32(
4005   arm_lms_instance_f32 * S,
4006   uint16_t numTaps,
4007   float32_t * pCoeffs,
4008   float32_t * pState,
4009   float32_t mu,
4010   uint32_t blockSize);
4011 
4012 
4013   /**
4014    * @brief Instance structure for the Q15 LMS filter; initialize it with arm_lms_init_q15().
4015    */
4016   typedef struct
4017   {
4018     uint16_t numTaps;    /**< number of coefficients in the filter. */
4019     q15_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4020     q15_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
4021     q15_t mu;            /**< step size that controls filter coefficient updates. */
4022     uint32_t postShift;  /**< bit shift applied to coefficients. */
4023   } arm_lms_instance_q15;
4024 
4025 
4026   /**
4027    * @brief Initialization function for the Q15 LMS filter.
4028    * @param[in,out] S          points to an instance of the Q15 LMS filter structure.
4029    * @param[in]     numTaps    number of filter coefficients.
4030    * @param[in]     pCoeffs    points to the coefficient buffer.  The array is of length numTaps.
4031    * @param[in]     pState     points to the state buffer.  The array is of length numTaps+blockSize-1.
4032    * @param[in]     mu         step size that controls filter coefficient updates.
4033    * @param[in]     blockSize  number of samples to process.
4034    * @param[in]     postShift  bit shift applied to coefficients.
4035    */
4036   void arm_lms_init_q15(
4037   arm_lms_instance_q15 * S,
4038   uint16_t numTaps,
4039   q15_t * pCoeffs,
4040   q15_t * pState,
4041   q15_t mu,
4042   uint32_t blockSize,
4043   uint32_t postShift);
4044 
4045 
4046   /**
4047    * @brief Processing function for Q15 LMS filter.
4048    * @param[in]  S          points to an instance of the Q15 LMS filter structure.
4049    * @param[in]  pSrc       points to the block of input data (blockSize samples).
4050    * @param[in]  pRef       points to the block of reference data (blockSize samples).
4051    * @param[out] pOut       points to the block of output data (blockSize samples).
4052    * @param[out] pErr       points to the block of error data (blockSize samples).
4053    * @param[in]  blockSize  number of samples to process.
4054    */
4055   void arm_lms_q15(
4056   const arm_lms_instance_q15 * S,
4057   q15_t * pSrc,
4058   q15_t * pRef,
4059   q15_t * pOut,
4060   q15_t * pErr,
4061   uint32_t blockSize);
4062 
4063 
4064   /**
4065    * @brief Instance structure for the Q31 LMS filter; initialize it with arm_lms_init_q31().
4066    */
4067   typedef struct
4068   {
4069     uint16_t numTaps;    /**< number of coefficients in the filter. */
4070     q31_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4071     q31_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
4072     q31_t mu;            /**< step size that controls filter coefficient updates. */
4073     uint32_t postShift;  /**< bit shift applied to coefficients. */
4074   } arm_lms_instance_q31;
4075 
4076 
4077   /**
4078    * @brief Processing function for Q31 LMS filter.
4079    * @param[in]  S          points to an instance of the Q31 LMS filter structure.
4080    * @param[in]  pSrc       points to the block of input data.
4081    * @param[in]  pRef       points to the block of reference data.
4082    * @param[out] pOut       points to the block of output data.
4083    * @param[out] pErr       points to the block of error data.
4084    * @param[in]  blockSize  number of samples to process.
4085    */
4086   void arm_lms_q31(
4087   const arm_lms_instance_q31 * S,
4088   q31_t * pSrc,
4089   q31_t * pRef,
4090   q31_t * pOut,
4091   q31_t * pErr,
4092   uint32_t blockSize);
4093 
4094 
4095   /**
4096    * @brief Initialization function for Q31 LMS filter.
4097    * @param[in,out] S          points to an instance of the Q31 LMS filter structure.
4098    * @param[in]     numTaps    number of filter coefficients.
4099    * @param[in]     pCoeffs    points to the coefficient buffer.  The array is of length numTaps.
4100    * @param[in]     pState     points to the state buffer.  The array is of length numTaps+blockSize-1.
4101    * @param[in]     mu         step size that controls filter coefficient updates.
4102    * @param[in]     blockSize  number of samples to process.
4103    * @param[in]     postShift  bit shift applied to coefficients.
4104    */
4105   void arm_lms_init_q31(
4106   arm_lms_instance_q31 * S,
4107   uint16_t numTaps,
4108   q31_t * pCoeffs,
4109   q31_t * pState,
4110   q31_t mu,
4111   uint32_t blockSize,
4112   uint32_t postShift);
4113 
4114 
4115   /**
4116    * @brief Instance structure for the floating-point normalized LMS filter; initialize it with arm_lms_norm_init_f32().
4117    */
4118   typedef struct
4119   {
4120     uint16_t numTaps;     /**< number of coefficients in the filter. */
4121     float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4122     float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
4123     float32_t mu;         /**< step size that controls filter coefficient updates. */
4124     float32_t energy;     /**< saves previous frame energy. */
4125     float32_t x0;         /**< saves previous input sample. */
4126   } arm_lms_norm_instance_f32;
4127 
4128 
4129   /**
4130    * @brief Processing function for floating-point normalized LMS filter.
4131    * @param[in]  S          points to an instance of the floating-point normalized LMS filter structure.
4132    * @param[in]  pSrc       points to the block of input data (blockSize samples).
4133    * @param[in]  pRef       points to the block of reference data (blockSize samples).
4134    * @param[out] pOut       points to the block of output data (blockSize samples).
4135    * @param[out] pErr       points to the block of error data (blockSize samples).
4136    * @param[in]  blockSize  number of samples to process.
4137    */
4138   void arm_lms_norm_f32(
4139   arm_lms_norm_instance_f32 * S,
4140   float32_t * pSrc,
4141   float32_t * pRef,
4142   float32_t * pOut,
4143   float32_t * pErr,
4144   uint32_t blockSize);
4145 
4146 
4147   /**
4148    * @brief Initialization function for floating-point normalized LMS filter.
4149    * @param[in,out] S          points to an instance of the floating-point normalized LMS filter structure.
4150    * @param[in]     numTaps    number of filter coefficients.
4151    * @param[in]     pCoeffs    points to the coefficient buffer.  The array is of length numTaps.
4152    * @param[in]     pState     points to the state buffer.  The array is of length numTaps+blockSize-1.
4153    * @param[in]     mu         step size that controls filter coefficient updates.
4154    * @param[in]     blockSize  number of samples to process.
4155    */
4156   void arm_lms_norm_init_f32(
4157   arm_lms_norm_instance_f32 * S,
4158   uint16_t numTaps,
4159   float32_t * pCoeffs,
4160   float32_t * pState,
4161   float32_t mu,
4162   uint32_t blockSize);
4163 
4164 
4165   /**
4166    * @brief Instance structure for the Q31 normalized LMS filter; initialize it with arm_lms_norm_init_q31().
4167    */
4168   typedef struct
4169   {
4170     uint16_t numTaps;     /**< number of coefficients in the filter. */
4171     q31_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4172     q31_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
4173     q31_t mu;             /**< step size that controls filter coefficient updates. */
4174     uint8_t postShift;    /**< bit shift applied to coefficients. */
4175     q31_t *recipTable;    /**< points to the reciprocal initial value table. */
4176     q31_t energy;         /**< saves previous frame energy. */
4177     q31_t x0;             /**< saves previous input sample. */
4178   } arm_lms_norm_instance_q31;
4179 
4180 
4181   /**
4182    * @brief Processing function for Q31 normalized LMS filter.
4183    * @param[in]  S          points to an instance of the Q31 normalized LMS filter structure.
4184    * @param[in]  pSrc       points to the block of input data (blockSize samples).
4185    * @param[in]  pRef       points to the block of reference data (blockSize samples).
4186    * @param[out] pOut       points to the block of output data (blockSize samples).
4187    * @param[out] pErr       points to the block of error data (blockSize samples).
4188    * @param[in]  blockSize  number of samples to process.
4189    */
4190   void arm_lms_norm_q31(
4191   arm_lms_norm_instance_q31 * S,
4192   q31_t * pSrc,
4193   q31_t * pRef,
4194   q31_t * pOut,
4195   q31_t * pErr,
4196   uint32_t blockSize);
4197 
4198 
4199   /**
4200    * @brief Initialization function for Q31 normalized LMS filter.
4201    * @param[in,out] S          points to an instance of the Q31 normalized LMS filter structure.
4202    * @param[in]     numTaps    number of filter coefficients.
4203    * @param[in]     pCoeffs    points to the coefficient buffer.  The array is of length numTaps.
4204    * @param[in]     pState     points to the state buffer.  The array is of length numTaps+blockSize-1.
4205    * @param[in]     mu         step size that controls filter coefficient updates.
4206    * @param[in]     blockSize  number of samples to process.
4207    * @param[in]     postShift  bit shift applied to coefficients.
4208    */
4209   void arm_lms_norm_init_q31(
4210   arm_lms_norm_instance_q31 * S,
4211   uint16_t numTaps,
4212   q31_t * pCoeffs,
4213   q31_t * pState,
4214   q31_t mu,
4215   uint32_t blockSize,
4216   uint8_t postShift);
4217 
4218 
4219   /**
4220    * @brief Instance structure for the Q15 normalized LMS filter; initialize it with arm_lms_norm_init_q15().
4221    */
4222   typedef struct
4223   {
4224     uint16_t numTaps;     /**< number of coefficients in the filter. */
4225     q15_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4226     q15_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
4227     q15_t mu;             /**< step size that controls filter coefficient updates. */
4228     uint8_t postShift;    /**< bit shift applied to coefficients. */
4229     q15_t *recipTable;    /**< points to the reciprocal initial value table. */
4230     q15_t energy;         /**< saves previous frame energy. */
4231     q15_t x0;             /**< saves previous input sample. */
4232   } arm_lms_norm_instance_q15;
4233 
4234 
4235   /**
4236    * @brief Processing function for Q15 normalized LMS filter.
4237    * @param[in]  S          points to an instance of the Q15 normalized LMS filter structure.
4238    * @param[in]  pSrc       points to the block of input data (blockSize samples).
4239    * @param[in]  pRef       points to the block of reference data (blockSize samples).
4240    * @param[out] pOut       points to the block of output data (blockSize samples).
4241    * @param[out] pErr       points to the block of error data (blockSize samples).
4242    * @param[in]  blockSize  number of samples to process.
4243    */
4244   void arm_lms_norm_q15(
4245   arm_lms_norm_instance_q15 * S,
4246   q15_t * pSrc,
4247   q15_t * pRef,
4248   q15_t * pOut,
4249   q15_t * pErr,
4250   uint32_t blockSize);
4251 
4252 
4253   /**
4254    * @brief Initialization function for Q15 normalized LMS filter.
4255    * @param[in,out] S          points to an instance of the Q15 normalized LMS filter structure.
4256    * @param[in]     numTaps    number of filter coefficients.
4257    * @param[in]     pCoeffs    points to the coefficient buffer.  The array is of length numTaps.
4258    * @param[in]     pState     points to the state buffer.  The array is of length numTaps+blockSize-1.
4259    * @param[in]     mu         step size that controls filter coefficient updates.
4260    * @param[in]     blockSize  number of samples to process.
4261    * @param[in]     postShift  bit shift applied to coefficients.
4262    */
4263   void arm_lms_norm_init_q15(
4264   arm_lms_norm_instance_q15 * S,
4265   uint16_t numTaps,
4266   q15_t * pCoeffs,
4267   q15_t * pState,
4268   q15_t mu,
4269   uint32_t blockSize,
4270   uint8_t postShift);
4271 
4272 
4273   /**
4274    * @brief Correlation of floating-point sequences.
4275    * @param[in]  pSrcA    points to the first input sequence.
4276    * @param[in]  srcALen  length of the first input sequence.
4277    * @param[in]  pSrcB    points to the second input sequence.
4278    * @param[in]  srcBLen  length of the second input sequence.
4279    * @param[out] pDst     points to the block of output data.  Length 2 * max(srcALen, srcBLen) - 1.
4280    */
4281   void arm_correlate_f32(
4282   float32_t * pSrcA,
4283   uint32_t srcALen,
4284   float32_t * pSrcB,
4285   uint32_t srcBLen,
4286   float32_t * pDst);
4287 
4288 
4289    /**
4290    * @brief Correlation of Q15 sequences using a caller-supplied scratch buffer.
4291    * @param[in]  pSrcA     points to the first input sequence.
4292    * @param[in]  srcALen   length of the first input sequence.
4293    * @param[in]  pSrcB     points to the second input sequence.
4294    * @param[in]  srcBLen   length of the second input sequence.
4295    * @param[out] pDst      points to the block of output data.  Length 2 * max(srcALen, srcBLen) - 1.
4296    * @param[in]  pScratch  points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
4297    */
4298   void arm_correlate_opt_q15(
4299   q15_t * pSrcA,
4300   uint32_t srcALen,
4301   q15_t * pSrcB,
4302   uint32_t srcBLen,
4303   q15_t * pDst,
4304   q15_t * pScratch);
4305 
4306 
4307   /**
4308    * @brief Correlation of Q15 sequences.
4309    * @param[in]  pSrcA    points to the first input sequence.
4310    * @param[in]  srcALen  length of the first input sequence.
4311    * @param[in]  pSrcB    points to the second input sequence.
4312    * @param[in]  srcBLen  length of the second input sequence.
4313    * @param[out] pDst     points to the block of output data.  Length 2 * max(srcALen, srcBLen) - 1.
4314    */
4315 
4316   void arm_correlate_q15(
4317   q15_t * pSrcA,
4318   uint32_t srcALen,
4319   q15_t * pSrcB,
4320   uint32_t srcBLen,
4321   q15_t * pDst);
4322 
4323 
4324   /**
4325    * @brief Correlation of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
4326    * @param[in]  pSrcA    points to the first input sequence.
4327    * @param[in]  srcALen  length of the first input sequence.
4328    * @param[in]  pSrcB    points to the second input sequence.
4329    * @param[in]  srcBLen  length of the second input sequence.
4330    * @param[out] pDst     points to the block of output data.  Length 2 * max(srcALen, srcBLen) - 1.
4331    */
4332 
4333   void arm_correlate_fast_q15(
4334   q15_t * pSrcA,
4335   uint32_t srcALen,
4336   q15_t * pSrcB,
4337   uint32_t srcBLen,
4338   q15_t * pDst);
4339 
4340 
4341   /**
4342    * @brief Correlation of Q15 sequences (fast version, using a caller-supplied scratch buffer) for Cortex-M3 and Cortex-M4.
4343    * @param[in]  pSrcA     points to the first input sequence.
4344    * @param[in]  srcALen   length of the first input sequence.
4345    * @param[in]  pSrcB     points to the second input sequence.
4346    * @param[in]  srcBLen   length of the second input sequence.
4347    * @param[out] pDst      points to the block of output data.  Length 2 * max(srcALen, srcBLen) - 1.
4348    * @param[in]  pScratch  points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
4349    */
4350   void arm_correlate_fast_opt_q15(
4351   q15_t * pSrcA,
4352   uint32_t srcALen,
4353   q15_t * pSrcB,
4354   uint32_t srcBLen,
4355   q15_t * pDst,
4356   q15_t * pScratch);
4357 
4358 
4359   /**
4360    * @brief Correlation of Q31 sequences.
4361    * @param[in]  pSrcA    points to the first input sequence.
4362    * @param[in]  srcALen  length of the first input sequence.
4363    * @param[in]  pSrcB    points to the second input sequence.
4364    * @param[in]  srcBLen  length of the second input sequence.
4365    * @param[out] pDst     points to the block of output data.  Length 2 * max(srcALen, srcBLen) - 1.
4366    */
4367   void arm_correlate_q31(
4368   q31_t * pSrcA,
4369   uint32_t srcALen,
4370   q31_t * pSrcB,
4371   uint32_t srcBLen,
4372   q31_t * pDst);
4373 
4374 
4375   /**
4376    * @brief Correlation of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4.
4377    * @param[in]  pSrcA    points to the first input sequence.
4378    * @param[in]  srcALen  length of the first input sequence.
4379    * @param[in]  pSrcB    points to the second input sequence.
4380    * @param[in]  srcBLen  length of the second input sequence.
4381    * @param[out] pDst     points to the block of output data.  Length 2 * max(srcALen, srcBLen) - 1.
4382    */
4383   void arm_correlate_fast_q31(
4384   q31_t * pSrcA,
4385   uint32_t srcALen,
4386   q31_t * pSrcB,
4387   uint32_t srcBLen,
4388   q31_t * pDst);
4389 
4390 
4391  /**
4392    * @brief Correlation of Q7 sequences using caller-supplied scratch buffers.
4393    * @param[in]  pSrcA      points to the first input sequence.
4394    * @param[in]  srcALen    length of the first input sequence.
4395    * @param[in]  pSrcB      points to the second input sequence.
4396    * @param[in]  srcBLen    length of the second input sequence.
4397    * @param[out] pDst       points to the block of output data.  Length 2 * max(srcALen, srcBLen) - 1.
4398    * @param[in]  pScratch1  points to scratch buffer (of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
4399    * @param[in]  pScratch2  points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
4400    */
4401   void arm_correlate_opt_q7(
4402   q7_t * pSrcA,
4403   uint32_t srcALen,
4404   q7_t * pSrcB,
4405   uint32_t srcBLen,
4406   q7_t * pDst,
4407   q15_t * pScratch1,
4408   q15_t * pScratch2);
4409 
4410 
4411   /**
4412    * @brief Correlation of Q7 sequences.
4413    * @param[in]  pSrcA    points to the first input sequence.
4414    * @param[in]  srcALen  length of the first input sequence.
4415    * @param[in]  pSrcB    points to the second input sequence.
4416    * @param[in]  srcBLen  length of the second input sequence.
4417    * @param[out] pDst     points to the block of output data.  Length 2 * max(srcALen, srcBLen) - 1.
4418    */
4419   void arm_correlate_q7(
4420   q7_t * pSrcA,
4421   uint32_t srcALen,
4422   q7_t * pSrcB,
4423   uint32_t srcBLen,
4424   q7_t * pDst);
4425 
4426 
4427   /**
4428    * @brief Instance structure for the floating-point sparse FIR filter; initialize it with arm_fir_sparse_init_f32().
4429    */
4430   typedef struct
4431   {
4432     uint16_t numTaps;             /**< number of coefficients in the filter. */
4433     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4434     float32_t *pState;            /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4435     float32_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps. */
4436     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4437     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4438   } arm_fir_sparse_instance_f32;
4439 
4440   /**
4441    * @brief Instance structure for the Q31 sparse FIR filter; initialize it with arm_fir_sparse_init_q31().
4442    */
4443   typedef struct
4444   {
4445     uint16_t numTaps;             /**< number of coefficients in the filter. */
4446     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4447     q31_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4448     q31_t *pCoeffs;               /**< points to the coefficient array. The array is of length numTaps. */
4449     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4450     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4451   } arm_fir_sparse_instance_q31;
4452 
4453   /**
4454    * @brief Instance structure for the Q15 sparse FIR filter; initialize it with arm_fir_sparse_init_q15().
4455    */
4456   typedef struct
4457   {
4458     uint16_t numTaps;             /**< number of coefficients in the filter. */
4459     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4460     q15_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4461     q15_t *pCoeffs;               /**< points to the coefficient array. The array is of length numTaps. */
4462     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4463     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4464   } arm_fir_sparse_instance_q15;
4465 
4466   /**
4467    * @brief Instance structure for the Q7 sparse FIR filter; initialize it with arm_fir_sparse_init_q7().
4468    */
4469   typedef struct
4470   {
4471     uint16_t numTaps;             /**< number of coefficients in the filter. */
4472     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4473     q7_t *pState;                 /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4474     q7_t *pCoeffs;                /**< points to the coefficient array. The array is of length numTaps. */
4475     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4476     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4477   } arm_fir_sparse_instance_q7;
4478 
4479 
4480   /**
4481    * @brief Processing function for the floating-point sparse FIR filter.
4482    * @param[in]  S           points to an instance of the floating-point sparse FIR structure.
4483    * @param[in]  pSrc        points to the block of input data.
4484    * @param[out] pDst        points to the block of output data.
4485    * @param[in]  pScratchIn  points to a temporary buffer of size blockSize.
4486    * @param[in]  blockSize   number of input samples to process per call.
4487    */
4488   void arm_fir_sparse_f32(
4489   arm_fir_sparse_instance_f32 * S,
4490   float32_t * pSrc,
4491   float32_t * pDst,
4492   float32_t * pScratchIn,
4493   uint32_t blockSize);
4494 
4495 
4496   /**
4497    * @brief Initialization function for the floating-point sparse FIR filter.
4498    * @param[in,out] S          points to an instance of the floating-point sparse FIR structure.
4499    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4500    * @param[in]     pCoeffs    points to the array of filter coefficients.  The array is of length numTaps.
4501    * @param[in]     pState     points to the state buffer.
4502    * @param[in]     pTapDelay  points to the array of offset times.  The array is of length numTaps.
4503    * @param[in]     maxDelay   maximum offset time supported.
4504    * @param[in]     blockSize  number of samples that will be processed per block.
4505    */
4506   void arm_fir_sparse_init_f32(
4507   arm_fir_sparse_instance_f32 * S,
4508   uint16_t numTaps,
4509   float32_t * pCoeffs,
4510   float32_t * pState,
4511   int32_t * pTapDelay,
4512   uint16_t maxDelay,
4513   uint32_t blockSize);
4514 
4515 
4516   /**
4517    * @brief Processing function for the Q31 sparse FIR filter.
4518    * @param[in]  S           points to an instance of the Q31 sparse FIR structure.
4519    * @param[in]  pSrc        points to the block of input data.
4520    * @param[out] pDst        points to the block of output data.
4521    * @param[in]  pScratchIn  points to a temporary buffer of size blockSize.
4522    * @param[in]  blockSize   number of input samples to process per call.
4523    */
4524   void arm_fir_sparse_q31(
4525   arm_fir_sparse_instance_q31 * S,
4526   q31_t * pSrc,
4527   q31_t * pDst,
4528   q31_t * pScratchIn,
4529   uint32_t blockSize);
4530 
4531 
4532   /**
4533    * @brief Initialization function for the Q31 sparse FIR filter.
4534    * @param[in,out] S          points to an instance of the Q31 sparse FIR structure.
4535    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4536    * @param[in]     pCoeffs    points to the array of filter coefficients.  The array is of length numTaps.
4537    * @param[in]     pState     points to the state buffer.
4538    * @param[in]     pTapDelay  points to the array of offset times.  The array is of length numTaps.
4539    * @param[in]     maxDelay   maximum offset time supported.
4540    * @param[in]     blockSize  number of samples that will be processed per block.
4541    */
4542   void arm_fir_sparse_init_q31(
4543   arm_fir_sparse_instance_q31 * S,
4544   uint16_t numTaps,
4545   q31_t * pCoeffs,
4546   q31_t * pState,
4547   int32_t * pTapDelay,
4548   uint16_t maxDelay,
4549   uint32_t blockSize);
4550 
4551 
4552   /**
4553    * @brief Processing function for the Q15 sparse FIR filter.
4554    * @param[in]  S            points to an instance of the Q15 sparse FIR structure.
4555    * @param[in]  pSrc         points to the block of input data.
4556    * @param[out] pDst         points to the block of output data.
4557    * @param[in]  pScratchIn   points to a temporary buffer of size blockSize.
4558    * @param[in]  pScratchOut  points to a temporary buffer of size blockSize.
4559    * @param[in]  blockSize    number of input samples to process per call.
4560    */
4561   void arm_fir_sparse_q15(
4562   arm_fir_sparse_instance_q15 * S,
4563   q15_t * pSrc,
4564   q15_t * pDst,
4565   q15_t * pScratchIn,
4566   q31_t * pScratchOut,
4567   uint32_t blockSize);
4568 
4569 
4570   /**
4571    * @brief Initialization function for the Q15 sparse FIR filter.
4572    * @param[in,out] S          points to an instance of the Q15 sparse FIR structure.
4573    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4574    * @param[in]     pCoeffs    points to the array of filter coefficients.  The array is of length numTaps.
4575    * @param[in]     pState     points to the state buffer.
4576    * @param[in]     pTapDelay  points to the array of offset times.  The array is of length numTaps.
4577    * @param[in]     maxDelay   maximum offset time supported.
4578    * @param[in]     blockSize  number of samples that will be processed per block.
4579    */
4580   void arm_fir_sparse_init_q15(
4581   arm_fir_sparse_instance_q15 * S,
4582   uint16_t numTaps,
4583   q15_t * pCoeffs,
4584   q15_t * pState,
4585   int32_t * pTapDelay,
4586   uint16_t maxDelay,
4587   uint32_t blockSize);
4588 
4589 
4590   /**
4591    * @brief Processing function for the Q7 sparse FIR filter.
4592    * @param[in]  S            points to an instance of the Q7 sparse FIR structure.
4593    * @param[in]  pSrc         points to the block of input data.
4594    * @param[out] pDst         points to the block of output data.
4595    * @param[in]  pScratchIn   points to a temporary buffer of size blockSize.
4596    * @param[in]  pScratchOut  points to a temporary buffer of size blockSize.
4597    * @param[in]  blockSize    number of input samples to process per call.
4598    */
4599   void arm_fir_sparse_q7(
4600   arm_fir_sparse_instance_q7 * S,
4601   q7_t * pSrc,
4602   q7_t * pDst,
4603   q7_t * pScratchIn,
4604   q31_t * pScratchOut,
4605   uint32_t blockSize);
4606 
4607 
4608   /**
4609    * @brief Initialization function for the Q7 sparse FIR filter.
4610    * @param[in,out] S          points to an instance of the Q7 sparse FIR structure.
4611    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4612    * @param[in]     pCoeffs    points to the array of filter coefficients.  The array is of length numTaps.
4613    * @param[in]     pState     points to the state buffer.
4614    * @param[in]     pTapDelay  points to the array of offset times.  The array is of length numTaps.
4615    * @param[in]     maxDelay   maximum offset time supported.
4616    * @param[in]     blockSize  number of samples that will be processed per block.
4617    */
4618   void arm_fir_sparse_init_q7(
4619   arm_fir_sparse_instance_q7 * S,
4620   uint16_t numTaps,
4621   q7_t * pCoeffs,
4622   q7_t * pState,
4623   int32_t * pTapDelay,
4624   uint16_t maxDelay,
4625   uint32_t blockSize);
4626 
4627 
4628   /**
4629    * @brief Floating-point sin_cos function.
4630    * @param[in]  theta    input value in degrees.
4631    * @param[out] pSinVal  points to the processed sine output.
4632    * @param[out] pCosVal  points to the processed cosine output.
4633    */
4634   void arm_sin_cos_f32(
4635   float32_t theta,
4636   float32_t * pSinVal,
4637   float32_t * pCosVal);
4638 
4639 
4640   /**
4641    * @brief Q31 sin_cos function.
4642    * @param[in]  theta    scaled input value in degrees.
4643    * @param[out] pSinVal  points to the processed sine output.
4644    * @param[out] pCosVal  points to the processed cosine output.
4645    */
4646   void arm_sin_cos_q31(
4647   q31_t theta,
4648   q31_t * pSinVal,
4649   q31_t * pCosVal);
4650 
4651 
4652   /**
4653    * @brief Floating-point complex conjugate.
4654    * @param[in]  pSrc        points to the input vector.
4655    * @param[out] pDst        points to the output vector.
4656    * @param[in]  numSamples  number of complex samples in each vector.
4657    */
4658   void arm_cmplx_conj_f32(
4659   float32_t * pSrc,
4660   float32_t * pDst,
4661   uint32_t numSamples);
4662 
4663   /**
4664    * @brief Q31 complex conjugate.
4665    * @param[in]  pSrc        points to the input vector.
4666    * @param[out] pDst        points to the output vector.
4667    * @param[in]  numSamples  number of complex samples in each vector.
4668    */
4669   void arm_cmplx_conj_q31(
4670   q31_t * pSrc,
4671   q31_t * pDst,
4672   uint32_t numSamples);
4673 
4674 
4675   /**
4676    * @brief Q15 complex conjugate.
4677    * @param[in]  pSrc        points to the input vector.
4678    * @param[out] pDst        points to the output vector.
4679    * @param[in]  numSamples  number of complex samples in each vector.
4680    */
4681   void arm_cmplx_conj_q15(
4682   q15_t * pSrc,
4683   q15_t * pDst,
4684   uint32_t numSamples);
4685 
4686 
4687   /**
4688    * @brief Floating-point complex magnitude squared.
4689    * @param[in]  pSrc        points to the complex input vector.
4690    * @param[out] pDst        points to the real output vector.
4691    * @param[in]  numSamples  number of complex samples in the input vector.
4692    */
4693   void arm_cmplx_mag_squared_f32(
4694   float32_t * pSrc,
4695   float32_t * pDst,
4696   uint32_t numSamples);
4697 
4698 
4699   /**
4700    * @brief Q31 complex magnitude squared.
4701    * @param[in]  pSrc        points to the complex input vector.
4702    * @param[out] pDst        points to the real output vector.
4703    * @param[in]  numSamples  number of complex samples in the input vector.
4704    */
4705   void arm_cmplx_mag_squared_q31(
4706   q31_t * pSrc,
4707   q31_t * pDst,
4708   uint32_t numSamples);
4709 
4710 
  /**
   * @brief  Q15 complex magnitude squared.
   * @param[in]  pSrc        points to the complex input vector
   * @param[out] pDst        points to the real output vector
   * @param[in]  numSamples  number of complex samples in the input vector
   * @return none
   */
  void arm_cmplx_mag_squared_q15(
  q15_t * pSrc,
  q15_t * pDst,
  uint32_t numSamples);
4721 
4722 
4723  /**
4724    * @ingroup groupController
4725    */
4726 
4727   /**
4728    * @defgroup PID PID Motor Control
4729    *
4730    * A Proportional Integral Derivative (PID) controller is a generic feedback control
4731    * loop mechanism widely used in industrial control systems.
4732    * A PID controller is the most commonly used type of feedback controller.
4733    *
4734    * This set of functions implements (PID) controllers
4735    * for Q15, Q31, and floating-point data types.  The functions operate on a single sample
4736    * of data and each call to the function returns a single processed value.
4737    * <code>S</code> points to an instance of the PID control data structure.  <code>in</code>
4738    * is the input sample value. The functions return the output value.
4739    *
4740    * \par Algorithm:
4741    * <pre>
4742    *    y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]
4743    *    A0 = Kp + Ki + Kd
4744    *    A1 = (-Kp ) - (2 * Kd )
4745    *    A2 = Kd  </pre>
4746    *
4747    * \par
4748    * where \c Kp is proportional constant, \c Ki is Integral constant and \c Kd is Derivative constant
4749    *
4750    * \par
4751    * \image html PID.gif "Proportional Integral Derivative Controller"
4752    *
4753    * \par
4754    * The PID controller calculates an "error" value as the difference between
4755    * the measured output and the reference input.
4756    * The controller attempts to minimize the error by adjusting the process control inputs.
4757    * The proportional value determines the reaction to the current error,
4758    * the integral value determines the reaction based on the sum of recent errors,
4759    * and the derivative value determines the reaction based on the rate at which the error has been changing.
4760    *
4761    * \par Instance Structure
4762    * The Gains A0, A1, A2 and state variables for a PID controller are stored together in an instance data structure.
4763    * A separate instance structure must be defined for each PID Controller.
4764    * There are separate instance structure declarations for each of the 3 supported data types.
4765    *
4766    * \par Reset Functions
4767    * There is also an associated reset function for each data type which clears the state array.
4768    *
4769    * \par Initialization Functions
4770    * There is also an associated initialization function for each data type.
4771    * The initialization function performs the following operations:
4772    * - Initializes the Gains A0, A1, A2 from Kp,Ki, Kd gains.
4773    * - Zeros out the values in the state buffer.
4774    *
4775    * \par
4776    * Instance structure cannot be placed into a const data section and it is recommended to use the initialization function.
4777    *
4778    * \par Fixed-Point Behavior
4779    * Care must be taken when using the fixed-point versions of the PID Controller functions.
4780    * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
4781    * Refer to the function specific documentation below for usage guidelines.
4782    */
4783 
4784   /**
4785    * @addtogroup PID
4786    * @{
4787    */
4788 
4789   /**
4790    * @brief  Process function for the floating-point PID Control.
4791    * @param[in,out] S   is an instance of the floating-point PID Control structure
4792    * @param[in]     in  input sample to process
4793    * @return out processed output sample.
4794    */
arm_pid_f32(arm_pid_instance_f32 * S,float32_t in)4795   static __INLINE float32_t arm_pid_f32(
4796   arm_pid_instance_f32 * S,
4797   float32_t in)
4798   {
4799     float32_t out;
4800 
4801     /* y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]  */
4802     out = (S->A0 * in) +
4803       (S->A1 * S->state[0]) + (S->A2 * S->state[1]) + (S->state[2]);
4804 
4805     /* Update state */
4806     S->state[1] = S->state[0];
4807     S->state[0] = in;
4808     S->state[2] = out;
4809 
4810     /* return to application */
4811     return (out);
4812 
4813   }
4814 
4815   /**
4816    * @brief  Process function for the Q31 PID Control.
4817    * @param[in,out] S  points to an instance of the Q31 PID Control structure
4818    * @param[in]     in  input sample to process
4819    * @return out processed output sample.
4820    *
4821    * <b>Scaling and Overflow Behavior:</b>
4822    * \par
4823    * The function is implemented using an internal 64-bit accumulator.
4824    * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
4825    * Thus, if the accumulator result overflows it wraps around rather than clip.
4826    * In order to avoid overflows completely the input signal must be scaled down by 2 bits as there are four additions.
4827    * After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format.
4828    */
arm_pid_q31(arm_pid_instance_q31 * S,q31_t in)4829   static __INLINE q31_t arm_pid_q31(
4830   arm_pid_instance_q31 * S,
4831   q31_t in)
4832   {
4833     q63_t acc;
4834     q31_t out;
4835 
4836     /* acc = A0 * x[n]  */
4837     acc = (q63_t) S->A0 * in;
4838 
4839     /* acc += A1 * x[n-1] */
4840     acc += (q63_t) S->A1 * S->state[0];
4841 
4842     /* acc += A2 * x[n-2]  */
4843     acc += (q63_t) S->A2 * S->state[1];
4844 
4845     /* convert output to 1.31 format to add y[n-1] */
4846     out = (q31_t) (acc >> 31u);
4847 
4848     /* out += y[n-1] */
4849     out += S->state[2];
4850 
4851     /* Update state */
4852     S->state[1] = S->state[0];
4853     S->state[0] = in;
4854     S->state[2] = out;
4855 
4856     /* return to application */
4857     return (out);
4858   }
4859 
4860 
4861   /**
4862    * @brief  Process function for the Q15 PID Control.
4863    * @param[in,out] S   points to an instance of the Q15 PID Control structure
4864    * @param[in]     in  input sample to process
4865    * @return out processed output sample.
4866    *
4867    * <b>Scaling and Overflow Behavior:</b>
4868    * \par
4869    * The function is implemented using a 64-bit internal accumulator.
4870    * Both Gains and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
4871    * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
4872    * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
4873    * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
4874    * Lastly, the accumulator is saturated to yield a result in 1.15 format.
4875    */
arm_pid_q15(arm_pid_instance_q15 * S,q15_t in)4876   static __INLINE q15_t arm_pid_q15(
4877   arm_pid_instance_q15 * S,
4878   q15_t in)
4879   {
4880     q63_t acc;
4881     q15_t out;
4882 
4883 #ifndef ARM_MATH_CM0_FAMILY
4884     __SIMD32_TYPE *vstate;
4885 
4886     /* Implementation of PID controller */
4887 
4888     /* acc = A0 * x[n]  */
4889     acc = (q31_t) __SMUAD((uint32_t)S->A0, (uint32_t)in);
4890 
4891     /* acc += A1 * x[n-1] + A2 * x[n-2]  */
4892     vstate = __SIMD32_CONST(S->state);
4893     acc = (q63_t)__SMLALD((uint32_t)S->A1, (uint32_t)*vstate, (uint64_t)acc);
4894 #else
4895     /* acc = A0 * x[n]  */
4896     acc = ((q31_t) S->A0) * in;
4897 
4898     /* acc += A1 * x[n-1] + A2 * x[n-2]  */
4899     acc += (q31_t) S->A1 * S->state[0];
4900     acc += (q31_t) S->A2 * S->state[1];
4901 #endif
4902 
4903     /* acc += y[n-1] */
4904     acc += (q31_t) S->state[2] << 15;
4905 
4906     /* saturate the output */
4907     out = (q15_t) (__SSAT((acc >> 15), 16));
4908 
4909     /* Update state */
4910     S->state[1] = S->state[0];
4911     S->state[0] = in;
4912     S->state[2] = out;
4913 
4914     /* return to application */
4915     return (out);
4916   }
4917 
4918   /**
4919    * @} end of PID group
4920    */
4921 
4922 
  /**
   * @brief Floating-point matrix inverse.
   * @param[in]  src   points to the instance of the input floating-point matrix structure.
   * @param[out] dst   points to the instance of the output floating-point matrix structure.
   * @return ARM_MATH_SIZE_MISMATCH if the dimensions do not match.
   * If the input matrix is singular (does not have an inverse), the algorithm terminates and returns error status ARM_MATH_SINGULAR.
   */
  arm_status arm_mat_inverse_f32(
  const arm_matrix_instance_f32 * src,
  arm_matrix_instance_f32 * dst);
4933 
4934 
  /**
   * @brief Floating-point matrix inverse for arm_matrix_instance_f64 matrices.
   * @param[in]  src   points to the instance of the input floating-point (f64) matrix structure.
   * @param[out] dst   points to the instance of the output floating-point (f64) matrix structure.
   * @return ARM_MATH_SIZE_MISMATCH if the dimensions do not match.
   * If the input matrix is singular (does not have an inverse), the algorithm terminates and returns error status ARM_MATH_SINGULAR.
   */
  arm_status arm_mat_inverse_f64(
  const arm_matrix_instance_f64 * src,
  arm_matrix_instance_f64 * dst);
4945 
4946 
4947 
4948   /**
4949    * @ingroup groupController
4950    */
4951 
4952   /**
4953    * @defgroup clarke Vector Clarke Transform
4954    * Forward Clarke transform converts the instantaneous stator phases into a two-coordinate time invariant vector.
4955    * Generally the Clarke transform uses three-phase currents <code>Ia, Ib and Ic</code> to calculate currents
4956    * in the two-phase orthogonal stator axis <code>Ialpha</code> and <code>Ibeta</code>.
4957    * When <code>Ialpha</code> is superposed with <code>Ia</code> as shown in the figure below
   * \image html clarke.gif "Stator current space vector and its components in (a,b)."
4959    * and <code>Ia + Ib + Ic = 0</code>, in this condition <code>Ialpha</code> and <code>Ibeta</code>
4960    * can be calculated using only <code>Ia</code> and <code>Ib</code>.
4961    *
4962    * The function operates on a single sample of data and each call to the function returns the processed output.
4963    * The library provides separate functions for Q31 and floating-point data types.
4964    * \par Algorithm
4965    * \image html clarkeFormula.gif
4966    * where <code>Ia</code> and <code>Ib</code> are the instantaneous stator phases and
4967    * <code>pIalpha</code> and <code>pIbeta</code> are the two coordinates of time invariant vector.
4968    * \par Fixed-Point Behavior
4969    * Care must be taken when using the Q31 version of the Clarke transform.
4970    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
4971    * Refer to the function specific documentation below for usage guidelines.
4972    */
4973 
4974   /**
4975    * @addtogroup clarke
4976    * @{
4977    */
4978 
4979   /**
4980    *
4981    * @brief  Floating-point Clarke transform
4982    * @param[in]  Ia       input three-phase coordinate <code>a</code>
4983    * @param[in]  Ib       input three-phase coordinate <code>b</code>
4984    * @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
4985    * @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
4986    */
arm_clarke_f32(float32_t Ia,float32_t Ib,float32_t * pIalpha,float32_t * pIbeta)4987   static __INLINE void arm_clarke_f32(
4988   float32_t Ia,
4989   float32_t Ib,
4990   float32_t * pIalpha,
4991   float32_t * pIbeta)
4992   {
4993     /* Calculate pIalpha using the equation, pIalpha = Ia */
4994     *pIalpha = Ia;
4995 
4996     /* Calculate pIbeta using the equation, pIbeta = (1/sqrt(3)) * Ia + (2/sqrt(3)) * Ib */
4997     *pIbeta = ((float32_t) 0.57735026919 * Ia + (float32_t) 1.15470053838 * Ib);
4998   }
4999 
5000 
5001   /**
5002    * @brief  Clarke transform for Q31 version
5003    * @param[in]  Ia       input three-phase coordinate <code>a</code>
5004    * @param[in]  Ib       input three-phase coordinate <code>b</code>
5005    * @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
5006    * @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
5007    *
5008    * <b>Scaling and Overflow Behavior:</b>
5009    * \par
5010    * The function is implemented using an internal 32-bit accumulator.
5011    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5012    * There is saturation on the addition, hence there is no risk of overflow.
5013    */
arm_clarke_q31(q31_t Ia,q31_t Ib,q31_t * pIalpha,q31_t * pIbeta)5014   static __INLINE void arm_clarke_q31(
5015   q31_t Ia,
5016   q31_t Ib,
5017   q31_t * pIalpha,
5018   q31_t * pIbeta)
5019   {
5020     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5021 
5022     /* Calculating pIalpha from Ia by equation pIalpha = Ia */
5023     *pIalpha = Ia;
5024 
5025     /* Intermediate product is calculated by (1/(sqrt(3)) * Ia) */
5026     product1 = (q31_t) (((q63_t) Ia * 0x24F34E8B) >> 30);
5027 
5028     /* Intermediate product is calculated by (2/sqrt(3) * Ib) */
5029     product2 = (q31_t) (((q63_t) Ib * 0x49E69D16) >> 30);
5030 
5031     /* pIbeta is calculated by adding the intermediate products */
5032     *pIbeta = __QADD(product1, product2);
5033   }
5034 
5035   /**
5036    * @} end of clarke group
5037    */
5038 
  /**
   * @brief  Converts the elements of the Q7 vector to Q31 vector.
   * @param[in]  pSrc       points to the Q7 input vector
   * @param[out] pDst       points to the Q31 output vector
   * @param[in]  blockSize  number of samples to process
   * @return none
   */
  void arm_q7_to_q31(
  q7_t * pSrc,
  q31_t * pDst,
  uint32_t blockSize);
5049 
5050 
5051 
5052   /**
5053    * @ingroup groupController
5054    */
5055 
5056   /**
5057    * @defgroup inv_clarke Vector Inverse Clarke Transform
5058    * Inverse Clarke transform converts the two-coordinate time invariant vector into instantaneous stator phases.
5059    *
5060    * The function operates on a single sample of data and each call to the function returns the processed output.
5061    * The library provides separate functions for Q31 and floating-point data types.
5062    * \par Algorithm
5063    * \image html clarkeInvFormula.gif
5064    * where <code>pIa</code> and <code>pIb</code> are the instantaneous stator phases and
5065    * <code>Ialpha</code> and <code>Ibeta</code> are the two coordinates of time invariant vector.
5066    * \par Fixed-Point Behavior
5067    * Care must be taken when using the Q31 version of the Clarke transform.
5068    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5069    * Refer to the function specific documentation below for usage guidelines.
5070    */
5071 
5072   /**
5073    * @addtogroup inv_clarke
5074    * @{
5075    */
5076 
5077    /**
5078    * @brief  Floating-point Inverse Clarke transform
5079    * @param[in]  Ialpha  input two-phase orthogonal vector axis alpha
5080    * @param[in]  Ibeta   input two-phase orthogonal vector axis beta
5081    * @param[out] pIa     points to output three-phase coordinate <code>a</code>
5082    * @param[out] pIb     points to output three-phase coordinate <code>b</code>
5083    */
arm_inv_clarke_f32(float32_t Ialpha,float32_t Ibeta,float32_t * pIa,float32_t * pIb)5084   static __INLINE void arm_inv_clarke_f32(
5085   float32_t Ialpha,
5086   float32_t Ibeta,
5087   float32_t * pIa,
5088   float32_t * pIb)
5089   {
5090     /* Calculating pIa from Ialpha by equation pIa = Ialpha */
5091     *pIa = Ialpha;
5092 
5093     /* Calculating pIb from Ialpha and Ibeta by equation pIb = -(1/2) * Ialpha + (sqrt(3)/2) * Ibeta */
5094     *pIb = -0.5f * Ialpha + 0.8660254039f * Ibeta;
5095   }
5096 
5097 
5098   /**
5099    * @brief  Inverse Clarke transform for Q31 version
5100    * @param[in]  Ialpha  input two-phase orthogonal vector axis alpha
5101    * @param[in]  Ibeta   input two-phase orthogonal vector axis beta
5102    * @param[out] pIa     points to output three-phase coordinate <code>a</code>
5103    * @param[out] pIb     points to output three-phase coordinate <code>b</code>
5104    *
5105    * <b>Scaling and Overflow Behavior:</b>
5106    * \par
5107    * The function is implemented using an internal 32-bit accumulator.
5108    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5109    * There is saturation on the subtraction, hence there is no risk of overflow.
5110    */
arm_inv_clarke_q31(q31_t Ialpha,q31_t Ibeta,q31_t * pIa,q31_t * pIb)5111   static __INLINE void arm_inv_clarke_q31(
5112   q31_t Ialpha,
5113   q31_t Ibeta,
5114   q31_t * pIa,
5115   q31_t * pIb)
5116   {
5117     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5118 
5119     /* Calculating pIa from Ialpha by equation pIa = Ialpha */
5120     *pIa = Ialpha;
5121 
5122     /* Intermediate product is calculated by (1/(2*sqrt(3)) * Ia) */
5123     product1 = (q31_t) (((q63_t) (Ialpha) * (0x40000000)) >> 31);
5124 
5125     /* Intermediate product is calculated by (1/sqrt(3) * pIb) */
5126     product2 = (q31_t) (((q63_t) (Ibeta) * (0x6ED9EBA1)) >> 31);
5127 
5128     /* pIb is calculated by subtracting the products */
5129     *pIb = __QSUB(product2, product1);
5130   }
5131 
5132   /**
5133    * @} end of inv_clarke group
5134    */
5135 
  /**
   * @brief  Converts the elements of the Q7 vector to Q15 vector.
   * @param[in]  pSrc       points to the Q7 input vector
   * @param[out] pDst       points to the Q15 output vector
   * @param[in]  blockSize  number of samples to process
   * @return none
   */
  void arm_q7_to_q15(
  q7_t * pSrc,
  q15_t * pDst,
  uint32_t blockSize);
5146 
5147 
5148 
5149   /**
5150    * @ingroup groupController
5151    */
5152 
5153   /**
5154    * @defgroup park Vector Park Transform
5155    *
5156    * Forward Park transform converts the input two-coordinate vector to flux and torque components.
5157    * The Park transform can be used to realize the transformation of the <code>Ialpha</code> and the <code>Ibeta</code> currents
5158    * from the stationary to the moving reference frame and control the spatial relationship between
5159    * the stator vector current and rotor flux vector.
5160    * If we consider the d axis aligned with the rotor flux, the diagram below shows the
5161    * current vector and the relationship from the two reference frames:
5162    * \image html park.gif "Stator current space vector and its component in (a,b) and in the d,q rotating reference frame"
5163    *
5164    * The function operates on a single sample of data and each call to the function returns the processed output.
5165    * The library provides separate functions for Q31 and floating-point data types.
5166    * \par Algorithm
5167    * \image html parkFormula.gif
5168    * where <code>Ialpha</code> and <code>Ibeta</code> are the stator vector components,
5169    * <code>pId</code> and <code>pIq</code> are rotor vector components and <code>cosVal</code> and <code>sinVal</code> are the
5170    * cosine and sine values of theta (rotor flux position).
5171    * \par Fixed-Point Behavior
5172    * Care must be taken when using the Q31 version of the Park transform.
5173    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5174    * Refer to the function specific documentation below for usage guidelines.
5175    */
5176 
5177   /**
5178    * @addtogroup park
5179    * @{
5180    */
5181 
5182   /**
5183    * @brief Floating-point Park transform
5184    * @param[in]  Ialpha  input two-phase vector coordinate alpha
5185    * @param[in]  Ibeta   input two-phase vector coordinate beta
5186    * @param[out] pId     points to output   rotor reference frame d
5187    * @param[out] pIq     points to output   rotor reference frame q
5188    * @param[in]  sinVal  sine value of rotation angle theta
5189    * @param[in]  cosVal  cosine value of rotation angle theta
5190    *
5191    * The function implements the forward Park transform.
5192    *
5193    */
arm_park_f32(float32_t Ialpha,float32_t Ibeta,float32_t * pId,float32_t * pIq,float32_t sinVal,float32_t cosVal)5194   static __INLINE void arm_park_f32(
5195   float32_t Ialpha,
5196   float32_t Ibeta,
5197   float32_t * pId,
5198   float32_t * pIq,
5199   float32_t sinVal,
5200   float32_t cosVal)
5201   {
5202     /* Calculate pId using the equation, pId = Ialpha * cosVal + Ibeta * sinVal */
5203     *pId = Ialpha * cosVal + Ibeta * sinVal;
5204 
5205     /* Calculate pIq using the equation, pIq = - Ialpha * sinVal + Ibeta * cosVal */
5206     *pIq = -Ialpha * sinVal + Ibeta * cosVal;
5207   }
5208 
5209 
5210   /**
5211    * @brief  Park transform for Q31 version
5212    * @param[in]  Ialpha  input two-phase vector coordinate alpha
5213    * @param[in]  Ibeta   input two-phase vector coordinate beta
5214    * @param[out] pId     points to output rotor reference frame d
5215    * @param[out] pIq     points to output rotor reference frame q
5216    * @param[in]  sinVal  sine value of rotation angle theta
5217    * @param[in]  cosVal  cosine value of rotation angle theta
5218    *
5219    * <b>Scaling and Overflow Behavior:</b>
5220    * \par
5221    * The function is implemented using an internal 32-bit accumulator.
5222    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5223    * There is saturation on the addition and subtraction, hence there is no risk of overflow.
5224    */
arm_park_q31(q31_t Ialpha,q31_t Ibeta,q31_t * pId,q31_t * pIq,q31_t sinVal,q31_t cosVal)5225   static __INLINE void arm_park_q31(
5226   q31_t Ialpha,
5227   q31_t Ibeta,
5228   q31_t * pId,
5229   q31_t * pIq,
5230   q31_t sinVal,
5231   q31_t cosVal)
5232   {
5233     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5234     q31_t product3, product4;                    /* Temporary variables used to store intermediate results */
5235 
5236     /* Intermediate product is calculated by (Ialpha * cosVal) */
5237     product1 = (q31_t) (((q63_t) (Ialpha) * (cosVal)) >> 31);
5238 
5239     /* Intermediate product is calculated by (Ibeta * sinVal) */
5240     product2 = (q31_t) (((q63_t) (Ibeta) * (sinVal)) >> 31);
5241 
5242 
5243     /* Intermediate product is calculated by (Ialpha * sinVal) */
5244     product3 = (q31_t) (((q63_t) (Ialpha) * (sinVal)) >> 31);
5245 
5246     /* Intermediate product is calculated by (Ibeta * cosVal) */
5247     product4 = (q31_t) (((q63_t) (Ibeta) * (cosVal)) >> 31);
5248 
5249     /* Calculate pId by adding the two intermediate products 1 and 2 */
5250     *pId = __QADD(product1, product2);
5251 
5252     /* Calculate pIq by subtracting the two intermediate products 3 from 4 */
5253     *pIq = __QSUB(product4, product3);
5254   }
5255 
5256   /**
5257    * @} end of park group
5258    */
5259 
  /**
   * @brief  Converts the elements of the Q7 vector to floating-point vector.
   * @param[in]  pSrc       points to the Q7 input vector
   * @param[out] pDst       points to the floating-point output vector
   * @param[in]  blockSize  is the number of samples to process
   * @return none
   */
  void arm_q7_to_float(
  q7_t * pSrc,
  float32_t * pDst,
  uint32_t blockSize);
5270 
5271 
5272   /**
5273    * @ingroup groupController
5274    */
5275 
5276   /**
5277    * @defgroup inv_park Vector Inverse Park transform
5278    * Inverse Park transform converts the input flux and torque components to two-coordinate vector.
5279    *
5280    * The function operates on a single sample of data and each call to the function returns the processed output.
5281    * The library provides separate functions for Q31 and floating-point data types.
5282    * \par Algorithm
5283    * \image html parkInvFormula.gif
5284    * where <code>pIalpha</code> and <code>pIbeta</code> are the stator vector components,
5285    * <code>Id</code> and <code>Iq</code> are rotor vector components and <code>cosVal</code> and <code>sinVal</code> are the
5286    * cosine and sine values of theta (rotor flux position).
5287    * \par Fixed-Point Behavior
5288    * Care must be taken when using the Q31 version of the Park transform.
5289    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5290    * Refer to the function specific documentation below for usage guidelines.
5291    */
5292 
5293   /**
5294    * @addtogroup inv_park
5295    * @{
5296    */
5297 
5298    /**
5299    * @brief  Floating-point Inverse Park transform
5300    * @param[in]  Id       input coordinate of rotor reference frame d
5301    * @param[in]  Iq       input coordinate of rotor reference frame q
5302    * @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
5303    * @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
5304    * @param[in]  sinVal   sine value of rotation angle theta
5305    * @param[in]  cosVal   cosine value of rotation angle theta
5306    */
arm_inv_park_f32(float32_t Id,float32_t Iq,float32_t * pIalpha,float32_t * pIbeta,float32_t sinVal,float32_t cosVal)5307   static __INLINE void arm_inv_park_f32(
5308   float32_t Id,
5309   float32_t Iq,
5310   float32_t * pIalpha,
5311   float32_t * pIbeta,
5312   float32_t sinVal,
5313   float32_t cosVal)
5314   {
5315     /* Calculate pIalpha using the equation, pIalpha = Id * cosVal - Iq * sinVal */
5316     *pIalpha = Id * cosVal - Iq * sinVal;
5317 
5318     /* Calculate pIbeta using the equation, pIbeta = Id * sinVal + Iq * cosVal */
5319     *pIbeta = Id * sinVal + Iq * cosVal;
5320   }
5321 
5322 
5323   /**
5324    * @brief  Inverse Park transform for   Q31 version
5325    * @param[in]  Id       input coordinate of rotor reference frame d
5326    * @param[in]  Iq       input coordinate of rotor reference frame q
5327    * @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
5328    * @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
5329    * @param[in]  sinVal   sine value of rotation angle theta
5330    * @param[in]  cosVal   cosine value of rotation angle theta
5331    *
5332    * <b>Scaling and Overflow Behavior:</b>
5333    * \par
5334    * The function is implemented using an internal 32-bit accumulator.
5335    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5336    * There is saturation on the addition, hence there is no risk of overflow.
5337    */
arm_inv_park_q31(q31_t Id,q31_t Iq,q31_t * pIalpha,q31_t * pIbeta,q31_t sinVal,q31_t cosVal)5338   static __INLINE void arm_inv_park_q31(
5339   q31_t Id,
5340   q31_t Iq,
5341   q31_t * pIalpha,
5342   q31_t * pIbeta,
5343   q31_t sinVal,
5344   q31_t cosVal)
5345   {
5346     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5347     q31_t product3, product4;                    /* Temporary variables used to store intermediate results */
5348 
5349     /* Intermediate product is calculated by (Id * cosVal) */
5350     product1 = (q31_t) (((q63_t) (Id) * (cosVal)) >> 31);
5351 
5352     /* Intermediate product is calculated by (Iq * sinVal) */
5353     product2 = (q31_t) (((q63_t) (Iq) * (sinVal)) >> 31);
5354 
5355 
5356     /* Intermediate product is calculated by (Id * sinVal) */
5357     product3 = (q31_t) (((q63_t) (Id) * (sinVal)) >> 31);
5358 
5359     /* Intermediate product is calculated by (Iq * cosVal) */
5360     product4 = (q31_t) (((q63_t) (Iq) * (cosVal)) >> 31);
5361 
5362     /* Calculate pIalpha by using the two intermediate products 1 and 2 */
5363     *pIalpha = __QSUB(product1, product2);
5364 
5365     /* Calculate pIbeta by using the two intermediate products 3 and 4 */
5366     *pIbeta = __QADD(product4, product3);
5367   }
5368 
5369   /**
5370    * @} end of Inverse park group
5371    */
5372 
5373 
  /**
   * @brief  Converts the elements of the Q31 vector to floating-point vector.
   * @param[in]  pSrc       points to the Q31 input vector
   * @param[out] pDst       points to the floating-point output vector
   * @param[in]  blockSize  is the number of samples to process
   * @return none
   */
  void arm_q31_to_float(
  q31_t * pSrc,
  float32_t * pDst,
  uint32_t blockSize);
5384 
5385   /**
5386    * @ingroup groupInterpolation
5387    */
5388 
5389   /**
5390    * @defgroup LinearInterpolate Linear Interpolation
5391    *
5392    * Linear interpolation is a method of curve fitting using linear polynomials.
5393    * Linear interpolation works by effectively drawing a straight line between two neighboring samples and returning the appropriate point along that line
5394    *
5395    * \par
5396    * \image html LinearInterp.gif "Linear interpolation"
5397    *
5398    * \par
5399    * A  Linear Interpolate function calculates an output value(y), for the input(x)
5400    * using linear interpolation of the input values x0, x1( nearest input values) and the output values y0 and y1(nearest output values)
5401    *
5402    * \par Algorithm:
5403    * <pre>
5404    *       y = y0 + (x - x0) * ((y1 - y0)/(x1-x0))
5405    *       where x0, x1 are nearest values of input x
5406    *             y0, y1 are nearest values to output y
5407    * </pre>
5408    *
5409    * \par
5410    * This set of functions implements Linear interpolation process
5411    * for Q7, Q15, Q31, and floating-point data types.  The functions operate on a single
5412    * sample of data and each call to the function returns a single processed value.
5413    * <code>S</code> points to an instance of the Linear Interpolate function data structure.
   * <code>x</code> is the input sample value. The functions return the output value.
5415    *
5416    * \par
5417    * if x is outside of the table boundary, Linear interpolation returns first value of the table
5418    * if x is below input range and returns last value of table if x is above range.
5419    */
5420 
5421   /**
5422    * @addtogroup LinearInterpolate
5423    * @{
5424    */
5425 
5426   /**
5427    * @brief  Process function for the floating-point Linear Interpolation Function.
5428    * @param[in,out] S  is an instance of the floating-point Linear Interpolation structure
5429    * @param[in]     x  input sample to process
5430    * @return y processed output sample.
5431    *
5432    */
arm_linear_interp_f32(arm_linear_interp_instance_f32 * S,float32_t x)5433   static __INLINE float32_t arm_linear_interp_f32(
5434   arm_linear_interp_instance_f32 * S,
5435   float32_t x)
5436   {
5437     float32_t y;
5438     float32_t x0, x1;                            /* Nearest input values */
5439     float32_t y0, y1;                            /* Nearest output values */
5440     float32_t xSpacing = S->xSpacing;            /* spacing between input values */
5441     int32_t i;                                   /* Index variable */
5442     float32_t *pYData = S->pYData;               /* pointer to output table */
5443 
5444     /* Calculation of index */
5445     i = (int32_t) ((x - S->x1) / xSpacing);
5446 
5447     if(i < 0)
5448     {
5449       /* Iniatilize output for below specified range as least output value of table */
5450       y = pYData[0];
5451     }
5452     else if((uint32_t)i >= S->nValues)
5453     {
5454       /* Iniatilize output for above specified range as last output value of table */
5455       y = pYData[S->nValues - 1];
5456     }
5457     else
5458     {
5459       /* Calculation of nearest input values */
5460       x0 = S->x1 +  i      * xSpacing;
5461       x1 = S->x1 + (i + 1) * xSpacing;
5462 
5463       /* Read of nearest output values */
5464       y0 = pYData[i];
5465       y1 = pYData[i + 1];
5466 
5467       /* Calculation of output */
5468       y = y0 + (x - x0) * ((y1 - y0) / (x1 - x0));
5469 
5470     }
5471 
5472     /* returns output value */
5473     return (y);
5474   }
5475 
5476 
5477    /**
5478    *
5479    * @brief  Process function for the Q31 Linear Interpolation Function.
5480    * @param[in] pYData   pointer to Q31 Linear Interpolation table
5481    * @param[in] x        input sample to process
5482    * @param[in] nValues  number of table values
5483    * @return y processed output sample.
5484    *
5485    * \par
5486    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
5487    * This function can support maximum of table size 2^12.
5488    *
5489    */
arm_linear_interp_q31(q31_t * pYData,q31_t x,uint32_t nValues)5490   static __INLINE q31_t arm_linear_interp_q31(
5491   q31_t * pYData,
5492   q31_t x,
5493   uint32_t nValues)
5494   {
5495     q31_t y;                                     /* output */
5496     q31_t y0, y1;                                /* Nearest output values */
5497     q31_t fract;                                 /* fractional part */
5498     int32_t index;                               /* Index to read nearest output values */
5499 
5500     /* Input is in 12.20 format */
5501     /* 12 bits for the table index */
5502     /* Index value calculation */
5503     index = ((x & (q31_t)0xFFF00000) >> 20);
5504 
5505     if(index >= (int32_t)(nValues - 1))
5506     {
5507       return (pYData[nValues - 1]);
5508     }
5509     else if(index < 0)
5510     {
5511       return (pYData[0]);
5512     }
5513     else
5514     {
5515       /* 20 bits for the fractional part */
5516       /* shift left by 11 to keep fract in 1.31 format */
5517       fract = (x & 0x000FFFFF) << 11;
5518 
5519       /* Read two nearest output values from the index in 1.31(q31) format */
5520       y0 = pYData[index];
5521       y1 = pYData[index + 1];
5522 
5523       /* Calculation of y0 * (1-fract) and y is in 2.30 format */
5524       y = ((q31_t) ((q63_t) y0 * (0x7FFFFFFF - fract) >> 32));
5525 
5526       /* Calculation of y0 * (1-fract) + y1 *fract and y is in 2.30 format */
5527       y += ((q31_t) (((q63_t) y1 * fract) >> 32));
5528 
5529       /* Convert y to 1.31 format */
5530       return (y << 1u);
5531     }
5532   }
5533 
5534 
5535   /**
5536    *
5537    * @brief  Process function for the Q15 Linear Interpolation Function.
5538    * @param[in] pYData   pointer to Q15 Linear Interpolation table
5539    * @param[in] x        input sample to process
5540    * @param[in] nValues  number of table values
5541    * @return y processed output sample.
5542    *
5543    * \par
5544    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
5545    * This function can support maximum of table size 2^12.
5546    *
5547    */
arm_linear_interp_q15(q15_t * pYData,q31_t x,uint32_t nValues)5548   static __INLINE q15_t arm_linear_interp_q15(
5549   q15_t * pYData,
5550   q31_t x,
5551   uint32_t nValues)
5552   {
5553     q63_t y;                                     /* output */
5554     q15_t y0, y1;                                /* Nearest output values */
5555     q31_t fract;                                 /* fractional part */
5556     int32_t index;                               /* Index to read nearest output values */
5557 
5558     /* Input is in 12.20 format */
5559     /* 12 bits for the table index */
5560     /* Index value calculation */
5561     index = ((x & (int32_t)0xFFF00000) >> 20);
5562 
5563     if(index >= (int32_t)(nValues - 1))
5564     {
5565       return (pYData[nValues - 1]);
5566     }
5567     else if(index < 0)
5568     {
5569       return (pYData[0]);
5570     }
5571     else
5572     {
5573       /* 20 bits for the fractional part */
5574       /* fract is in 12.20 format */
5575       fract = (x & 0x000FFFFF);
5576 
5577       /* Read two nearest output values from the index */
5578       y0 = pYData[index];
5579       y1 = pYData[index + 1];
5580 
5581       /* Calculation of y0 * (1-fract) and y is in 13.35 format */
5582       y = ((q63_t) y0 * (0xFFFFF - fract));
5583 
5584       /* Calculation of (y0 * (1-fract) + y1 * fract) and y is in 13.35 format */
5585       y += ((q63_t) y1 * (fract));
5586 
5587       /* convert y to 1.15 format */
5588       return (q15_t) (y >> 20);
5589     }
5590   }
5591 
5592 
5593   /**
5594    *
5595    * @brief  Process function for the Q7 Linear Interpolation Function.
5596    * @param[in] pYData   pointer to Q7 Linear Interpolation table
5597    * @param[in] x        input sample to process
5598    * @param[in] nValues  number of table values
5599    * @return y processed output sample.
5600    *
5601    * \par
5602    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
5603    * This function can support maximum of table size 2^12.
5604    */
arm_linear_interp_q7(q7_t * pYData,q31_t x,uint32_t nValues)5605   static __INLINE q7_t arm_linear_interp_q7(
5606   q7_t * pYData,
5607   q31_t x,
5608   uint32_t nValues)
5609   {
5610     q31_t y;                                     /* output */
5611     q7_t y0, y1;                                 /* Nearest output values */
5612     q31_t fract;                                 /* fractional part */
5613     uint32_t index;                              /* Index to read nearest output values */
5614 
5615     /* Input is in 12.20 format */
5616     /* 12 bits for the table index */
5617     /* Index value calculation */
5618     if (x < 0)
5619     {
5620       return (pYData[0]);
5621     }
5622     index = (x >> 20) & 0xfff;
5623 
5624     if(index >= (nValues - 1))
5625     {
5626       return (pYData[nValues - 1]);
5627     }
5628     else
5629     {
5630       /* 20 bits for the fractional part */
5631       /* fract is in 12.20 format */
5632       fract = (x & 0x000FFFFF);
5633 
5634       /* Read two nearest output values from the index and are in 1.7(q7) format */
5635       y0 = pYData[index];
5636       y1 = pYData[index + 1];
5637 
5638       /* Calculation of y0 * (1-fract ) and y is in 13.27(q27) format */
5639       y = ((y0 * (0xFFFFF - fract)));
5640 
5641       /* Calculation of y1 * fract + y0 * (1-fract) and y is in 13.27(q27) format */
5642       y += (y1 * fract);
5643 
5644       /* convert y to 1.7(q7) format */
5645       return (q7_t) (y >> 20);
5646      }
5647   }
5648 
5649   /**
5650    * @} end of LinearInterpolate group
5651    */
5652 
5653   /**
5654    * @brief  Fast approximation to the trigonometric sine function for floating-point data.
5655    * @param[in] x  input value in radians.
5656    * @return  sin(x).
5657    */
5658   float32_t arm_sin_f32(
5659   float32_t x);
5660 
5661 
5662   /**
5663    * @brief  Fast approximation to the trigonometric sine function for Q31 data.
5664    * @param[in] x  Scaled input value in radians.
5665    * @return  sin(x).
5666    */
5667   q31_t arm_sin_q31(
5668   q31_t x);
5669 
5670 
5671   /**
5672    * @brief  Fast approximation to the trigonometric sine function for Q15 data.
5673    * @param[in] x  Scaled input value in radians.
5674    * @return  sin(x).
5675    */
5676   q15_t arm_sin_q15(
5677   q15_t x);
5678 
5679 
5680   /**
5681    * @brief  Fast approximation to the trigonometric cosine function for floating-point data.
5682    * @param[in] x  input value in radians.
5683    * @return  cos(x).
5684    */
5685   float32_t arm_cos_f32(
5686   float32_t x);
5687 
5688 
5689   /**
5690    * @brief Fast approximation to the trigonometric cosine function for Q31 data.
5691    * @param[in] x  Scaled input value in radians.
5692    * @return  cos(x).
5693    */
5694   q31_t arm_cos_q31(
5695   q31_t x);
5696 
5697 
5698   /**
5699    * @brief  Fast approximation to the trigonometric cosine function for Q15 data.
5700    * @param[in] x  Scaled input value in radians.
5701    * @return  cos(x).
5702    */
5703   q15_t arm_cos_q15(
5704   q15_t x);
5705 
5706 
5707   /**
5708    * @ingroup groupFastMath
5709    */
5710 
5711 
5712   /**
5713    * @defgroup SQRT Square Root
5714    *
5715    * Computes the square root of a number.
5716    * There are separate functions for Q15, Q31, and floating-point data types.
5717    * The square root function is computed using the Newton-Raphson algorithm.
5718    * This is an iterative algorithm of the form:
5719    * <pre>
5720    *      x1 = x0 - f(x0)/f'(x0)
5721    * </pre>
5722    * where <code>x1</code> is the current estimate,
5723    * <code>x0</code> is the previous estimate, and
5724    * <code>f'(x0)</code> is the derivative of <code>f()</code> evaluated at <code>x0</code>.
5725    * For the square root function, the algorithm reduces to:
5726    * <pre>
5727    *     x0 = in/2                         [initial guess]
5728    *     x1 = 1/2 * ( x0 + in / x0)        [each iteration]
5729    * </pre>
5730    */
5731 
5732 
5733   /**
5734    * @addtogroup SQRT
5735    * @{
5736    */
5737 
5738   /**
5739    * @brief  Floating-point square root function.
5740    * @param[in]  in    input value.
5741    * @param[out] pOut  square root of input value.
5742    * @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
5743    * <code>in</code> is negative value and returns zero output for negative values.
5744    */
arm_sqrt_f32(float32_t in,float32_t * pOut)5745   static __INLINE arm_status arm_sqrt_f32(
5746   float32_t in,
5747   float32_t * pOut)
5748   {
5749     if(in >= 0.0f)
5750     {
5751 
5752 #if   (__FPU_USED == 1) && defined ( __CC_ARM   )
5753       *pOut = __sqrtf(in);
5754 #elif (__FPU_USED == 1) && (defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050))
5755       *pOut = __builtin_sqrtf(in);
5756 #elif (__FPU_USED == 1) && defined(__GNUC__)
5757       *pOut = __builtin_sqrtf(in);
5758 #elif (__FPU_USED == 1) && defined ( __ICCARM__ ) && (__VER__ >= 6040000)
5759       __ASM("VSQRT.F32 %0,%1" : "=t"(*pOut) : "t"(in));
5760 #else
5761       *pOut = sqrtf(in);
5762 #endif
5763 
5764       return (ARM_MATH_SUCCESS);
5765     }
5766     else
5767     {
5768       *pOut = 0.0f;
5769       return (ARM_MATH_ARGUMENT_ERROR);
5770     }
5771   }
5772 
5773 
5774   /**
5775    * @brief Q31 square root function.
5776    * @param[in]  in    input value.  The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF.
5777    * @param[out] pOut  square root of input value.
5778    * @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
5779    * <code>in</code> is negative value and returns zero output for negative values.
5780    */
5781   arm_status arm_sqrt_q31(
5782   q31_t in,
5783   q31_t * pOut);
5784 
5785 
5786   /**
5787    * @brief  Q15 square root function.
5788    * @param[in]  in    input value.  The range of the input value is [0 +1) or 0x0000 to 0x7FFF.
5789    * @param[out] pOut  square root of input value.
5790    * @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
5791    * <code>in</code> is negative value and returns zero output for negative values.
5792    */
5793   arm_status arm_sqrt_q15(
5794   q15_t in,
5795   q15_t * pOut);
5796 
5797   /**
5798    * @} end of SQRT group
5799    */
5800 
5801 
5802   /**
5803    * @brief floating-point Circular write function.
5804    */
arm_circularWrite_f32(int32_t * circBuffer,int32_t L,uint16_t * writeOffset,int32_t bufferInc,const int32_t * src,int32_t srcInc,uint32_t blockSize)5805   static __INLINE void arm_circularWrite_f32(
5806   int32_t * circBuffer,
5807   int32_t L,
5808   uint16_t * writeOffset,
5809   int32_t bufferInc,
5810   const int32_t * src,
5811   int32_t srcInc,
5812   uint32_t blockSize)
5813   {
5814     uint32_t i = 0u;
5815     int32_t wOffset;
5816 
5817     /* Copy the value of Index pointer that points
5818      * to the current location where the input samples to be copied */
5819     wOffset = *writeOffset;
5820 
5821     /* Loop over the blockSize */
5822     i = blockSize;
5823 
5824     while(i > 0u)
5825     {
5826       /* copy the input sample to the circular buffer */
5827       circBuffer[wOffset] = *src;
5828 
5829       /* Update the input pointer */
5830       src += srcInc;
5831 
5832       /* Circularly update wOffset.  Watch out for positive and negative value */
5833       wOffset += bufferInc;
5834       if(wOffset >= L)
5835         wOffset -= L;
5836 
5837       /* Decrement the loop counter */
5838       i--;
5839     }
5840 
5841     /* Update the index pointer */
5842     *writeOffset = (uint16_t)wOffset;
5843   }
5844 
5845 
5846 
5847   /**
5848    * @brief floating-point Circular Read function.
5849    */
arm_circularRead_f32(int32_t * circBuffer,int32_t L,int32_t * readOffset,int32_t bufferInc,int32_t * dst,int32_t * dst_base,int32_t dst_length,int32_t dstInc,uint32_t blockSize)5850   static __INLINE void arm_circularRead_f32(
5851   int32_t * circBuffer,
5852   int32_t L,
5853   int32_t * readOffset,
5854   int32_t bufferInc,
5855   int32_t * dst,
5856   int32_t * dst_base,
5857   int32_t dst_length,
5858   int32_t dstInc,
5859   uint32_t blockSize)
5860   {
5861     uint32_t i = 0u;
5862     int32_t rOffset, dst_end;
5863 
5864     /* Copy the value of Index pointer that points
5865      * to the current location from where the input samples to be read */
5866     rOffset = *readOffset;
5867     dst_end = (int32_t) (dst_base + dst_length);
5868 
5869     /* Loop over the blockSize */
5870     i = blockSize;
5871 
5872     while(i > 0u)
5873     {
5874       /* copy the sample from the circular buffer to the destination buffer */
5875       *dst = circBuffer[rOffset];
5876 
5877       /* Update the input pointer */
5878       dst += dstInc;
5879 
5880       if(dst == (int32_t *) dst_end)
5881       {
5882         dst = dst_base;
5883       }
5884 
5885       /* Circularly update rOffset.  Watch out for positive and negative value  */
5886       rOffset += bufferInc;
5887 
5888       if(rOffset >= L)
5889       {
5890         rOffset -= L;
5891       }
5892 
5893       /* Decrement the loop counter */
5894       i--;
5895     }
5896 
5897     /* Update the index pointer */
5898     *readOffset = rOffset;
5899   }
5900 
5901 
5902   /**
5903    * @brief Q15 Circular write function.
5904    */
arm_circularWrite_q15(q15_t * circBuffer,int32_t L,uint16_t * writeOffset,int32_t bufferInc,const q15_t * src,int32_t srcInc,uint32_t blockSize)5905   static __INLINE void arm_circularWrite_q15(
5906   q15_t * circBuffer,
5907   int32_t L,
5908   uint16_t * writeOffset,
5909   int32_t bufferInc,
5910   const q15_t * src,
5911   int32_t srcInc,
5912   uint32_t blockSize)
5913   {
5914     uint32_t i = 0u;
5915     int32_t wOffset;
5916 
5917     /* Copy the value of Index pointer that points
5918      * to the current location where the input samples to be copied */
5919     wOffset = *writeOffset;
5920 
5921     /* Loop over the blockSize */
5922     i = blockSize;
5923 
5924     while(i > 0u)
5925     {
5926       /* copy the input sample to the circular buffer */
5927       circBuffer[wOffset] = *src;
5928 
5929       /* Update the input pointer */
5930       src += srcInc;
5931 
5932       /* Circularly update wOffset.  Watch out for positive and negative value */
5933       wOffset += bufferInc;
5934       if(wOffset >= L)
5935         wOffset -= L;
5936 
5937       /* Decrement the loop counter */
5938       i--;
5939     }
5940 
5941     /* Update the index pointer */
5942     *writeOffset = (uint16_t)wOffset;
5943   }
5944 
5945 
5946   /**
5947    * @brief Q15 Circular Read function.
5948    */
arm_circularRead_q15(q15_t * circBuffer,int32_t L,int32_t * readOffset,int32_t bufferInc,q15_t * dst,q15_t * dst_base,int32_t dst_length,int32_t dstInc,uint32_t blockSize)5949   static __INLINE void arm_circularRead_q15(
5950   q15_t * circBuffer,
5951   int32_t L,
5952   int32_t * readOffset,
5953   int32_t bufferInc,
5954   q15_t * dst,
5955   q15_t * dst_base,
5956   int32_t dst_length,
5957   int32_t dstInc,
5958   uint32_t blockSize)
5959   {
5960     uint32_t i = 0;
5961     int32_t rOffset, dst_end;
5962 
5963     /* Copy the value of Index pointer that points
5964      * to the current location from where the input samples to be read */
5965     rOffset = *readOffset;
5966 
5967     dst_end = (int32_t) (dst_base + dst_length);
5968 
5969     /* Loop over the blockSize */
5970     i = blockSize;
5971 
5972     while(i > 0u)
5973     {
5974       /* copy the sample from the circular buffer to the destination buffer */
5975       *dst = circBuffer[rOffset];
5976 
5977       /* Update the input pointer */
5978       dst += dstInc;
5979 
5980       if(dst == (q15_t *) dst_end)
5981       {
5982         dst = dst_base;
5983       }
5984 
5985       /* Circularly update wOffset.  Watch out for positive and negative value */
5986       rOffset += bufferInc;
5987 
5988       if(rOffset >= L)
5989       {
5990         rOffset -= L;
5991       }
5992 
5993       /* Decrement the loop counter */
5994       i--;
5995     }
5996 
5997     /* Update the index pointer */
5998     *readOffset = rOffset;
5999   }
6000 
6001 
6002   /**
6003    * @brief Q7 Circular write function.
6004    */
arm_circularWrite_q7(q7_t * circBuffer,int32_t L,uint16_t * writeOffset,int32_t bufferInc,const q7_t * src,int32_t srcInc,uint32_t blockSize)6005   static __INLINE void arm_circularWrite_q7(
6006   q7_t * circBuffer,
6007   int32_t L,
6008   uint16_t * writeOffset,
6009   int32_t bufferInc,
6010   const q7_t * src,
6011   int32_t srcInc,
6012   uint32_t blockSize)
6013   {
6014     uint32_t i = 0u;
6015     int32_t wOffset;
6016 
6017     /* Copy the value of Index pointer that points
6018      * to the current location where the input samples to be copied */
6019     wOffset = *writeOffset;
6020 
6021     /* Loop over the blockSize */
6022     i = blockSize;
6023 
6024     while(i > 0u)
6025     {
6026       /* copy the input sample to the circular buffer */
6027       circBuffer[wOffset] = *src;
6028 
6029       /* Update the input pointer */
6030       src += srcInc;
6031 
6032       /* Circularly update wOffset.  Watch out for positive and negative value */
6033       wOffset += bufferInc;
6034       if(wOffset >= L)
6035         wOffset -= L;
6036 
6037       /* Decrement the loop counter */
6038       i--;
6039     }
6040 
6041     /* Update the index pointer */
6042     *writeOffset = (uint16_t)wOffset;
6043   }
6044 
6045 
6046   /**
6047    * @brief Q7 Circular Read function.
6048    */
arm_circularRead_q7(q7_t * circBuffer,int32_t L,int32_t * readOffset,int32_t bufferInc,q7_t * dst,q7_t * dst_base,int32_t dst_length,int32_t dstInc,uint32_t blockSize)6049   static __INLINE void arm_circularRead_q7(
6050   q7_t * circBuffer,
6051   int32_t L,
6052   int32_t * readOffset,
6053   int32_t bufferInc,
6054   q7_t * dst,
6055   q7_t * dst_base,
6056   int32_t dst_length,
6057   int32_t dstInc,
6058   uint32_t blockSize)
6059   {
6060     uint32_t i = 0;
6061     int32_t rOffset, dst_end;
6062 
6063     /* Copy the value of Index pointer that points
6064      * to the current location from where the input samples to be read */
6065     rOffset = *readOffset;
6066 
6067     dst_end = (int32_t) (dst_base + dst_length);
6068 
6069     /* Loop over the blockSize */
6070     i = blockSize;
6071 
6072     while(i > 0u)
6073     {
6074       /* copy the sample from the circular buffer to the destination buffer */
6075       *dst = circBuffer[rOffset];
6076 
6077       /* Update the input pointer */
6078       dst += dstInc;
6079 
6080       if(dst == (q7_t *) dst_end)
6081       {
6082         dst = dst_base;
6083       }
6084 
6085       /* Circularly update rOffset.  Watch out for positive and negative value */
6086       rOffset += bufferInc;
6087 
6088       if(rOffset >= L)
6089       {
6090         rOffset -= L;
6091       }
6092 
6093       /* Decrement the loop counter */
6094       i--;
6095     }
6096 
6097     /* Update the index pointer */
6098     *readOffset = rOffset;
6099   }
6100 
6101 
6102   /**
6103    * @brief  Sum of the squares of the elements of a Q31 vector.
6104    * @param[in]  pSrc       is input pointer
6105    * @param[in]  blockSize  is the number of samples to process
6106    * @param[out] pResult    is output value.
6107    */
6108   void arm_power_q31(
6109   q31_t * pSrc,
6110   uint32_t blockSize,
6111   q63_t * pResult);
6112 
6113 
6114   /**
6115    * @brief  Sum of the squares of the elements of a floating-point vector.
6116    * @param[in]  pSrc       is input pointer
6117    * @param[in]  blockSize  is the number of samples to process
6118    * @param[out] pResult    is output value.
6119    */
6120   void arm_power_f32(
6121   float32_t * pSrc,
6122   uint32_t blockSize,
6123   float32_t * pResult);
6124 
6125 
6126   /**
6127    * @brief  Sum of the squares of the elements of a Q15 vector.
6128    * @param[in]  pSrc       is input pointer
6129    * @param[in]  blockSize  is the number of samples to process
6130    * @param[out] pResult    is output value.
6131    */
6132   void arm_power_q15(
6133   q15_t * pSrc,
6134   uint32_t blockSize,
6135   q63_t * pResult);
6136 
6137 
6138   /**
6139    * @brief  Sum of the squares of the elements of a Q7 vector.
6140    * @param[in]  pSrc       is input pointer
6141    * @param[in]  blockSize  is the number of samples to process
6142    * @param[out] pResult    is output value.
6143    */
6144   void arm_power_q7(
6145   q7_t * pSrc,
6146   uint32_t blockSize,
6147   q31_t * pResult);
6148 
6149 
6150   /**
6151    * @brief  Mean value of a Q7 vector.
6152    * @param[in]  pSrc       is input pointer
6153    * @param[in]  blockSize  is the number of samples to process
6154    * @param[out] pResult    is output value.
6155    */
6156   void arm_mean_q7(
6157   q7_t * pSrc,
6158   uint32_t blockSize,
6159   q7_t * pResult);
6160 
6161 
6162   /**
6163    * @brief  Mean value of a Q15 vector.
6164    * @param[in]  pSrc       is input pointer
6165    * @param[in]  blockSize  is the number of samples to process
6166    * @param[out] pResult    is output value.
6167    */
6168   void arm_mean_q15(
6169   q15_t * pSrc,
6170   uint32_t blockSize,
6171   q15_t * pResult);
6172 
6173 
6174   /**
6175    * @brief  Mean value of a Q31 vector.
6176    * @param[in]  pSrc       is input pointer
6177    * @param[in]  blockSize  is the number of samples to process
6178    * @param[out] pResult    is output value.
6179    */
6180   void arm_mean_q31(
6181   q31_t * pSrc,
6182   uint32_t blockSize,
6183   q31_t * pResult);
6184 
6185 
6186   /**
6187    * @brief  Mean value of a floating-point vector.
6188    * @param[in]  pSrc       is input pointer
6189    * @param[in]  blockSize  is the number of samples to process
6190    * @param[out] pResult    is output value.
6191    */
6192   void arm_mean_f32(
6193   float32_t * pSrc,
6194   uint32_t blockSize,
6195   float32_t * pResult);
6196 
6197 
6198   /**
6199    * @brief  Variance of the elements of a floating-point vector.
6200    * @param[in]  pSrc       is input pointer
6201    * @param[in]  blockSize  is the number of samples to process
6202    * @param[out] pResult    is output value.
6203    */
6204   void arm_var_f32(
6205   float32_t * pSrc,
6206   uint32_t blockSize,
6207   float32_t * pResult);
6208 
6209 
6210   /**
6211    * @brief  Variance of the elements of a Q31 vector.
6212    * @param[in]  pSrc       is input pointer
6213    * @param[in]  blockSize  is the number of samples to process
6214    * @param[out] pResult    is output value.
6215    */
6216   void arm_var_q31(
6217   q31_t * pSrc,
6218   uint32_t blockSize,
6219   q31_t * pResult);
6220 
6221 
6222   /**
6223    * @brief  Variance of the elements of a Q15 vector.
6224    * @param[in]  pSrc       is input pointer
6225    * @param[in]  blockSize  is the number of samples to process
6226    * @param[out] pResult    is output value.
6227    */
6228   void arm_var_q15(
6229   q15_t * pSrc,
6230   uint32_t blockSize,
6231   q15_t * pResult);
6232 
6233 
6234   /**
6235    * @brief  Root Mean Square of the elements of a floating-point vector.
6236    * @param[in]  pSrc       is input pointer
6237    * @param[in]  blockSize  is the number of samples to process
6238    * @param[out] pResult    is output value.
6239    */
6240   void arm_rms_f32(
6241   float32_t * pSrc,
6242   uint32_t blockSize,
6243   float32_t * pResult);
6244 
6245 
6246   /**
6247    * @brief  Root Mean Square of the elements of a Q31 vector.
6248    * @param[in]  pSrc       is input pointer
6249    * @param[in]  blockSize  is the number of samples to process
6250    * @param[out] pResult    is output value.
6251    */
6252   void arm_rms_q31(
6253   q31_t * pSrc,
6254   uint32_t blockSize,
6255   q31_t * pResult);
6256 
6257 
6258   /**
6259    * @brief  Root Mean Square of the elements of a Q15 vector.
6260    * @param[in]  pSrc       is input pointer
6261    * @param[in]  blockSize  is the number of samples to process
6262    * @param[out] pResult    is output value.
6263    */
6264   void arm_rms_q15(
6265   q15_t * pSrc,
6266   uint32_t blockSize,
6267   q15_t * pResult);
6268 
6269 
6270   /**
6271    * @brief  Standard deviation of the elements of a floating-point vector.
6272    * @param[in]  pSrc       is input pointer
6273    * @param[in]  blockSize  is the number of samples to process
6274    * @param[out] pResult    is output value.
6275    */
6276   void arm_std_f32(
6277   float32_t * pSrc,
6278   uint32_t blockSize,
6279   float32_t * pResult);
6280 
6281 
6282   /**
6283    * @brief  Standard deviation of the elements of a Q31 vector.
6284    * @param[in]  pSrc       is input pointer
6285    * @param[in]  blockSize  is the number of samples to process
6286    * @param[out] pResult    is output value.
6287    */
6288   void arm_std_q31(
6289   q31_t * pSrc,
6290   uint32_t blockSize,
6291   q31_t * pResult);
6292 
6293 
6294   /**
6295    * @brief  Standard deviation of the elements of a Q15 vector.
6296    * @param[in]  pSrc       is input pointer
6297    * @param[in]  blockSize  is the number of samples to process
6298    * @param[out] pResult    is output value.
6299    */
6300   void arm_std_q15(
6301   q15_t * pSrc,
6302   uint32_t blockSize,
6303   q15_t * pResult);
6304 
6305 
6306   /**
6307    * @brief  Floating-point complex magnitude
6308    * @param[in]  pSrc        points to the complex input vector
6309    * @param[out] pDst        points to the real output vector
6310    * @param[in]  numSamples  number of complex samples in the input vector
6311    */
6312   void arm_cmplx_mag_f32(
6313   float32_t * pSrc,
6314   float32_t * pDst,
6315   uint32_t numSamples);
6316 
6317 
6318   /**
6319    * @brief  Q31 complex magnitude
6320    * @param[in]  pSrc        points to the complex input vector
6321    * @param[out] pDst        points to the real output vector
6322    * @param[in]  numSamples  number of complex samples in the input vector
6323    */
6324   void arm_cmplx_mag_q31(
6325   q31_t * pSrc,
6326   q31_t * pDst,
6327   uint32_t numSamples);
6328 
6329 
6330   /**
6331    * @brief  Q15 complex magnitude
6332    * @param[in]  pSrc        points to the complex input vector
6333    * @param[out] pDst        points to the real output vector
6334    * @param[in]  numSamples  number of complex samples in the input vector
6335    */
6336   void arm_cmplx_mag_q15(
6337   q15_t * pSrc,
6338   q15_t * pDst,
6339   uint32_t numSamples);
6340 
6341 
6342   /**
6343    * @brief  Q15 complex dot product
6344    * @param[in]  pSrcA       points to the first input vector
6345    * @param[in]  pSrcB       points to the second input vector
6346    * @param[in]  numSamples  number of complex samples in each vector
6347    * @param[out] realResult  real part of the result returned here
6348    * @param[out] imagResult  imaginary part of the result returned here
6349    */
6350   void arm_cmplx_dot_prod_q15(
6351   q15_t * pSrcA,
6352   q15_t * pSrcB,
6353   uint32_t numSamples,
6354   q31_t * realResult,
6355   q31_t * imagResult);
6356 
6357 
6358   /**
6359    * @brief  Q31 complex dot product
6360    * @param[in]  pSrcA       points to the first input vector
6361    * @param[in]  pSrcB       points to the second input vector
6362    * @param[in]  numSamples  number of complex samples in each vector
6363    * @param[out] realResult  real part of the result returned here
6364    * @param[out] imagResult  imaginary part of the result returned here
6365    */
6366   void arm_cmplx_dot_prod_q31(
6367   q31_t * pSrcA,
6368   q31_t * pSrcB,
6369   uint32_t numSamples,
6370   q63_t * realResult,
6371   q63_t * imagResult);
6372 
6373 
6374   /**
6375    * @brief  Floating-point complex dot product
6376    * @param[in]  pSrcA       points to the first input vector
6377    * @param[in]  pSrcB       points to the second input vector
6378    * @param[in]  numSamples  number of complex samples in each vector
6379    * @param[out] realResult  real part of the result returned here
6380    * @param[out] imagResult  imaginary part of the result returned here
6381    */
6382   void arm_cmplx_dot_prod_f32(
6383   float32_t * pSrcA,
6384   float32_t * pSrcB,
6385   uint32_t numSamples,
6386   float32_t * realResult,
6387   float32_t * imagResult);
6388 
6389 
6390   /**
6391    * @brief  Q15 complex-by-real multiplication
6392    * @param[in]  pSrcCmplx   points to the complex input vector
6393    * @param[in]  pSrcReal    points to the real input vector
6394    * @param[out] pCmplxDst   points to the complex output vector
6395    * @param[in]  numSamples  number of samples in each vector
6396    */
6397   void arm_cmplx_mult_real_q15(
6398   q15_t * pSrcCmplx,
6399   q15_t * pSrcReal,
6400   q15_t * pCmplxDst,
6401   uint32_t numSamples);
6402 
6403 
6404   /**
6405    * @brief  Q31 complex-by-real multiplication
6406    * @param[in]  pSrcCmplx   points to the complex input vector
6407    * @param[in]  pSrcReal    points to the real input vector
6408    * @param[out] pCmplxDst   points to the complex output vector
6409    * @param[in]  numSamples  number of samples in each vector
6410    */
6411   void arm_cmplx_mult_real_q31(
6412   q31_t * pSrcCmplx,
6413   q31_t * pSrcReal,
6414   q31_t * pCmplxDst,
6415   uint32_t numSamples);
6416 
6417 
6418   /**
6419    * @brief  Floating-point complex-by-real multiplication
6420    * @param[in]  pSrcCmplx   points to the complex input vector
6421    * @param[in]  pSrcReal    points to the real input vector
6422    * @param[out] pCmplxDst   points to the complex output vector
6423    * @param[in]  numSamples  number of samples in each vector
6424    */
6425   void arm_cmplx_mult_real_f32(
6426   float32_t * pSrcCmplx,
6427   float32_t * pSrcReal,
6428   float32_t * pCmplxDst,
6429   uint32_t numSamples);
6430 
6431 
6432   /**
6433    * @brief  Minimum value of a Q7 vector.
6434    * @param[in]  pSrc       is input pointer
6435    * @param[in]  blockSize  is the number of samples to process
6436    * @param[out] result     is output pointer
6437    * @param[out] index      receives the array index of the minimum value in the input buffer.
6438    */
6439   void arm_min_q7(
6440   q7_t * pSrc,
6441   uint32_t blockSize,
6442   q7_t * result,
6443   uint32_t * index);
6444 
6445 
6446   /**
6447    * @brief  Minimum value of a Q15 vector.
6448    * @param[in]  pSrc       is input pointer
6449    * @param[in]  blockSize  is the number of samples to process
6450    * @param[out] pResult    is output pointer
6451    * @param[out] pIndex     receives the array index of the minimum value in the input buffer.
6452    */
6453   void arm_min_q15(
6454   q15_t * pSrc,
6455   uint32_t blockSize,
6456   q15_t * pResult,
6457   uint32_t * pIndex);
6458 
6459 
6460   /**
6461    * @brief  Minimum value of a Q31 vector.
6462    * @param[in]  pSrc       is input pointer
6463    * @param[in]  blockSize  is the number of samples to process
6464    * @param[out] pResult    is output pointer
6465    * @param[out] pIndex     is the array index of the minimum value in the input buffer.
6466    */
6467   void arm_min_q31(
6468   q31_t * pSrc,
6469   uint32_t blockSize,
6470   q31_t * pResult,
6471   uint32_t * pIndex);
6472 
6473 
6474   /**
6475    * @brief  Minimum value of a floating-point vector.
6476    * @param[in]  pSrc       is input pointer
6477    * @param[in]  blockSize  is the number of samples to process
6478    * @param[out] pResult    is output pointer
6479    * @param[out] pIndex     is the array index of the minimum value in the input buffer.
6480    */
6481   void arm_min_f32(
6482   float32_t * pSrc,
6483   uint32_t blockSize,
6484   float32_t * pResult,
6485   uint32_t * pIndex);
6486 
6487 
6488 /**
6489  * @brief Maximum value of a Q7 vector.
6490  * @param[in]  pSrc       points to the input buffer
6491  * @param[in]  blockSize  length of the input vector
6492  * @param[out] pResult    maximum value returned here
6493  * @param[out] pIndex     index of maximum value returned here
6494  */
6495   void arm_max_q7(
6496   q7_t * pSrc,
6497   uint32_t blockSize,
6498   q7_t * pResult,
6499   uint32_t * pIndex);
6500 
6501 
6502 /**
6503  * @brief Maximum value of a Q15 vector.
6504  * @param[in]  pSrc       points to the input buffer
6505  * @param[in]  blockSize  length of the input vector
6506  * @param[out] pResult    maximum value returned here
6507  * @param[out] pIndex     index of maximum value returned here
6508  */
6509   void arm_max_q15(
6510   q15_t * pSrc,
6511   uint32_t blockSize,
6512   q15_t * pResult,
6513   uint32_t * pIndex);
6514 
6515 
6516 /**
6517  * @brief Maximum value of a Q31 vector.
6518  * @param[in]  pSrc       points to the input buffer
6519  * @param[in]  blockSize  length of the input vector
6520  * @param[out] pResult    maximum value returned here
6521  * @param[out] pIndex     index of maximum value returned here
6522  */
6523   void arm_max_q31(
6524   q31_t * pSrc,
6525   uint32_t blockSize,
6526   q31_t * pResult,
6527   uint32_t * pIndex);
6528 
6529 
6530 /**
6531  * @brief Maximum value of a floating-point vector.
6532  * @param[in]  pSrc       points to the input buffer
6533  * @param[in]  blockSize  length of the input vector
6534  * @param[out] pResult    maximum value returned here
6535  * @param[out] pIndex     index of maximum value returned here
6536  */
6537   void arm_max_f32(
6538   float32_t * pSrc,
6539   uint32_t blockSize,
6540   float32_t * pResult,
6541   uint32_t * pIndex);
6542 
6543 
6544   /**
6545    * @brief  Q15 complex-by-complex multiplication
6546    * @param[in]  pSrcA       points to the first input vector
6547    * @param[in]  pSrcB       points to the second input vector
6548    * @param[out] pDst        points to the output vector
6549    * @param[in]  numSamples  number of complex samples in each vector
6550    */
6551   void arm_cmplx_mult_cmplx_q15(
6552   q15_t * pSrcA,
6553   q15_t * pSrcB,
6554   q15_t * pDst,
6555   uint32_t numSamples);
6556 
6557 
6558   /**
6559    * @brief  Q31 complex-by-complex multiplication
6560    * @param[in]  pSrcA       points to the first input vector
6561    * @param[in]  pSrcB       points to the second input vector
6562    * @param[out] pDst        points to the output vector
6563    * @param[in]  numSamples  number of complex samples in each vector
6564    */
6565   void arm_cmplx_mult_cmplx_q31(
6566   q31_t * pSrcA,
6567   q31_t * pSrcB,
6568   q31_t * pDst,
6569   uint32_t numSamples);
6570 
6571 
6572   /**
6573    * @brief  Floating-point complex-by-complex multiplication
6574    * @param[in]  pSrcA       points to the first input vector
6575    * @param[in]  pSrcB       points to the second input vector
6576    * @param[out] pDst        points to the output vector
6577    * @param[in]  numSamples  number of complex samples in each vector
6578    */
6579   void arm_cmplx_mult_cmplx_f32(
6580   float32_t * pSrcA,
6581   float32_t * pSrcB,
6582   float32_t * pDst,
6583   uint32_t numSamples);
6584 
6585 
6586   /**
6587    * @brief Converts the elements of the floating-point vector to Q31 vector.
6588    * @param[in]  pSrc       points to the floating-point input vector
6589    * @param[out] pDst       points to the Q31 output vector
6590    * @param[in]  blockSize  length of the input vector
6591    */
6592   void arm_float_to_q31(
6593   float32_t * pSrc,
6594   q31_t * pDst,
6595   uint32_t blockSize);
6596 
6597 
6598   /**
6599    * @brief Converts the elements of the floating-point vector to Q15 vector.
6600    * @param[in]  pSrc       points to the floating-point input vector
6601    * @param[out] pDst       points to the Q15 output vector
6602    * @param[in]  blockSize  length of the input vector
6603    */
6604   void arm_float_to_q15(
6605   float32_t * pSrc,
6606   q15_t * pDst,
6607   uint32_t blockSize);
6608 
6609 
6610   /**
6611    * @brief Converts the elements of the floating-point vector to Q7 vector.
6612    * @param[in]  pSrc       points to the floating-point input vector
6613    * @param[out] pDst       points to the Q7 output vector
6614    * @param[in]  blockSize  length of the input vector
6615    */
6616   void arm_float_to_q7(
6617   float32_t * pSrc,
6618   q7_t * pDst,
6619   uint32_t blockSize);
6620 
6621 
6622   /**
6623    * @brief  Converts the elements of the Q31 vector to Q15 vector.
6624    * @param[in]  pSrc       is input pointer
6625    * @param[out] pDst       is output pointer
6626    * @param[in]  blockSize  is the number of samples to process
6627    */
6628   void arm_q31_to_q15(
6629   q31_t * pSrc,
6630   q15_t * pDst,
6631   uint32_t blockSize);
6632 
6633 
6634   /**
6635    * @brief  Converts the elements of the Q31 vector to Q7 vector.
6636    * @param[in]  pSrc       is input pointer
6637    * @param[out] pDst       is output pointer
6638    * @param[in]  blockSize  is the number of samples to process
6639    */
6640   void arm_q31_to_q7(
6641   q31_t * pSrc,
6642   q7_t * pDst,
6643   uint32_t blockSize);
6644 
6645 
6646   /**
6647    * @brief  Converts the elements of the Q15 vector to floating-point vector.
6648    * @param[in]  pSrc       is input pointer
6649    * @param[out] pDst       is output pointer
6650    * @param[in]  blockSize  is the number of samples to process
6651    */
6652   void arm_q15_to_float(
6653   q15_t * pSrc,
6654   float32_t * pDst,
6655   uint32_t blockSize);
6656 
6657 
6658   /**
6659    * @brief  Converts the elements of the Q15 vector to Q31 vector.
6660    * @param[in]  pSrc       is input pointer
6661    * @param[out] pDst       is output pointer
6662    * @param[in]  blockSize  is the number of samples to process
6663    */
6664   void arm_q15_to_q31(
6665   q15_t * pSrc,
6666   q31_t * pDst,
6667   uint32_t blockSize);
6668 
6669 
6670   /**
6671    * @brief  Converts the elements of the Q15 vector to Q7 vector.
6672    * @param[in]  pSrc       is input pointer
6673    * @param[out] pDst       is output pointer
6674    * @param[in]  blockSize  is the number of samples to process
6675    */
6676   void arm_q15_to_q7(
6677   q15_t * pSrc,
6678   q7_t * pDst,
6679   uint32_t blockSize);
6680 
6681 
6682   /**
6683    * @ingroup groupInterpolation
6684    */
6685 
6686   /**
6687    * @defgroup BilinearInterpolate Bilinear Interpolation
6688    *
6689    * Bilinear interpolation is an extension of linear interpolation applied to a two dimensional grid.
6690    * The underlying function <code>f(x, y)</code> is sampled on a regular grid and the interpolation process
6691    * determines values between the grid points.
6692    * Bilinear interpolation is equivalent to two step linear interpolation, first in the x-dimension and then in the y-dimension.
6693    * Bilinear interpolation is often used in image processing to rescale images.
6694    * The CMSIS DSP library provides bilinear interpolation functions for Q7, Q15, Q31, and floating-point data types.
6695    *
6696    * <b>Algorithm</b>
6697    * \par
6698    * The instance structure used by the bilinear interpolation functions describes a two dimensional data table.
6699    * For floating-point, the instance structure is defined as:
6700    * <pre>
6701    *   typedef struct
6702    *   {
6703    *     uint16_t numRows;
6704    *     uint16_t numCols;
6705    *     float32_t *pData;
6706    * } arm_bilinear_interp_instance_f32;
6707    * </pre>
6708    *
6709    * \par
6710    * where <code>numRows</code> specifies the number of rows in the table;
6711    * <code>numCols</code> specifies the number of columns in the table;
6712    * and <code>pData</code> points to an array of size <code>numRows*numCols</code> values.
6713    * The data table <code>pData</code> is organized in row order and the supplied data values fall on integer indexes.
6714    * That is, table element (x,y) is located at <code>pData[x + y*numCols]</code> where x and y are integers.
6715    *
6716    * \par
6717    * Let <code>(x, y)</code> specify the desired interpolation point.  Then define:
6718    * <pre>
6719    *     XF = floor(x)
6720    *     YF = floor(y)
6721    * </pre>
6722    * \par
6723    * The interpolated output point is computed as:
6724    * <pre>
6725    *  f(x, y) = f(XF, YF) * (1-(x-XF)) * (1-(y-YF))
6726    *           + f(XF+1, YF) * (x-XF)*(1-(y-YF))
6727    *           + f(XF, YF+1) * (1-(x-XF))*(y-YF)
6728    *           + f(XF+1, YF+1) * (x-XF)*(y-YF)
6729    * </pre>
6730    * Note that the coordinates (x, y) contain integer and fractional components.
6731    * The integer components specify which portion of the table to use while the
6732    * fractional components control the interpolation process.
6733    *
6734    * \par
6735    * If (x, y) is outside of the table boundary, bilinear interpolation returns zero output.
6736    */
6737 
6738   /**
6739    * @addtogroup BilinearInterpolate
6740    * @{
6741    */
6742 
6743 
6744   /**
6745   *
6746   * @brief  Floating-point bilinear interpolation.
6747   * @param[in,out] S  points to an instance of the interpolation structure.
6748   * @param[in]     X  interpolation coordinate.
6749   * @param[in]     Y  interpolation coordinate.
6750   * @return out interpolated value.
6751   */
arm_bilinear_interp_f32(const arm_bilinear_interp_instance_f32 * S,float32_t X,float32_t Y)6752   static __INLINE float32_t arm_bilinear_interp_f32(
6753   const arm_bilinear_interp_instance_f32 * S,
6754   float32_t X,
6755   float32_t Y)
6756   {
6757     float32_t out;
6758     float32_t f00, f01, f10, f11;
6759     float32_t *pData = S->pData;
6760     int32_t xIndex, yIndex, index;
6761     float32_t xdiff, ydiff;
6762     float32_t b1, b2, b3, b4;
6763 
6764     xIndex = (int32_t) X;
6765     yIndex = (int32_t) Y;
6766 
6767     /* Care taken for table outside boundary */
6768     /* Returns zero output when values are outside table boundary */
6769     if(xIndex < 0 || xIndex > (S->numRows - 1) || yIndex < 0 || yIndex > (S->numCols - 1))
6770     {
6771       return (0);
6772     }
6773 
6774     /* Calculation of index for two nearest points in X-direction */
6775     index = (xIndex - 1) + (yIndex - 1) * S->numCols;
6776 
6777 
6778     /* Read two nearest points in X-direction */
6779     f00 = pData[index];
6780     f01 = pData[index + 1];
6781 
6782     /* Calculation of index for two nearest points in Y-direction */
6783     index = (xIndex - 1) + (yIndex) * S->numCols;
6784 
6785 
6786     /* Read two nearest points in Y-direction */
6787     f10 = pData[index];
6788     f11 = pData[index + 1];
6789 
6790     /* Calculation of intermediate values */
6791     b1 = f00;
6792     b2 = f01 - f00;
6793     b3 = f10 - f00;
6794     b4 = f00 - f01 - f10 + f11;
6795 
6796     /* Calculation of fractional part in X */
6797     xdiff = X - xIndex;
6798 
6799     /* Calculation of fractional part in Y */
6800     ydiff = Y - yIndex;
6801 
6802     /* Calculation of bi-linear interpolated output */
6803     out = b1 + b2 * xdiff + b3 * ydiff + b4 * xdiff * ydiff;
6804 
6805     /* return to application */
6806     return (out);
6807   }
6808 
6809 
6810   /**
6811   *
6812   * @brief  Q31 bilinear interpolation.
6813   * @param[in,out] S  points to an instance of the interpolation structure.
6814   * @param[in]     X  interpolation coordinate in 12.20 format.
6815   * @param[in]     Y  interpolation coordinate in 12.20 format.
6816   * @return out interpolated value.
6817   */
arm_bilinear_interp_q31(arm_bilinear_interp_instance_q31 * S,q31_t X,q31_t Y)6818   static __INLINE q31_t arm_bilinear_interp_q31(
6819   arm_bilinear_interp_instance_q31 * S,
6820   q31_t X,
6821   q31_t Y)
6822   {
6823     q31_t out;                                   /* Temporary output */
6824     q31_t acc = 0;                               /* output */
6825     q31_t xfract, yfract;                        /* X, Y fractional parts */
6826     q31_t x1, x2, y1, y2;                        /* Nearest output values */
6827     int32_t rI, cI;                              /* Row and column indices */
6828     q31_t *pYData = S->pData;                    /* pointer to output table values */
6829     uint32_t nCols = S->numCols;                 /* num of rows */
6830 
6831     /* Input is in 12.20 format */
6832     /* 12 bits for the table index */
6833     /* Index value calculation */
6834     rI = ((X & (q31_t)0xFFF00000) >> 20);
6835 
6836     /* Input is in 12.20 format */
6837     /* 12 bits for the table index */
6838     /* Index value calculation */
6839     cI = ((Y & (q31_t)0xFFF00000) >> 20);
6840 
6841     /* Care taken for table outside boundary */
6842     /* Returns zero output when values are outside table boundary */
6843     if(rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1))
6844     {
6845       return (0);
6846     }
6847 
6848     /* 20 bits for the fractional part */
6849     /* shift left xfract by 11 to keep 1.31 format */
6850     xfract = (X & 0x000FFFFF) << 11u;
6851 
6852     /* Read two nearest output values from the index */
6853     x1 = pYData[(rI) + (int32_t)nCols * (cI)    ];
6854     x2 = pYData[(rI) + (int32_t)nCols * (cI) + 1];
6855 
6856     /* 20 bits for the fractional part */
6857     /* shift left yfract by 11 to keep 1.31 format */
6858     yfract = (Y & 0x000FFFFF) << 11u;
6859 
6860     /* Read two nearest output values from the index */
6861     y1 = pYData[(rI) + (int32_t)nCols * (cI + 1)    ];
6862     y2 = pYData[(rI) + (int32_t)nCols * (cI + 1) + 1];
6863 
6864     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 3.29(q29) format */
6865     out = ((q31_t) (((q63_t) x1  * (0x7FFFFFFF - xfract)) >> 32));
6866     acc = ((q31_t) (((q63_t) out * (0x7FFFFFFF - yfract)) >> 32));
6867 
6868     /* x2 * (xfract) * (1-yfract)  in 3.29(q29) and adding to acc */
6869     out = ((q31_t) ((q63_t) x2 * (0x7FFFFFFF - yfract) >> 32));
6870     acc += ((q31_t) ((q63_t) out * (xfract) >> 32));
6871 
6872     /* y1 * (1 - xfract) * (yfract)  in 3.29(q29) and adding to acc */
6873     out = ((q31_t) ((q63_t) y1 * (0x7FFFFFFF - xfract) >> 32));
6874     acc += ((q31_t) ((q63_t) out * (yfract) >> 32));
6875 
6876     /* y2 * (xfract) * (yfract)  in 3.29(q29) and adding to acc */
6877     out = ((q31_t) ((q63_t) y2 * (xfract) >> 32));
6878     acc += ((q31_t) ((q63_t) out * (yfract) >> 32));
6879 
6880     /* Convert acc to 1.31(q31) format */
6881     return ((q31_t)(acc << 2));
6882   }
6883 
6884 
6885   /**
6886   * @brief  Q15 bilinear interpolation.
6887   * @param[in,out] S  points to an instance of the interpolation structure.
6888   * @param[in]     X  interpolation coordinate in 12.20 format.
6889   * @param[in]     Y  interpolation coordinate in 12.20 format.
6890   * @return out interpolated value.
6891   */
arm_bilinear_interp_q15(arm_bilinear_interp_instance_q15 * S,q31_t X,q31_t Y)6892   static __INLINE q15_t arm_bilinear_interp_q15(
6893   arm_bilinear_interp_instance_q15 * S,
6894   q31_t X,
6895   q31_t Y)
6896   {
6897     q63_t acc = 0;                               /* output */
6898     q31_t out;                                   /* Temporary output */
6899     q15_t x1, x2, y1, y2;                        /* Nearest output values */
6900     q31_t xfract, yfract;                        /* X, Y fractional parts */
6901     int32_t rI, cI;                              /* Row and column indices */
6902     q15_t *pYData = S->pData;                    /* pointer to output table values */
6903     uint32_t nCols = S->numCols;                 /* num of rows */
6904 
6905     /* Input is in 12.20 format */
6906     /* 12 bits for the table index */
6907     /* Index value calculation */
6908     rI = ((X & (q31_t)0xFFF00000) >> 20);
6909 
6910     /* Input is in 12.20 format */
6911     /* 12 bits for the table index */
6912     /* Index value calculation */
6913     cI = ((Y & (q31_t)0xFFF00000) >> 20);
6914 
6915     /* Care taken for table outside boundary */
6916     /* Returns zero output when values are outside table boundary */
6917     if(rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1))
6918     {
6919       return (0);
6920     }
6921 
6922     /* 20 bits for the fractional part */
6923     /* xfract should be in 12.20 format */
6924     xfract = (X & 0x000FFFFF);
6925 
6926     /* Read two nearest output values from the index */
6927     x1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI)    ];
6928     x2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) + 1];
6929 
6930     /* 20 bits for the fractional part */
6931     /* yfract should be in 12.20 format */
6932     yfract = (Y & 0x000FFFFF);
6933 
6934     /* Read two nearest output values from the index */
6935     y1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1)    ];
6936     y2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) + 1];
6937 
6938     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 13.51 format */
6939 
6940     /* x1 is in 1.15(q15), xfract in 12.20 format and out is in 13.35 format */
6941     /* convert 13.35 to 13.31 by right shifting  and out is in 1.31 */
6942     out = (q31_t) (((q63_t) x1 * (0xFFFFF - xfract)) >> 4u);
6943     acc = ((q63_t) out * (0xFFFFF - yfract));
6944 
6945     /* x2 * (xfract) * (1-yfract)  in 1.51 and adding to acc */
6946     out = (q31_t) (((q63_t) x2 * (0xFFFFF - yfract)) >> 4u);
6947     acc += ((q63_t) out * (xfract));
6948 
6949     /* y1 * (1 - xfract) * (yfract)  in 1.51 and adding to acc */
6950     out = (q31_t) (((q63_t) y1 * (0xFFFFF - xfract)) >> 4u);
6951     acc += ((q63_t) out * (yfract));
6952 
6953     /* y2 * (xfract) * (yfract)  in 1.51 and adding to acc */
6954     out = (q31_t) (((q63_t) y2 * (xfract)) >> 4u);
6955     acc += ((q63_t) out * (yfract));
6956 
6957     /* acc is in 13.51 format and down shift acc by 36 times */
6958     /* Convert out to 1.15 format */
6959     return ((q15_t)(acc >> 36));
6960   }
6961 
6962 
6963   /**
6964   * @brief  Q7 bilinear interpolation.
6965   * @param[in,out] S  points to an instance of the interpolation structure.
6966   * @param[in]     X  interpolation coordinate in 12.20 format.
6967   * @param[in]     Y  interpolation coordinate in 12.20 format.
6968   * @return out interpolated value.
6969   */
arm_bilinear_interp_q7(arm_bilinear_interp_instance_q7 * S,q31_t X,q31_t Y)6970   static __INLINE q7_t arm_bilinear_interp_q7(
6971   arm_bilinear_interp_instance_q7 * S,
6972   q31_t X,
6973   q31_t Y)
6974   {
6975     q63_t acc = 0;                               /* output */
6976     q31_t out;                                   /* Temporary output */
6977     q31_t xfract, yfract;                        /* X, Y fractional parts */
6978     q7_t x1, x2, y1, y2;                         /* Nearest output values */
6979     int32_t rI, cI;                              /* Row and column indices */
6980     q7_t *pYData = S->pData;                     /* pointer to output table values */
6981     uint32_t nCols = S->numCols;                 /* num of rows */
6982 
6983     /* Input is in 12.20 format */
6984     /* 12 bits for the table index */
6985     /* Index value calculation */
6986     rI = ((X & (q31_t)0xFFF00000) >> 20);
6987 
6988     /* Input is in 12.20 format */
6989     /* 12 bits for the table index */
6990     /* Index value calculation */
6991     cI = ((Y & (q31_t)0xFFF00000) >> 20);
6992 
6993     /* Care taken for table outside boundary */
6994     /* Returns zero output when values are outside table boundary */
6995     if(rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1))
6996     {
6997       return (0);
6998     }
6999 
7000     /* 20 bits for the fractional part */
7001     /* xfract should be in 12.20 format */
7002     xfract = (X & (q31_t)0x000FFFFF);
7003 
7004     /* Read two nearest output values from the index */
7005     x1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI)    ];
7006     x2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) + 1];
7007 
7008     /* 20 bits for the fractional part */
7009     /* yfract should be in 12.20 format */
7010     yfract = (Y & (q31_t)0x000FFFFF);
7011 
7012     /* Read two nearest output values from the index */
7013     y1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1)    ];
7014     y2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) + 1];
7015 
7016     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 16.47 format */
7017     out = ((x1 * (0xFFFFF - xfract)));
7018     acc = (((q63_t) out * (0xFFFFF - yfract)));
7019 
7020     /* x2 * (xfract) * (1-yfract)  in 2.22 and adding to acc */
7021     out = ((x2 * (0xFFFFF - yfract)));
7022     acc += (((q63_t) out * (xfract)));
7023 
7024     /* y1 * (1 - xfract) * (yfract)  in 2.22 and adding to acc */
7025     out = ((y1 * (0xFFFFF - xfract)));
7026     acc += (((q63_t) out * (yfract)));
7027 
7028     /* y2 * (xfract) * (yfract)  in 2.22 and adding to acc */
7029     out = ((y2 * (yfract)));
7030     acc += (((q63_t) out * (xfract)));
7031 
7032     /* acc in 16.47 format and down shift by 40 to convert to 1.7 format */
7033     return ((q7_t)(acc >> 40));
7034   }
7035 
7036   /**
7037    * @} end of BilinearInterpolate group
7038    */
7039 
7040 
/*
 * 32x32-bit multiply helpers that keep the upper 32 bits of the 64-bit
 * product.  The "_R" variants add 0x80000000 before the shift so the
 * result is rounded rather than truncated.  In every macro "a" must be
 * an lvalue; it receives (or accumulates) the result.  All arguments are
 * parenthesised so that expression arguments such as "p + q" are
 * multiplied as a whole.
 */

/* SMMLAR: a = rounded upper word of ((a << 32) + x * y) */
#define multAcc_32x32_keep32_R(a, x, y) \
    (a) = (q31_t) (((((q63_t) (a)) << 32) + ((q63_t) (x) * (y)) + 0x80000000LL ) >> 32)

/* SMMLSR: a = rounded upper word of ((a << 32) - x * y) */
#define multSub_32x32_keep32_R(a, x, y) \
    (a) = (q31_t) (((((q63_t) (a)) << 32) - ((q63_t) (x) * (y)) + 0x80000000LL ) >> 32)

/* SMMULR: a = rounded upper word of (x * y) */
#define mult_32x32_keep32_R(a, x, y) \
    (a) = (q31_t) (((q63_t) (x) * (y) + 0x80000000LL ) >> 32)

/* SMMLA: a += upper word of (x * y) */
#define multAcc_32x32_keep32(a, x, y) \
    (a) += (q31_t) (((q63_t) (x) * (y)) >> 32)

/* SMMLS: a -= upper word of (x * y) */
#define multSub_32x32_keep32(a, x, y) \
    (a) -= (q31_t) (((q63_t) (x) * (y)) >> 32)

/* SMMUL: a = upper word of (x * y) */
#define mult_32x32_keep32(a, x, y) \
    (a) = (q31_t) (((q63_t) (x) * (y) ) >> 32)
7064 
7065 
/*
 * Per-compiler definitions of the low-optimization markers.
 *
 *   LOW_OPTIMIZATION_ENTER           place directly above a function definition
 *   LOW_OPTIMIZATION_EXIT            place directly after the end of that definition
 *   IAR_ONLY_LOW_OPTIMIZATION_ENTER  as above, but only expands to something
 *   IAR_ONLY_LOW_OPTIMIZATION_EXIT   in the __ICCARM__ branch
 *
 * Every branch defines all four names; where a branch has nothing to emit
 * the macro is defined empty.  The final #else does the same for any
 * compiler not listed, so the markers can always be used.
 */
#if defined ( __CC_ARM )
  #if defined( ARM_MATH_CM4 ) || defined( ARM_MATH_CM7)
    /* Save the current pragma state and drop to O1 ... */
    #define LOW_OPTIMIZATION_ENTER \
       _Pragma ("push")         \
       _Pragma ("O1")
    /* ... and restore the saved state afterwards. */
    #define LOW_OPTIMIZATION_EXIT \
       _Pragma ("pop")
  #else
    #define LOW_OPTIMIZATION_ENTER
    #define LOW_OPTIMIZATION_EXIT
  #endif

  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#elif defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)
  #define LOW_OPTIMIZATION_ENTER
  #define LOW_OPTIMIZATION_EXIT
  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#elif defined(__GNUC__)
  /* Function attribute; nothing is needed after the definition. */
  #define LOW_OPTIMIZATION_ENTER __attribute__(( optimize("-O1") ))
  #define LOW_OPTIMIZATION_EXIT
  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#elif defined(__ICCARM__)
  #if defined( ARM_MATH_CM4 ) || defined( ARM_MATH_CM7)
    #define LOW_OPTIMIZATION_ENTER \
       _Pragma ("optimize=low")
    #define IAR_ONLY_LOW_OPTIMIZATION_ENTER \
       _Pragma ("optimize=low")
  #else
    #define LOW_OPTIMIZATION_ENTER
    #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
  #endif

  /* No closing pragma is emitted in this branch. */
  #define LOW_OPTIMIZATION_EXIT
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#elif defined(__CSMC__)
  #define LOW_OPTIMIZATION_ENTER
  #define LOW_OPTIMIZATION_EXIT
  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#elif defined(__TASKING__)
  #define LOW_OPTIMIZATION_ENTER
  #define LOW_OPTIMIZATION_EXIT
  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#else
  /* Unlisted compiler: define the markers as empty so code using them still builds. */
  #define LOW_OPTIMIZATION_ENTER
  #define LOW_OPTIMIZATION_EXIT
  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#endif
7138 
7139 
7140 #ifdef   __cplusplus
7141 }
7142 #endif
7143 
7144 
7145 #if defined ( __GNUC__ )
7146 #pragma GCC diagnostic pop
7147 #endif
7148 
7149 #endif /* _ARM_MATH_H */
7150 
7151 /**
7152  *
7153  * End of file.
7154  */
7155