1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
3 *
4 * $Date:        19. March 2015
5 * $Revision: 	V.1.4.5
6 *
7 * Project: 	    CMSIS DSP Library
8 * Title:	    arm_math.h
9 *
10 * Description:	Public header file for CMSIS DSP Library
11 *
12 * Target Processor: Cortex-M7/Cortex-M4/Cortex-M3/Cortex-M0
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 *   - Redistributions of source code must retain the above copyright
18 *     notice, this list of conditions and the following disclaimer.
19 *   - Redistributions in binary form must reproduce the above copyright
20 *     notice, this list of conditions and the following disclaimer in
21 *     the documentation and/or other materials provided with the
22 *     distribution.
23 *   - Neither the name of ARM LIMITED nor the names of its contributors
24 *     may be used to endorse or promote products derived from this
25 *     software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39  * -------------------------------------------------------------------- */
40 
41 /**
42    \mainpage CMSIS DSP Software Library
43    *
44    * Introduction
45    * ------------
46    *
47    * This user manual describes the CMSIS DSP software library,
48    * a suite of common signal processing functions for use on Cortex-M processor based devices.
49    *
50    * The library is divided into a number of functions each covering a specific category:
51    * - Basic math functions
52    * - Fast math functions
53    * - Complex math functions
54    * - Filters
55    * - Matrix functions
56    * - Transforms
57    * - Motor control functions
58    * - Statistical functions
59    * - Support functions
60    * - Interpolation functions
61    *
   * The library has separate functions for operating on 8-bit integers, 16-bit integers,
   * 32-bit integers and 32-bit floating-point values.
64    *
65    * Using the Library
66    * ------------
67    *
68    * The library installer contains prebuilt versions of the libraries in the <code>Lib</code> folder.
69    * - arm_cortexM7lfdp_math.lib (Little endian and Double Precision Floating Point Unit on Cortex-M7)
70    * - arm_cortexM7bfdp_math.lib (Big endian and Double Precision Floating Point Unit on Cortex-M7)
71    * - arm_cortexM7lfsp_math.lib (Little endian and Single Precision Floating Point Unit on Cortex-M7)
72    * - arm_cortexM7bfsp_math.lib (Big endian and Single Precision Floating Point Unit on Cortex-M7)
73    * - arm_cortexM7l_math.lib (Little endian on Cortex-M7)
74    * - arm_cortexM7b_math.lib (Big endian on Cortex-M7)
75    * - arm_cortexM4lf_math.lib (Little endian and Floating Point Unit on Cortex-M4)
76    * - arm_cortexM4bf_math.lib (Big endian and Floating Point Unit on Cortex-M4)
77    * - arm_cortexM4l_math.lib (Little endian on Cortex-M4)
78    * - arm_cortexM4b_math.lib (Big endian on Cortex-M4)
79    * - arm_cortexM3l_math.lib (Little endian on Cortex-M3)
80    * - arm_cortexM3b_math.lib (Big endian on Cortex-M3)
81    * - arm_cortexM0l_math.lib (Little endian on Cortex-M0 / CortexM0+)
82    * - arm_cortexM0b_math.lib (Big endian on Cortex-M0 / CortexM0+)
83    *
84    * The library functions are declared in the public file <code>arm_math.h</code> which is placed in the <code>Include</code> folder.
   * Simply include this file, link the appropriate library in the application and begin calling the library functions. The library provides a single
   * public header file <code>arm_math.h</code> for Cortex-M7/M4/M3/M0/M0+ in both little endian and big endian configurations. The same header file is used for the floating point unit (FPU) variants.
87    * Define the appropriate pre processor MACRO ARM_MATH_CM7 or ARM_MATH_CM4 or  ARM_MATH_CM3 or
88    * ARM_MATH_CM0 or ARM_MATH_CM0PLUS depending on the target processor in the application.
89    *
90    * Examples
91    * --------
92    *
93    * The library ships with a number of examples which demonstrate how to use the library functions.
94    *
95    * Toolchain Support
96    * ------------
97    *
98    * The library has been developed and tested with MDK-ARM version 5.14.0.0
99    * The library is being tested in GCC and IAR toolchains and updates on this activity will be made available shortly.
100    *
101    * Building the Library
102    * ------------
103    *
104    * The library installer contains a project file to re build libraries on MDK-ARM Tool chain in the <code>CMSIS\\DSP_Lib\\Source\\ARM</code> folder.
105    * - arm_cortexM_math.uvprojx
106    *
107    *
108    * The libraries can be built by opening the arm_cortexM_math.uvprojx project in MDK-ARM, selecting a specific target, and defining the optional pre processor MACROs detailed above.
109    *
110    * Pre-processor Macros
111    * ------------
112    *
   * Each library project has different pre-processor macros.
114    *
115    * - UNALIGNED_SUPPORT_DISABLE:
116    *
   * Define macro UNALIGNED_SUPPORT_DISABLE if the silicon does not support unaligned memory access.
118    *
119    * - ARM_MATH_BIG_ENDIAN:
120    *
121    * Define macro ARM_MATH_BIG_ENDIAN to build the library for big endian targets. By default library builds for little endian targets.
122    *
123    * - ARM_MATH_MATRIX_CHECK:
124    *
125    * Define macro ARM_MATH_MATRIX_CHECK for checking on the input and output sizes of matrices
126    *
127    * - ARM_MATH_ROUNDING:
128    *
129    * Define macro ARM_MATH_ROUNDING for rounding on support functions
130    *
131    * - ARM_MATH_CMx:
132    *
133    * Define macro ARM_MATH_CM4 for building the library on Cortex-M4 target, ARM_MATH_CM3 for building library on Cortex-M3 target
134    * and ARM_MATH_CM0 for building library on Cortex-M0 target, ARM_MATH_CM0PLUS for building library on Cortex-M0+ target, and
135    * ARM_MATH_CM7 for building the library on cortex-M7.
136    *
137    * - __FPU_PRESENT:
138    *
139    * Initialize macro __FPU_PRESENT = 1 when building on FPU supported Targets. Enable this macro for M4bf and M4lf libraries
140    *
141    * <hr>
142    * CMSIS-DSP in ARM::CMSIS Pack
143    * -----------------------------
144    *
145    * The following files relevant to CMSIS-DSP are present in the <b>ARM::CMSIS</b> Pack directories:
146    * |File/Folder                   |Content                                                                 |
147    * |------------------------------|------------------------------------------------------------------------|
148    * |\b CMSIS\\Documentation\\DSP  | This documentation                                                     |
149    * |\b CMSIS\\DSP_Lib             | Software license agreement (license.txt)                               |
150    * |\b CMSIS\\DSP_Lib\\Examples   | Example projects demonstrating the usage of the library functions      |
151    * |\b CMSIS\\DSP_Lib\\Source     | Source files for rebuilding the library                                |
152    *
153    * <hr>
154    * Revision History of CMSIS-DSP
155    * ------------
156    * Please refer to \ref ChangeLog_pg.
157    *
158    * Copyright Notice
159    * ------------
160    *
161    * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
162    */
163 
164 
165 /**
166  * @defgroup groupMath Basic Math Functions
167  */
168 
169 /**
170  * @defgroup groupFastMath Fast Math Functions
171  * This set of functions provides a fast approximation to sine, cosine, and square root.
172  * As compared to most of the other functions in the CMSIS math library, the fast math functions
173  * operate on individual values and not arrays.
174  * There are separate functions for Q15, Q31, and floating-point data.
175  *
176  */
177 
178 /**
179  * @defgroup groupCmplxMath Complex Math Functions
180  * This set of functions operates on complex data vectors.
181  * The data in the complex arrays is stored in an interleaved fashion
182  * (real, imag, real, imag, ...).
183  * In the API functions, the number of samples in a complex array refers
184  * to the number of complex values; the array contains twice this number of
185  * real values.
186  */
187 
188 /**
189  * @defgroup groupFilters Filtering Functions
190  */
191 
192 /**
193  * @defgroup groupMatrix Matrix Functions
194  *
195  * This set of functions provides basic matrix math operations.
196  * The functions operate on matrix data structures.  For example,
197  * the type
198  * definition for the floating-point matrix structure is shown
199  * below:
200  * <pre>
201  *     typedef struct
202  *     {
203  *       uint16_t numRows;     // number of rows of the matrix.
204  *       uint16_t numCols;     // number of columns of the matrix.
205  *       float32_t *pData;     // points to the data of the matrix.
206  *     } arm_matrix_instance_f32;
207  * </pre>
208  * There are similar definitions for Q15 and Q31 data types.
209  *
210  * The structure specifies the size of the matrix and then points to
211  * an array of data.  The array is of size <code>numRows X numCols</code>
212  * and the values are arranged in row order.  That is, the
213  * matrix element (i, j) is stored at:
214  * <pre>
215  *     pData[i*numCols + j]
216  * </pre>
217  *
218  * \par Init Functions
219  * There is an associated initialization function for each type of matrix
220  * data structure.
221  * The initialization function sets the values of the internal structure fields.
222  * Refer to the function <code>arm_mat_init_f32()</code>, <code>arm_mat_init_q31()</code>
223  * and <code>arm_mat_init_q15()</code> for floating-point, Q31 and Q15 types,  respectively.
224  *
225  * \par
226  * Use of the initialization function is optional. However, if initialization function is used
227  * then the instance structure cannot be placed into a const data section.
228  * To place the instance structure in a const data
229  * section, manually initialize the data structure.  For example:
230  * <pre>
231  * <code>arm_matrix_instance_f32 S = {nRows, nColumns, pData};</code>
232  * <code>arm_matrix_instance_q31 S = {nRows, nColumns, pData};</code>
233  * <code>arm_matrix_instance_q15 S = {nRows, nColumns, pData};</code>
234  * </pre>
235  * where <code>nRows</code> specifies the number of rows, <code>nColumns</code>
236  * specifies the number of columns, and <code>pData</code> points to the
237  * data array.
238  *
239  * \par Size Checking
240  * By default all of the matrix functions perform size checking on the input and
241  * output matrices.  For example, the matrix addition function verifies that the
242  * two input matrices and the output matrix all have the same number of rows and
243  * columns.  If the size check fails the functions return:
244  * <pre>
245  *     ARM_MATH_SIZE_MISMATCH
246  * </pre>
247  * Otherwise the functions return
248  * <pre>
249  *     ARM_MATH_SUCCESS
250  * </pre>
251  * There is some overhead associated with this matrix size checking.
252  * The matrix size checking is enabled via the \#define
253  * <pre>
254  *     ARM_MATH_MATRIX_CHECK
255  * </pre>
256  * within the library project settings.  By default this macro is defined
257  * and size checking is enabled.  By changing the project settings and
258  * undefining this macro size checking is eliminated and the functions
259  * run a bit faster.  With size checking disabled the functions always
260  * return <code>ARM_MATH_SUCCESS</code>.
261  */
262 
263 /**
264  * @defgroup groupTransforms Transform Functions
265  */
266 
267 /**
268  * @defgroup groupController Controller Functions
269  */
270 
271 /**
272  * @defgroup groupStats Statistics Functions
273  */
274 /**
275  * @defgroup groupSupport Support Functions
276  */
277 
278 /**
279  * @defgroup groupInterpolation Interpolation Functions
280  * These functions perform 1- and 2-dimensional interpolation of data.
281  * Linear interpolation is used for 1-dimensional data and
282  * bilinear interpolation is used for 2-dimensional data.
283  */
284 
285 /**
286  * @defgroup groupExamples Examples
287  */
288 #ifndef _ARM_MATH_H
289 #define _ARM_MATH_H
290 
291 #define __CMSIS_GENERIC         /* disable NVIC and Systick functions */
292 
293 #if defined(ARM_MATH_CM7)
294   #include "core_cm7.h"
295 #elif defined (ARM_MATH_CM4)
296   #include "core_cm4.h"
297 #elif defined (ARM_MATH_CM3)
298   #include "core_cm3.h"
299 #elif defined (ARM_MATH_CM0)
300   #include "core_cm0.h"
301 #define ARM_MATH_CM0_FAMILY
302   #elif defined (ARM_MATH_CM0PLUS)
303 #include "core_cm0plus.h"
304   #define ARM_MATH_CM0_FAMILY
305 #else
306   #error "Define according the used Cortex core ARM_MATH_CM7, ARM_MATH_CM4, ARM_MATH_CM3, ARM_MATH_CM0PLUS or ARM_MATH_CM0"
307 #endif
308 
309 #undef  __CMSIS_GENERIC         /* enable NVIC and Systick functions */
310 #include "string.h"
311 #include "math.h"
312 #ifdef	__cplusplus
313 extern "C"
314 {
315 #endif
316 
317 
318   /**
319    * @brief Macros required for reciprocal calculation in Normalized LMS
320    */
321 
322 #define DELTA_Q31 			(0x100)
323 #define DELTA_Q15 			0x5
324 #define INDEX_MASK 			0x0000003F
325 #ifndef PI
326 #define PI					3.14159265358979f
327 #endif
328 
329   /**
330    * @brief Macros required for SINE and COSINE Fast math approximations
331    */
332 
333 #define FAST_MATH_TABLE_SIZE  512
334 #define FAST_MATH_Q31_SHIFT   (32 - 10)
335 #define FAST_MATH_Q15_SHIFT   (16 - 10)
336 #define CONTROLLER_Q31_SHIFT  (32 - 9)
337 #define TABLE_SIZE  256
338 #define TABLE_SPACING_Q31	   0x400000
339 #define TABLE_SPACING_Q15	   0x80
340 
341   /**
342    * @brief Macros required for SINE and COSINE Controller functions
343    */
344   /* 1.31(q31) Fixed value of 2/360 */
345   /* -1 to +1 is divided into 360 values so total spacing is (2/360) */
346 #define INPUT_SPACING			0xB60B61
347 
348   /**
349    * @brief Macro for Unaligned Support
350    */
351 #ifndef UNALIGNED_SUPPORT_DISABLE
352     #define ALIGN4
353 #else
354   #if defined  (__GNUC__)
355     #define ALIGN4 __attribute__((aligned(4)))
356   #else
357     #define ALIGN4 __align(4)
358   #endif
359 #endif	/*	#ifndef UNALIGNED_SUPPORT_DISABLE	*/
360 
361   /**
362    * @brief Error status returned by some functions in the library.
363    */
364 
365   typedef enum
366   {
367     ARM_MATH_SUCCESS = 0,                /**< No error */
368     ARM_MATH_ARGUMENT_ERROR = -1,        /**< One or more arguments are incorrect */
369     ARM_MATH_LENGTH_ERROR = -2,          /**< Length of data buffer is incorrect */
370     ARM_MATH_SIZE_MISMATCH = -3,         /**< Size of matrices is not compatible with the operation. */
371     ARM_MATH_NANINF = -4,                /**< Not-a-number (NaN) or infinity is generated */
372     ARM_MATH_SINGULAR = -5,              /**< Generated by matrix inversion if the input matrix is singular and cannot be inverted. */
373     ARM_MATH_TEST_FAILURE = -6           /**< Test Failed  */
374   } arm_status;
375 
376   /**
377    * @brief 8-bit fractional data type in 1.7 format.
378    */
379   typedef int8_t q7_t;
380 
381   /**
382    * @brief 16-bit fractional data type in 1.15 format.
383    */
384   typedef int16_t q15_t;
385 
386   /**
387    * @brief 32-bit fractional data type in 1.31 format.
388    */
389   typedef int32_t q31_t;
390 
391   /**
392    * @brief 64-bit fractional data type in 1.63 format.
393    */
394   typedef int64_t q63_t;
395 
396   /**
397    * @brief 32-bit floating-point type definition.
398    */
399   typedef float float32_t;
400 
401   /**
402    * @brief 64-bit floating-point type definition.
403    */
404   typedef double float64_t;
405 
406   /**
407    * @brief definition to read/write two 16 bit values.
408    */
409 #if defined __CC_ARM
410   #define __SIMD32_TYPE int32_t __packed
411   #define CMSIS_UNUSED __attribute__((unused))
412 #elif defined __ICCARM__
413   #define __SIMD32_TYPE int32_t __packed
414   #define CMSIS_UNUSED
415 #elif defined __GNUC__
416   #define __SIMD32_TYPE int32_t
417   #define CMSIS_UNUSED __attribute__((unused))
418 #elif defined __CSMC__			/* Cosmic */
419   #define __SIMD32_TYPE int32_t
420   #define CMSIS_UNUSED
421 #elif defined __TASKING__
422   #define __SIMD32_TYPE __unaligned int32_t
423   #define CMSIS_UNUSED
424 #else
425   #error Unknown compiler
426 #endif
427 
428 #define __SIMD32(addr)  (*(__SIMD32_TYPE **) & (addr))
429 #define __SIMD32_CONST(addr)  ((__SIMD32_TYPE *)(addr))
430 
431 #define _SIMD32_OFFSET(addr)  (*(__SIMD32_TYPE *)  (addr))
432 
433 #define __SIMD64(addr)  (*(int64_t **) & (addr))
434 
435 #if defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0_FAMILY)
436   /**
437    * @brief definition to pack two 16 bit values.
438    */
439 #define __PKHBT(ARG1, ARG2, ARG3)      ( (((int32_t)(ARG1) <<  0) & (int32_t)0x0000FFFF) | \
440                                          (((int32_t)(ARG2) << ARG3) & (int32_t)0xFFFF0000)  )
441 #define __PKHTB(ARG1, ARG2, ARG3)      ( (((int32_t)(ARG1) <<  0) & (int32_t)0xFFFF0000) | \
442                                          (((int32_t)(ARG2) >> ARG3) & (int32_t)0x0000FFFF)  )
443 
444 #endif
445 
446 
447    /**
448    * @brief definition to pack four 8 bit values.
449    */
450 #ifndef ARM_MATH_BIG_ENDIAN
451 
452 #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) <<  0) & (int32_t)0x000000FF) |	\
453                                 (((int32_t)(v1) <<  8) & (int32_t)0x0000FF00) |	\
454 							    (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) |	\
455 							    (((int32_t)(v3) << 24) & (int32_t)0xFF000000)  )
456 #else
457 
458 #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v3) <<  0) & (int32_t)0x000000FF) |	\
459                                 (((int32_t)(v2) <<  8) & (int32_t)0x0000FF00) |	\
460 							    (((int32_t)(v1) << 16) & (int32_t)0x00FF0000) |	\
461 							    (((int32_t)(v0) << 24) & (int32_t)0xFF000000)  )
462 
463 #endif
464 
465 
466   /**
467    * @brief Clips Q63 to Q31 values.
468    */
clip_q63_to_q31(q63_t x)469   static __INLINE q31_t clip_q63_to_q31(
470   q63_t x)
471   {
472     return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
473       ((0x7FFFFFFF ^ ((q31_t) (x >> 63)))) : (q31_t) x;
474   }
475 
476   /**
477    * @brief Clips Q63 to Q15 values.
478    */
clip_q63_to_q15(q63_t x)479   static __INLINE q15_t clip_q63_to_q15(
480   q63_t x)
481   {
482     return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
483       ((0x7FFF ^ ((q15_t) (x >> 63)))) : (q15_t) (x >> 15);
484   }
485 
486   /**
487    * @brief Clips Q31 to Q7 values.
488    */
clip_q31_to_q7(q31_t x)489   static __INLINE q7_t clip_q31_to_q7(
490   q31_t x)
491   {
492     return ((q31_t) (x >> 24) != ((q31_t) x >> 23)) ?
493       ((0x7F ^ ((q7_t) (x >> 31)))) : (q7_t) x;
494   }
495 
496   /**
497    * @brief Clips Q31 to Q15 values.
498    */
clip_q31_to_q15(q31_t x)499   static __INLINE q15_t clip_q31_to_q15(
500   q31_t x)
501   {
502     return ((q31_t) (x >> 16) != ((q31_t) x >> 15)) ?
503       ((0x7FFF ^ ((q15_t) (x >> 31)))) : (q15_t) x;
504   }
505 
506   /**
507    * @brief Multiplies 32 X 64 and returns 32 bit result in 2.30 format.
508    */
509 
mult32x64(q63_t x,q31_t y)510   static __INLINE q63_t mult32x64(
511   q63_t x,
512   q31_t y)
513   {
514     return ((((q63_t) (x & 0x00000000FFFFFFFF) * y) >> 32) +
515             (((q63_t) (x >> 32) * y)));
516   }
517 
518 
519 //#if defined (ARM_MATH_CM0_FAMILY) && defined ( __CC_ARM   )
520 //#define __CLZ __clz
521 //#endif
522 
/* Note: this fallback can be removed once every toolchain provides __CLZ for Cortex-M0. */
#if defined (ARM_MATH_CM0_FAMILY) && ((defined (__ICCARM__))  )

  static __INLINE uint32_t __CLZ(
  q31_t data);


  /**
   * @brief Software count-leading-zeros for Cortex-M0 family builds with IAR.
   * @param[in] data  value to examine
   * @return number of zero bits above the most significant set bit; 32 when
   *         data is zero.
   *
   * The scan stops once the mask has been shifted out, so a zero input
   * terminates and yields 32 instead of looping forever.
   */
  static __INLINE uint32_t __CLZ(
  q31_t data)
  {
    uint32_t count = 0;
    uint32_t mask = 0x80000000;

    while((mask != 0u) && ((data & mask) == 0))
    {
      count += 1u;
      mask = mask >> 1u;
    }

    return (count);

  }

#endif
547 
548   /**
549    * @brief Function to Calculates 1/in (reciprocal) value of Q31 Data type.
550    */
551 
arm_recip_q31(q31_t in,q31_t * dst,q31_t * pRecipTable)552   static __INLINE uint32_t arm_recip_q31(
553   q31_t in,
554   q31_t * dst,
555   q31_t * pRecipTable)
556   {
557 
558     uint32_t out, tempVal;
559     uint32_t index, i;
560     uint32_t signBits;
561 
562     if(in > 0)
563     {
564       signBits = __CLZ(in) - 1;
565     }
566     else
567     {
568       signBits = __CLZ(-in) - 1;
569     }
570 
571     /* Convert input sample to 1.31 format */
572     in = in << signBits;
573 
574     /* calculation of index for initial approximated Val */
575     index = (uint32_t) (in >> 24u);
576     index = (index & INDEX_MASK);
577 
578     /* 1.31 with exp 1 */
579     out = pRecipTable[index];
580 
581     /* calculation of reciprocal value */
582     /* running approximation for two iterations */
583     for (i = 0u; i < 2u; i++)
584     {
585       tempVal = (q31_t) (((q63_t) in * out) >> 31u);
586       tempVal = 0x7FFFFFFF - tempVal;
587       /*      1.31 with exp 1 */
588       //out = (q31_t) (((q63_t) out * tempVal) >> 30u);
589       out = (q31_t) clip_q63_to_q31(((q63_t) out * tempVal) >> 30u);
590     }
591 
592     /* write output */
593     *dst = out;
594 
595     /* return num of signbits of out = 1/in value */
596     return (signBits + 1u);
597 
598   }
599 
600   /**
601    * @brief Function to Calculates 1/in (reciprocal) value of Q15 Data type.
602    */
arm_recip_q15(q15_t in,q15_t * dst,q15_t * pRecipTable)603   static __INLINE uint32_t arm_recip_q15(
604   q15_t in,
605   q15_t * dst,
606   q15_t * pRecipTable)
607   {
608 
609     uint32_t out = 0, tempVal = 0;
610     uint32_t index = 0, i = 0;
611     uint32_t signBits = 0;
612 
613     if(in > 0)
614     {
615       signBits = __CLZ(in) - 17;
616     }
617     else
618     {
619       signBits = __CLZ(-in) - 17;
620     }
621 
622     /* Convert input sample to 1.15 format */
623     in = in << signBits;
624 
625     /* calculation of index for initial approximated Val */
626     index = in >> 8;
627     index = (index & INDEX_MASK);
628 
629     /*      1.15 with exp 1  */
630     out = pRecipTable[index];
631 
632     /* calculation of reciprocal value */
633     /* running approximation for two iterations */
634     for (i = 0; i < 2; i++)
635     {
636       tempVal = (q15_t) (((q31_t) in * out) >> 15);
637       tempVal = 0x7FFF - tempVal;
638       /*      1.15 with exp 1 */
639       out = (q15_t) (((q31_t) out * tempVal) >> 14);
640     }
641 
642     /* write output */
643     *dst = out;
644 
645     /* return num of signbits of out = 1/in value */
646     return (signBits + 1);
647 
648   }
649 
650 
651   /*
652    * @brief C custom defined intrinisic function for only M0 processors
653    */
654 #if defined(ARM_MATH_CM0_FAMILY)
655 
__SSAT(q31_t x,uint32_t y)656   static __INLINE q31_t __SSAT(
657   q31_t x,
658   uint32_t y)
659   {
660     int32_t posMax, negMin;
661     uint32_t i;
662 
663     posMax = 1;
664     for (i = 0; i < (y - 1); i++)
665     {
666       posMax = posMax * 2;
667     }
668 
669     if(x > 0)
670     {
671       posMax = (posMax - 1);
672 
673       if(x > posMax)
674       {
675         x = posMax;
676       }
677     }
678     else
679     {
680       negMin = -posMax;
681 
682       if(x < negMin)
683       {
684         x = negMin;
685       }
686     }
687     return (x);
688 
689 
690   }
691 
692 #endif /* end of ARM_MATH_CM0_FAMILY */
693 
694 
695 
696   /*
697    * @brief C custom defined intrinsic function for M3 and M0 processors
698    */
699 #if defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0_FAMILY)
700 
701   /*
702    * @brief C custom defined QADD8 for M3 and M0 processors
703    */
__QADD8(q31_t x,q31_t y)704   static __INLINE q31_t __QADD8(
705   q31_t x,
706   q31_t y)
707   {
708 
709     q31_t sum;
710     q7_t r, s, t, u;
711 
712     r = (q7_t) x;
713     s = (q7_t) y;
714 
715     r = __SSAT((q31_t) (r + s), 8);
716     s = __SSAT(((q31_t) (((x << 16) >> 24) + ((y << 16) >> 24))), 8);
717     t = __SSAT(((q31_t) (((x << 8) >> 24) + ((y << 8) >> 24))), 8);
718     u = __SSAT(((q31_t) ((x >> 24) + (y >> 24))), 8);
719 
720     sum =
721       (((q31_t) u << 24) & 0xFF000000) | (((q31_t) t << 16) & 0x00FF0000) |
722       (((q31_t) s << 8) & 0x0000FF00) | (r & 0x000000FF);
723 
724     return sum;
725 
726   }
727 
728   /*
729    * @brief C custom defined QSUB8 for M3 and M0 processors
730    */
__QSUB8(q31_t x,q31_t y)731   static __INLINE q31_t __QSUB8(
732   q31_t x,
733   q31_t y)
734   {
735 
736     q31_t sum;
737     q31_t r, s, t, u;
738 
739     r = (q7_t) x;
740     s = (q7_t) y;
741 
742     r = __SSAT((r - s), 8);
743     s = __SSAT(((q31_t) (((x << 16) >> 24) - ((y << 16) >> 24))), 8) << 8;
744     t = __SSAT(((q31_t) (((x << 8) >> 24) - ((y << 8) >> 24))), 8) << 16;
745     u = __SSAT(((q31_t) ((x >> 24) - (y >> 24))), 8) << 24;
746 
747     sum =
748       (u & 0xFF000000) | (t & 0x00FF0000) | (s & 0x0000FF00) | (r &
749                                                                 0x000000FF);
750 
751     return sum;
752   }
753 
754   /*
755    * @brief C custom defined QADD16 for M3 and M0 processors
756    */
757 
758   /*
759    * @brief C custom defined QADD16 for M3 and M0 processors
760    */
__QADD16(q31_t x,q31_t y)761   static __INLINE q31_t __QADD16(
762   q31_t x,
763   q31_t y)
764   {
765 
766     q31_t sum;
767     q31_t r, s;
768 
769     r = (q15_t) x;
770     s = (q15_t) y;
771 
772     r = __SSAT(r + s, 16);
773     s = __SSAT(((q31_t) ((x >> 16) + (y >> 16))), 16) << 16;
774 
775     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
776 
777     return sum;
778 
779   }
780 
781   /*
782    * @brief C custom defined SHADD16 for M3 and M0 processors
783    */
__SHADD16(q31_t x,q31_t y)784   static __INLINE q31_t __SHADD16(
785   q31_t x,
786   q31_t y)
787   {
788 
789     q31_t sum;
790     q31_t r, s;
791 
792     r = (q15_t) x;
793     s = (q15_t) y;
794 
795     r = ((r >> 1) + (s >> 1));
796     s = ((q31_t) ((x >> 17) + (y >> 17))) << 16;
797 
798     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
799 
800     return sum;
801 
802   }
803 
804   /*
805    * @brief C custom defined QSUB16 for M3 and M0 processors
806    */
__QSUB16(q31_t x,q31_t y)807   static __INLINE q31_t __QSUB16(
808   q31_t x,
809   q31_t y)
810   {
811 
812     q31_t sum;
813     q31_t r, s;
814 
815     r = (q15_t) x;
816     s = (q15_t) y;
817 
818     r = __SSAT(r - s, 16);
819     s = __SSAT(((q31_t) ((x >> 16) - (y >> 16))), 16) << 16;
820 
821     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
822 
823     return sum;
824   }
825 
826   /*
827    * @brief C custom defined SHSUB16 for M3 and M0 processors
828    */
__SHSUB16(q31_t x,q31_t y)829   static __INLINE q31_t __SHSUB16(
830   q31_t x,
831   q31_t y)
832   {
833 
834     q31_t diff;
835     q31_t r, s;
836 
837     r = (q15_t) x;
838     s = (q15_t) y;
839 
840     r = ((r >> 1) - (s >> 1));
841     s = (((x >> 17) - (y >> 17)) << 16);
842 
843     diff = (s & 0xFFFF0000) | (r & 0x0000FFFF);
844 
845     return diff;
846   }
847 
848   /*
849    * @brief C custom defined QASX for M3 and M0 processors
850    */
__QASX(q31_t x,q31_t y)851   static __INLINE q31_t __QASX(
852   q31_t x,
853   q31_t y)
854   {
855 
856     q31_t sum = 0;
857 
858     sum =
859       ((sum +
860         clip_q31_to_q15((q31_t) ((q15_t) (x >> 16) + (q15_t) y))) << 16) +
861       clip_q31_to_q15((q31_t) ((q15_t) x - (q15_t) (y >> 16)));
862 
863     return sum;
864   }
865 
866   /*
867    * @brief C custom defined SHASX for M3 and M0 processors
868    */
__SHASX(q31_t x,q31_t y)869   static __INLINE q31_t __SHASX(
870   q31_t x,
871   q31_t y)
872   {
873 
874     q31_t sum;
875     q31_t r, s;
876 
877     r = (q15_t) x;
878     s = (q15_t) y;
879 
880     r = ((r >> 1) - (y >> 17));
881     s = (((x >> 17) + (s >> 1)) << 16);
882 
883     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
884 
885     return sum;
886   }
887 
888 
889   /*
890    * @brief C custom defined QSAX for M3 and M0 processors
891    */
__QSAX(q31_t x,q31_t y)892   static __INLINE q31_t __QSAX(
893   q31_t x,
894   q31_t y)
895   {
896 
897     q31_t sum = 0;
898 
899     sum =
900       ((sum +
901         clip_q31_to_q15((q31_t) ((q15_t) (x >> 16) - (q15_t) y))) << 16) +
902       clip_q31_to_q15((q31_t) ((q15_t) x + (q15_t) (y >> 16)));
903 
904     return sum;
905   }
906 
907   /*
908    * @brief C custom defined SHSAX for M3 and M0 processors
909    */
__SHSAX(q31_t x,q31_t y)910   static __INLINE q31_t __SHSAX(
911   q31_t x,
912   q31_t y)
913   {
914 
915     q31_t sum;
916     q31_t r, s;
917 
918     r = (q15_t) x;
919     s = (q15_t) y;
920 
921     r = ((r >> 1) + (y >> 17));
922     s = (((x >> 17) - (s >> 1)) << 16);
923 
924     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
925 
926     return sum;
927   }
928 
929   /*
930    * @brief C custom defined SMUSDX for M3 and M0 processors
931    */
__SMUSDX(q31_t x,q31_t y)932   static __INLINE q31_t __SMUSDX(
933   q31_t x,
934   q31_t y)
935   {
936 
937     return ((q31_t) (((q15_t) x * (q15_t) (y >> 16)) -
938                      ((q15_t) (x >> 16) * (q15_t) y)));
939   }
940 
941   /*
942    * @brief C custom defined SMUADX for M3 and M0 processors
943    */
__SMUADX(q31_t x,q31_t y)944   static __INLINE q31_t __SMUADX(
945   q31_t x,
946   q31_t y)
947   {
948 
949     return ((q31_t) (((q15_t) x * (q15_t) (y >> 16)) +
950                      ((q15_t) (x >> 16) * (q15_t) y)));
951   }
952 
953   /*
954    * @brief C custom defined QADD for M3 and M0 processors
955    */
__QADD(q31_t x,q31_t y)956   static __INLINE q31_t __QADD(
957   q31_t x,
958   q31_t y)
959   {
960     return clip_q63_to_q31((q63_t) x + y);
961   }
962 
963   /*
964    * @brief C custom defined QSUB for M3 and M0 processors
965    */
__QSUB(q31_t x,q31_t y)966   static __INLINE q31_t __QSUB(
967   q31_t x,
968   q31_t y)
969   {
970     return clip_q63_to_q31((q63_t) x - y);
971   }
972 
973   /*
974    * @brief C custom defined SMLAD for M3 and M0 processors
975    */
__SMLAD(q31_t x,q31_t y,q31_t sum)976   static __INLINE q31_t __SMLAD(
977   q31_t x,
978   q31_t y,
979   q31_t sum)
980   {
981 
982     return (sum + ((q15_t) (x >> 16) * (q15_t) (y >> 16)) +
983             ((q15_t) x * (q15_t) y));
984   }
985 
986   /*
987    * @brief C custom defined SMLADX for M3 and M0 processors
988    */
__SMLADX(q31_t x,q31_t y,q31_t sum)989   static __INLINE q31_t __SMLADX(
990   q31_t x,
991   q31_t y,
992   q31_t sum)
993   {
994 
995     return (sum + ((q15_t) (x >> 16) * (q15_t) (y)) +
996             ((q15_t) x * (q15_t) (y >> 16)));
997   }
998 
999   /*
1000    * @brief C custom defined SMLSDX for M3 and M0 processors
1001    */
__SMLSDX(q31_t x,q31_t y,q31_t sum)1002   static __INLINE q31_t __SMLSDX(
1003   q31_t x,
1004   q31_t y,
1005   q31_t sum)
1006   {
1007 
1008     return (sum - ((q15_t) (x >> 16) * (q15_t) (y)) +
1009             ((q15_t) x * (q15_t) (y >> 16)));
1010   }
1011 
1012   /*
1013    * @brief C custom defined SMLALD for M3 and M0 processors
1014    */
__SMLALD(q31_t x,q31_t y,q63_t sum)1015   static __INLINE q63_t __SMLALD(
1016   q31_t x,
1017   q31_t y,
1018   q63_t sum)
1019   {
1020 
1021     return (sum + ((q15_t) (x >> 16) * (q15_t) (y >> 16)) +
1022             ((q15_t) x * (q15_t) y));
1023   }
1024 
1025   /*
1026    * @brief C custom defined SMLALDX for M3 and M0 processors
1027    */
__SMLALDX(q31_t x,q31_t y,q63_t sum)1028   static __INLINE q63_t __SMLALDX(
1029   q31_t x,
1030   q31_t y,
1031   q63_t sum)
1032   {
1033 
1034     return (sum + ((q15_t) (x >> 16) * (q15_t) y)) +
1035       ((q15_t) x * (q15_t) (y >> 16));
1036   }
1037 
1038   /*
1039    * @brief C custom defined SMUAD for M3 and M0 processors
1040    */
__SMUAD(q31_t x,q31_t y)1041   static __INLINE q31_t __SMUAD(
1042   q31_t x,
1043   q31_t y)
1044   {
1045 
1046     return (((x >> 16) * (y >> 16)) +
1047             (((x << 16) >> 16) * ((y << 16) >> 16)));
1048   }
1049 
1050   /*
1051    * @brief C custom defined SMUSD for M3 and M0 processors
1052    */
__SMUSD(q31_t x,q31_t y)1053   static __INLINE q31_t __SMUSD(
1054   q31_t x,
1055   q31_t y)
1056   {
1057 
1058     return (-((x >> 16) * (y >> 16)) +
1059             (((x << 16) >> 16) * ((y << 16) >> 16)));
1060   }
1061 
1062 
1063   /*
1064    * @brief C custom defined SXTB16 for M3 and M0 processors
1065    */
__SXTB16(q31_t x)1066   static __INLINE q31_t __SXTB16(
1067   q31_t x)
1068   {
1069 
1070     return ((((x << 24) >> 24) & 0x0000FFFF) |
1071             (((x << 8) >> 8) & 0xFFFF0000));
1072   }
1073 
1074 
1075 #endif /* defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0_FAMILY) */
1076 
1077 
1078   /**
1079    * @brief Instance structure for the Q7 FIR filter.
1080    */
1081   typedef struct
1082   {
1083     uint16_t numTaps;        /**< number of filter coefficients in the filter. */
1084     q7_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
1085     q7_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
1086   } arm_fir_instance_q7;
1087 
1088   /**
1089    * @brief Instance structure for the Q15 FIR filter.
1090    */
1091   typedef struct
1092   {
1093     uint16_t numTaps;         /**< number of filter coefficients in the filter. */
1094     q15_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
1095     q15_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
1096   } arm_fir_instance_q15;
1097 
1098   /**
1099    * @brief Instance structure for the Q31 FIR filter.
1100    */
1101   typedef struct
1102   {
1103     uint16_t numTaps;         /**< number of filter coefficients in the filter. */
1104     q31_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
1105     q31_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps. */
1106   } arm_fir_instance_q31;
1107 
1108   /**
1109    * @brief Instance structure for the floating-point FIR filter.
1110    */
1111   typedef struct
1112   {
1113     uint16_t numTaps;     /**< number of filter coefficients in the filter. */
1114     float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
1115     float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
1116   } arm_fir_instance_f32;
1117 
1118 
1119   /**
1120    * @brief Processing function for the Q7 FIR filter.
1121    * @param[in] *S points to an instance of the Q7 FIR filter structure.
1122    * @param[in] *pSrc points to the block of input data.
1123    * @param[out] *pDst points to the block of output data.
1124    * @param[in] blockSize number of samples to process.
1125    * @return none.
1126    */
1127   void arm_fir_q7(
1128   const arm_fir_instance_q7 * S,
1129   q7_t * pSrc,
1130   q7_t * pDst,
1131   uint32_t blockSize);
1132 
1133 
1134   /**
1135    * @brief  Initialization function for the Q7 FIR filter.
1136    * @param[in,out] *S points to an instance of the Q7 FIR structure.
1137    * @param[in] numTaps  Number of filter coefficients in the filter.
1138    * @param[in] *pCoeffs points to the filter coefficients.
1139    * @param[in] *pState points to the state buffer.
1140    * @param[in] blockSize number of samples that are processed.
1141    * @return none
1142    */
1143   void arm_fir_init_q7(
1144   arm_fir_instance_q7 * S,
1145   uint16_t numTaps,
1146   q7_t * pCoeffs,
1147   q7_t * pState,
1148   uint32_t blockSize);
1149 
1150 
1151   /**
1152    * @brief Processing function for the Q15 FIR filter.
1153    * @param[in] *S points to an instance of the Q15 FIR structure.
1154    * @param[in] *pSrc points to the block of input data.
1155    * @param[out] *pDst points to the block of output data.
1156    * @param[in] blockSize number of samples to process.
1157    * @return none.
1158    */
1159   void arm_fir_q15(
1160   const arm_fir_instance_q15 * S,
1161   q15_t * pSrc,
1162   q15_t * pDst,
1163   uint32_t blockSize);
1164 
1165   /**
1166    * @brief Processing function for the fast Q15 FIR filter for Cortex-M3 and Cortex-M4.
1167    * @param[in] *S points to an instance of the Q15 FIR filter structure.
1168    * @param[in] *pSrc points to the block of input data.
1169    * @param[out] *pDst points to the block of output data.
1170    * @param[in] blockSize number of samples to process.
1171    * @return none.
1172    */
1173   void arm_fir_fast_q15(
1174   const arm_fir_instance_q15 * S,
1175   q15_t * pSrc,
1176   q15_t * pDst,
1177   uint32_t blockSize);
1178 
1179   /**
1180    * @brief  Initialization function for the Q15 FIR filter.
1181    * @param[in,out] *S points to an instance of the Q15 FIR filter structure.
1182    * @param[in] numTaps  Number of filter coefficients in the filter. Must be even and greater than or equal to 4.
1183    * @param[in] *pCoeffs points to the filter coefficients.
1184    * @param[in] *pState points to the state buffer.
1185    * @param[in] blockSize number of samples that are processed at a time.
1186    * @return The function returns ARM_MATH_SUCCESS if initialization was successful or ARM_MATH_ARGUMENT_ERROR if
1187    * <code>numTaps</code> is not a supported value.
1188    */
1189 
1190   arm_status arm_fir_init_q15(
1191   arm_fir_instance_q15 * S,
1192   uint16_t numTaps,
1193   q15_t * pCoeffs,
1194   q15_t * pState,
1195   uint32_t blockSize);
1196 
1197   /**
1198    * @brief Processing function for the Q31 FIR filter.
1199    * @param[in] *S points to an instance of the Q31 FIR filter structure.
1200    * @param[in] *pSrc points to the block of input data.
1201    * @param[out] *pDst points to the block of output data.
1202    * @param[in] blockSize number of samples to process.
1203    * @return none.
1204    */
1205   void arm_fir_q31(
1206   const arm_fir_instance_q31 * S,
1207   q31_t * pSrc,
1208   q31_t * pDst,
1209   uint32_t blockSize);
1210 
1211   /**
1212    * @brief Processing function for the fast Q31 FIR filter for Cortex-M3 and Cortex-M4.
1213    * @param[in] *S points to an instance of the Q31 FIR structure.
1214    * @param[in] *pSrc points to the block of input data.
1215    * @param[out] *pDst points to the block of output data.
1216    * @param[in] blockSize number of samples to process.
1217    * @return none.
1218    */
1219   void arm_fir_fast_q31(
1220   const arm_fir_instance_q31 * S,
1221   q31_t * pSrc,
1222   q31_t * pDst,
1223   uint32_t blockSize);
1224 
1225   /**
1226    * @brief  Initialization function for the Q31 FIR filter.
1227    * @param[in,out] *S points to an instance of the Q31 FIR structure.
1228    * @param[in] 	numTaps  Number of filter coefficients in the filter.
1229    * @param[in] 	*pCoeffs points to the filter coefficients.
1230    * @param[in] 	*pState points to the state buffer.
1231    * @param[in] 	blockSize number of samples that are processed at a time.
1232    * @return 		none.
1233    */
1234   void arm_fir_init_q31(
1235   arm_fir_instance_q31 * S,
1236   uint16_t numTaps,
1237   q31_t * pCoeffs,
1238   q31_t * pState,
1239   uint32_t blockSize);
1240 
1241   /**
1242    * @brief Processing function for the floating-point FIR filter.
1243    * @param[in] *S points to an instance of the floating-point FIR structure.
1244    * @param[in] *pSrc points to the block of input data.
1245    * @param[out] *pDst points to the block of output data.
1246    * @param[in] blockSize number of samples to process.
1247    * @return none.
1248    */
1249   void arm_fir_f32(
1250   const arm_fir_instance_f32 * S,
1251   float32_t * pSrc,
1252   float32_t * pDst,
1253   uint32_t blockSize);
1254 
1255   /**
1256    * @brief  Initialization function for the floating-point FIR filter.
1257    * @param[in,out] *S points to an instance of the floating-point FIR filter structure.
1258    * @param[in] 	numTaps  Number of filter coefficients in the filter.
1259    * @param[in] 	*pCoeffs points to the filter coefficients.
1260    * @param[in] 	*pState points to the state buffer.
1261    * @param[in] 	blockSize number of samples that are processed at a time.
1262    * @return    	none.
1263    */
1264   void arm_fir_init_f32(
1265   arm_fir_instance_f32 * S,
1266   uint16_t numTaps,
1267   float32_t * pCoeffs,
1268   float32_t * pState,
1269   uint32_t blockSize);
1270 
1271 
1272   /**
1273    * @brief Instance structure for the Q15 Biquad cascade filter.
1274    */
1275   typedef struct
1276   {
1277     int8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
1278     q15_t *pState;            /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
1279     q15_t *pCoeffs;           /**< Points to the array of coefficients.  The array is of length 5*numStages. */
1280     int8_t postShift;         /**< Additional shift, in bits, applied to each output sample. */
1281 
1282   } arm_biquad_casd_df1_inst_q15;
1283 
1284 
1285   /**
1286    * @brief Instance structure for the Q31 Biquad cascade filter.
1287    */
1288   typedef struct
1289   {
1290     uint32_t numStages;      /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
1291     q31_t *pState;           /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
1292     q31_t *pCoeffs;          /**< Points to the array of coefficients.  The array is of length 5*numStages. */
1293     uint8_t postShift;       /**< Additional shift, in bits, applied to each output sample. */
1294 
1295   } arm_biquad_casd_df1_inst_q31;
1296 
1297   /**
1298    * @brief Instance structure for the floating-point Biquad cascade filter.
1299    */
1300   typedef struct
1301   {
1302     uint32_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
1303     float32_t *pState;          /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
1304     float32_t *pCoeffs;         /**< Points to the array of coefficients.  The array is of length 5*numStages. */
1305 
1306 
1307   } arm_biquad_casd_df1_inst_f32;
1308 
1309 
1310 
1311   /**
1312    * @brief Processing function for the Q15 Biquad cascade filter.
1313    * @param[in]  *S points to an instance of the Q15 Biquad cascade structure.
1314    * @param[in]  *pSrc points to the block of input data.
1315    * @param[out] *pDst points to the block of output data.
1316    * @param[in]  blockSize number of samples to process.
1317    * @return     none.
1318    */
1319 
1320   void arm_biquad_cascade_df1_q15(
1321   const arm_biquad_casd_df1_inst_q15 * S,
1322   q15_t * pSrc,
1323   q15_t * pDst,
1324   uint32_t blockSize);
1325 
1326   /**
1327    * @brief  Initialization function for the Q15 Biquad cascade filter.
1328    * @param[in,out] *S           points to an instance of the Q15 Biquad cascade structure.
1329    * @param[in]     numStages    number of 2nd order stages in the filter.
1330    * @param[in]     *pCoeffs     points to the filter coefficients.
1331    * @param[in]     *pState      points to the state buffer.
1332    * @param[in]     postShift    Shift to be applied to the output. Varies according to the coefficients format
1333    * @return        none
1334    */
1335 
1336   void arm_biquad_cascade_df1_init_q15(
1337   arm_biquad_casd_df1_inst_q15 * S,
1338   uint8_t numStages,
1339   q15_t * pCoeffs,
1340   q15_t * pState,
1341   int8_t postShift);
1342 
1343 
1344   /**
1345    * @brief Fast but less precise processing function for the Q15 Biquad cascade filter for Cortex-M3 and Cortex-M4.
1346    * @param[in]  *S points to an instance of the Q15 Biquad cascade structure.
1347    * @param[in]  *pSrc points to the block of input data.
1348    * @param[out] *pDst points to the block of output data.
1349    * @param[in]  blockSize number of samples to process.
1350    * @return     none.
1351    */
1352 
1353   void arm_biquad_cascade_df1_fast_q15(
1354   const arm_biquad_casd_df1_inst_q15 * S,
1355   q15_t * pSrc,
1356   q15_t * pDst,
1357   uint32_t blockSize);
1358 
1359 
1360   /**
1361    * @brief Processing function for the Q31 Biquad cascade filter
1362    * @param[in]  *S         points to an instance of the Q31 Biquad cascade structure.
1363    * @param[in]  *pSrc      points to the block of input data.
1364    * @param[out] *pDst      points to the block of output data.
1365    * @param[in]  blockSize  number of samples to process.
1366    * @return     none.
1367    */
1368 
1369   void arm_biquad_cascade_df1_q31(
1370   const arm_biquad_casd_df1_inst_q31 * S,
1371   q31_t * pSrc,
1372   q31_t * pDst,
1373   uint32_t blockSize);
1374 
1375   /**
1376    * @brief Fast but less precise processing function for the Q31 Biquad cascade filter for Cortex-M3 and Cortex-M4.
1377    * @param[in]  *S         points to an instance of the Q31 Biquad cascade structure.
1378    * @param[in]  *pSrc      points to the block of input data.
1379    * @param[out] *pDst      points to the block of output data.
1380    * @param[in]  blockSize  number of samples to process.
1381    * @return     none.
1382    */
1383 
1384   void arm_biquad_cascade_df1_fast_q31(
1385   const arm_biquad_casd_df1_inst_q31 * S,
1386   q31_t * pSrc,
1387   q31_t * pDst,
1388   uint32_t blockSize);
1389 
1390   /**
1391    * @brief  Initialization function for the Q31 Biquad cascade filter.
1392    * @param[in,out] *S           points to an instance of the Q31 Biquad cascade structure.
1393    * @param[in]     numStages      number of 2nd order stages in the filter.
1394    * @param[in]     *pCoeffs     points to the filter coefficients.
1395    * @param[in]     *pState      points to the state buffer.
1396    * @param[in]     postShift    Shift to be applied to the output. Varies according to the coefficients format
1397    * @return        none
1398    */
1399 
1400   void arm_biquad_cascade_df1_init_q31(
1401   arm_biquad_casd_df1_inst_q31 * S,
1402   uint8_t numStages,
1403   q31_t * pCoeffs,
1404   q31_t * pState,
1405   int8_t postShift);
1406 
1407   /**
1408    * @brief Processing function for the floating-point Biquad cascade filter.
1409    * @param[in]  *S         points to an instance of the floating-point Biquad cascade structure.
1410    * @param[in]  *pSrc      points to the block of input data.
1411    * @param[out] *pDst      points to the block of output data.
1412    * @param[in]  blockSize  number of samples to process.
1413    * @return     none.
1414    */
1415 
1416   void arm_biquad_cascade_df1_f32(
1417   const arm_biquad_casd_df1_inst_f32 * S,
1418   float32_t * pSrc,
1419   float32_t * pDst,
1420   uint32_t blockSize);
1421 
1422   /**
1423    * @brief  Initialization function for the floating-point Biquad cascade filter.
1424    * @param[in,out] *S           points to an instance of the floating-point Biquad cascade structure.
1425    * @param[in]     numStages    number of 2nd order stages in the filter.
1426    * @param[in]     *pCoeffs     points to the filter coefficients.
1427    * @param[in]     *pState      points to the state buffer.
1428    * @return        none
1429    */
1430 
1431   void arm_biquad_cascade_df1_init_f32(
1432   arm_biquad_casd_df1_inst_f32 * S,
1433   uint8_t numStages,
1434   float32_t * pCoeffs,
1435   float32_t * pState);
1436 
1437 
1438   /**
1439    * @brief Instance structure for the floating-point matrix structure.
1440    */
1441 
1442   typedef struct
1443   {
1444     uint16_t numRows;     /**< number of rows of the matrix.     */
1445     uint16_t numCols;     /**< number of columns of the matrix.  */
1446     float32_t *pData;     /**< points to the data of the matrix. */
1447   } arm_matrix_instance_f32;
1448 
1449 
1450   /**
1451    * @brief Instance structure for the floating-point matrix structure.
1452    */
1453 
1454   typedef struct
1455   {
1456     uint16_t numRows;     /**< number of rows of the matrix.     */
1457     uint16_t numCols;     /**< number of columns of the matrix.  */
1458     float64_t *pData;     /**< points to the data of the matrix. */
1459   } arm_matrix_instance_f64;
1460 
1461   /**
1462    * @brief Instance structure for the Q15 matrix structure.
1463    */
1464 
1465   typedef struct
1466   {
1467     uint16_t numRows;     /**< number of rows of the matrix.     */
1468     uint16_t numCols;     /**< number of columns of the matrix.  */
1469     q15_t *pData;         /**< points to the data of the matrix. */
1470 
1471   } arm_matrix_instance_q15;
1472 
1473   /**
1474    * @brief Instance structure for the Q31 matrix structure.
1475    */
1476 
1477   typedef struct
1478   {
1479     uint16_t numRows;     /**< number of rows of the matrix.     */
1480     uint16_t numCols;     /**< number of columns of the matrix.  */
1481     q31_t *pData;         /**< points to the data of the matrix. */
1482 
1483   } arm_matrix_instance_q31;
1484 
1485 
1486 
1487   /**
1488    * @brief Floating-point matrix addition.
1489    * @param[in]       *pSrcA points to the first input matrix structure
1490    * @param[in]       *pSrcB points to the second input matrix structure
1491    * @param[out]      *pDst points to output matrix structure
1492    * @return     The function returns either
1493    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1494    */
1495 
1496   arm_status arm_mat_add_f32(
1497   const arm_matrix_instance_f32 * pSrcA,
1498   const arm_matrix_instance_f32 * pSrcB,
1499   arm_matrix_instance_f32 * pDst);
1500 
1501   /**
1502    * @brief Q15 matrix addition.
1503    * @param[in]       *pSrcA points to the first input matrix structure
1504    * @param[in]       *pSrcB points to the second input matrix structure
1505    * @param[out]      *pDst points to output matrix structure
1506    * @return     The function returns either
1507    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1508    */
1509 
1510   arm_status arm_mat_add_q15(
1511   const arm_matrix_instance_q15 * pSrcA,
1512   const arm_matrix_instance_q15 * pSrcB,
1513   arm_matrix_instance_q15 * pDst);
1514 
1515   /**
1516    * @brief Q31 matrix addition.
1517    * @param[in]       *pSrcA points to the first input matrix structure
1518    * @param[in]       *pSrcB points to the second input matrix structure
1519    * @param[out]      *pDst points to output matrix structure
1520    * @return     The function returns either
1521    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1522    */
1523 
1524   arm_status arm_mat_add_q31(
1525   const arm_matrix_instance_q31 * pSrcA,
1526   const arm_matrix_instance_q31 * pSrcB,
1527   arm_matrix_instance_q31 * pDst);
1528 
1529   /**
1530    * @brief Floating-point, complex, matrix multiplication.
1531    * @param[in]       *pSrcA points to the first input matrix structure
1532    * @param[in]       *pSrcB points to the second input matrix structure
1533    * @param[out]      *pDst points to output matrix structure
1534    * @return     The function returns either
1535    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1536    */
1537 
1538   arm_status arm_mat_cmplx_mult_f32(
1539   const arm_matrix_instance_f32 * pSrcA,
1540   const arm_matrix_instance_f32 * pSrcB,
1541   arm_matrix_instance_f32 * pDst);
1542 
1543   /**
1544    * @brief Q15, complex,  matrix multiplication.
1545    * @param[in]       *pSrcA points to the first input matrix structure
1546    * @param[in]       *pSrcB points to the second input matrix structure
1547    * @param[out]      *pDst points to output matrix structure
1548    * @return     The function returns either
1549    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1550    */
1551 
1552   arm_status arm_mat_cmplx_mult_q15(
1553   const arm_matrix_instance_q15 * pSrcA,
1554   const arm_matrix_instance_q15 * pSrcB,
1555   arm_matrix_instance_q15 * pDst,
1556   q15_t * pScratch);
1557 
1558   /**
1559    * @brief Q31, complex, matrix multiplication.
1560    * @param[in]       *pSrcA points to the first input matrix structure
1561    * @param[in]       *pSrcB points to the second input matrix structure
1562    * @param[out]      *pDst points to output matrix structure
1563    * @return     The function returns either
1564    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1565    */
1566 
1567   arm_status arm_mat_cmplx_mult_q31(
1568   const arm_matrix_instance_q31 * pSrcA,
1569   const arm_matrix_instance_q31 * pSrcB,
1570   arm_matrix_instance_q31 * pDst);
1571 
1572 
1573   /**
1574    * @brief Floating-point matrix transpose.
1575    * @param[in]  *pSrc points to the input matrix
1576    * @param[out] *pDst points to the output matrix
1577    * @return 	The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
1578    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1579    */
1580 
1581   arm_status arm_mat_trans_f32(
1582   const arm_matrix_instance_f32 * pSrc,
1583   arm_matrix_instance_f32 * pDst);
1584 
1585 
1586   /**
1587    * @brief Q15 matrix transpose.
1588    * @param[in]  *pSrc points to the input matrix
1589    * @param[out] *pDst points to the output matrix
1590    * @return 	The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
1591    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1592    */
1593 
1594   arm_status arm_mat_trans_q15(
1595   const arm_matrix_instance_q15 * pSrc,
1596   arm_matrix_instance_q15 * pDst);
1597 
1598   /**
1599    * @brief Q31 matrix transpose.
1600    * @param[in]  *pSrc points to the input matrix
1601    * @param[out] *pDst points to the output matrix
1602    * @return 	The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
1603    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1604    */
1605 
1606   arm_status arm_mat_trans_q31(
1607   const arm_matrix_instance_q31 * pSrc,
1608   arm_matrix_instance_q31 * pDst);
1609 
1610 
1611   /**
1612    * @brief Floating-point matrix multiplication
1613    * @param[in]       *pSrcA points to the first input matrix structure
1614    * @param[in]       *pSrcB points to the second input matrix structure
1615    * @param[out]      *pDst points to output matrix structure
1616    * @return     The function returns either
1617    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1618    */
1619 
1620   arm_status arm_mat_mult_f32(
1621   const arm_matrix_instance_f32 * pSrcA,
1622   const arm_matrix_instance_f32 * pSrcB,
1623   arm_matrix_instance_f32 * pDst);
1624 
1625   /**
1626    * @brief Q15 matrix multiplication
1627    * @param[in]       *pSrcA points to the first input matrix structure
1628    * @param[in]       *pSrcB points to the second input matrix structure
1629    * @param[out]      *pDst points to output matrix structure
1630    * @param[in]		 *pState points to the array for storing intermediate results
1631    * @return     The function returns either
1632    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1633    */
1634 
1635   arm_status arm_mat_mult_q15(
1636   const arm_matrix_instance_q15 * pSrcA,
1637   const arm_matrix_instance_q15 * pSrcB,
1638   arm_matrix_instance_q15 * pDst,
1639   q15_t * pState);
1640 
1641   /**
1642    * @brief Q15 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4
1643    * @param[in]       *pSrcA  points to the first input matrix structure
1644    * @param[in]       *pSrcB  points to the second input matrix structure
1645    * @param[out]      *pDst   points to output matrix structure
1646    * @param[in]		  *pState points to the array for storing intermediate results
1647    * @return     The function returns either
1648    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1649    */
1650 
1651   arm_status arm_mat_mult_fast_q15(
1652   const arm_matrix_instance_q15 * pSrcA,
1653   const arm_matrix_instance_q15 * pSrcB,
1654   arm_matrix_instance_q15 * pDst,
1655   q15_t * pState);
1656 
1657   /**
1658    * @brief Q31 matrix multiplication
1659    * @param[in]       *pSrcA points to the first input matrix structure
1660    * @param[in]       *pSrcB points to the second input matrix structure
1661    * @param[out]      *pDst points to output matrix structure
1662    * @return     The function returns either
1663    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1664    */
1665 
1666   arm_status arm_mat_mult_q31(
1667   const arm_matrix_instance_q31 * pSrcA,
1668   const arm_matrix_instance_q31 * pSrcB,
1669   arm_matrix_instance_q31 * pDst);
1670 
1671   /**
1672    * @brief Q31 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4
1673    * @param[in]       *pSrcA points to the first input matrix structure
1674    * @param[in]       *pSrcB points to the second input matrix structure
1675    * @param[out]      *pDst points to output matrix structure
1676    * @return     The function returns either
1677    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1678    */
1679 
1680   arm_status arm_mat_mult_fast_q31(
1681   const arm_matrix_instance_q31 * pSrcA,
1682   const arm_matrix_instance_q31 * pSrcB,
1683   arm_matrix_instance_q31 * pDst);
1684 
1685 
1686   /**
1687    * @brief Floating-point matrix subtraction
1688    * @param[in]       *pSrcA points to the first input matrix structure
1689    * @param[in]       *pSrcB points to the second input matrix structure
1690    * @param[out]      *pDst points to output matrix structure
1691    * @return     The function returns either
1692    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1693    */
1694 
1695   arm_status arm_mat_sub_f32(
1696   const arm_matrix_instance_f32 * pSrcA,
1697   const arm_matrix_instance_f32 * pSrcB,
1698   arm_matrix_instance_f32 * pDst);
1699 
1700   /**
1701    * @brief Q15 matrix subtraction
1702    * @param[in]       *pSrcA points to the first input matrix structure
1703    * @param[in]       *pSrcB points to the second input matrix structure
1704    * @param[out]      *pDst points to output matrix structure
1705    * @return     The function returns either
1706    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1707    */
1708 
1709   arm_status arm_mat_sub_q15(
1710   const arm_matrix_instance_q15 * pSrcA,
1711   const arm_matrix_instance_q15 * pSrcB,
1712   arm_matrix_instance_q15 * pDst);
1713 
1714   /**
1715    * @brief Q31 matrix subtraction
1716    * @param[in]       *pSrcA points to the first input matrix structure
1717    * @param[in]       *pSrcB points to the second input matrix structure
1718    * @param[out]      *pDst points to output matrix structure
1719    * @return     The function returns either
1720    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1721    */
1722 
1723   arm_status arm_mat_sub_q31(
1724   const arm_matrix_instance_q31 * pSrcA,
1725   const arm_matrix_instance_q31 * pSrcB,
1726   arm_matrix_instance_q31 * pDst);
1727 
1728   /**
1729    * @brief Floating-point matrix scaling.
1730    * @param[in]  *pSrc points to the input matrix
1731    * @param[in]  scale scale factor
1732    * @param[out] *pDst points to the output matrix
1733    * @return     The function returns either
1734    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1735    */
1736 
1737   arm_status arm_mat_scale_f32(
1738   const arm_matrix_instance_f32 * pSrc,
1739   float32_t scale,
1740   arm_matrix_instance_f32 * pDst);
1741 
1742   /**
1743    * @brief Q15 matrix scaling.
1744    * @param[in]       *pSrc points to input matrix
1745    * @param[in]       scaleFract fractional portion of the scale factor
1746    * @param[in]       shift number of bits to shift the result by
1747    * @param[out]      *pDst points to output matrix
1748    * @return     The function returns either
1749    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1750    */
1751 
1752   arm_status arm_mat_scale_q15(
1753   const arm_matrix_instance_q15 * pSrc,
1754   q15_t scaleFract,
1755   int32_t shift,
1756   arm_matrix_instance_q15 * pDst);
1757 
1758   /**
1759    * @brief Q31 matrix scaling.
1760    * @param[in]       *pSrc points to input matrix
1761    * @param[in]       scaleFract fractional portion of the scale factor
1762    * @param[in]       shift number of bits to shift the result by
1763    * @param[out]      *pDst points to output matrix structure
1764    * @return     The function returns either
1765    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1766    */
1767 
1768   arm_status arm_mat_scale_q31(
1769   const arm_matrix_instance_q31 * pSrc,
1770   q31_t scaleFract,
1771   int32_t shift,
1772   arm_matrix_instance_q31 * pDst);
1773 
1774 
1775   /**
1776    * @brief  Q31 matrix initialization.
1777    * @param[in,out] *S             points to an instance of the floating-point matrix structure.
1778    * @param[in]     nRows          number of rows in the matrix.
1779    * @param[in]     nColumns       number of columns in the matrix.
1780    * @param[in]     *pData	       points to the matrix data array.
1781    * @return        none
1782    */
1783 
1784   void arm_mat_init_q31(
1785   arm_matrix_instance_q31 * S,
1786   uint16_t nRows,
1787   uint16_t nColumns,
1788   q31_t * pData);
1789 
1790   /**
1791    * @brief  Q15 matrix initialization.
1792    * @param[in,out] *S             points to an instance of the floating-point matrix structure.
1793    * @param[in]     nRows          number of rows in the matrix.
1794    * @param[in]     nColumns       number of columns in the matrix.
1795    * @param[in]     *pData	       points to the matrix data array.
1796    * @return        none
1797    */
1798 
1799   void arm_mat_init_q15(
1800   arm_matrix_instance_q15 * S,
1801   uint16_t nRows,
1802   uint16_t nColumns,
1803   q15_t * pData);
1804 
1805   /**
1806    * @brief  Floating-point matrix initialization.
1807    * @param[in,out] *S             points to an instance of the floating-point matrix structure.
1808    * @param[in]     nRows          number of rows in the matrix.
1809    * @param[in]     nColumns       number of columns in the matrix.
1810    * @param[in]     *pData	       points to the matrix data array.
1811    * @return        none
1812    */
1813 
1814   void arm_mat_init_f32(
1815   arm_matrix_instance_f32 * S,
1816   uint16_t nRows,
1817   uint16_t nColumns,
1818   float32_t * pData);
1819 
1820 
1821 
1822   /**
1823    * @brief Instance structure for the Q15 PID Control.
1824    */
1825   typedef struct
1826   {
1827     q15_t A0;    /**< The derived gain, A0 = Kp + Ki + Kd . */
1828 #ifdef ARM_MATH_CM0_FAMILY
1829     q15_t A1;
1830     q15_t A2;
1831 #else
1832     q31_t A1;           /**< The derived gain A1 = -Kp - 2Kd | Kd.*/
1833 #endif
1834     q15_t state[3];       /**< The state array of length 3. */
1835     q15_t Kp;           /**< The proportional gain. */
1836     q15_t Ki;           /**< The integral gain. */
1837     q15_t Kd;           /**< The derivative gain. */
1838   } arm_pid_instance_q15;
1839 
1840   /**
1841    * @brief Instance structure for the Q31 PID Control.
1842    */
1843   typedef struct
1844   {
1845     q31_t A0;            /**< The derived gain, A0 = Kp + Ki + Kd . */
1846     q31_t A1;            /**< The derived gain, A1 = -Kp - 2Kd. */
1847     q31_t A2;            /**< The derived gain, A2 = Kd . */
1848     q31_t state[3];      /**< The state array of length 3. */
1849     q31_t Kp;            /**< The proportional gain. */
1850     q31_t Ki;            /**< The integral gain. */
1851     q31_t Kd;            /**< The derivative gain. */
1852 
1853   } arm_pid_instance_q31;
1854 
1855   /**
1856    * @brief Instance structure for the floating-point PID Control.
1857    */
1858   typedef struct
1859   {
1860     float32_t A0;          /**< The derived gain, A0 = Kp + Ki + Kd . */
1861     float32_t A1;          /**< The derived gain, A1 = -Kp - 2Kd. */
1862     float32_t A2;          /**< The derived gain, A2 = Kd . */
1863     float32_t state[3];    /**< The state array of length 3. */
1864     float32_t Kp;               /**< The proportional gain. */
1865     float32_t Ki;               /**< The integral gain. */
1866     float32_t Kd;               /**< The derivative gain. */
1867   } arm_pid_instance_f32;
1868 
1869 
1870 
1871   /**
1872    * @brief  Initialization function for the floating-point PID Control.
1873    * @param[in,out] *S      points to an instance of the PID structure.
1874    * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.
1875    * @return none.
1876    */
1877   void arm_pid_init_f32(
1878   arm_pid_instance_f32 * S,
1879   int32_t resetStateFlag);
1880 
1881   /**
1882    * @brief  Reset function for the floating-point PID Control.
1883    * @param[in,out] *S is an instance of the floating-point PID Control structure
1884    * @return none
1885    */
1886   void arm_pid_reset_f32(
1887   arm_pid_instance_f32 * S);
1888 
1889 
1890   /**
1891    * @brief  Initialization function for the Q31 PID Control.
1892    * @param[in,out] *S points to an instance of the Q15 PID structure.
1893    * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.
1894    * @return none.
1895    */
1896   void arm_pid_init_q31(
1897   arm_pid_instance_q31 * S,
1898   int32_t resetStateFlag);
1899 
1900 
1901   /**
1902    * @brief  Reset function for the Q31 PID Control.
1903    * @param[in,out] *S points to an instance of the Q31 PID Control structure
1904    * @return none
1905    */
1906 
1907   void arm_pid_reset_q31(
1908   arm_pid_instance_q31 * S);
1909 
1910   /**
1911    * @brief  Initialization function for the Q15 PID Control.
1912    * @param[in,out] *S points to an instance of the Q15 PID structure.
1913    * @param[in] resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.
1914    * @return none.
1915    */
1916   void arm_pid_init_q15(
1917   arm_pid_instance_q15 * S,
1918   int32_t resetStateFlag);
1919 
1920   /**
1921    * @brief  Reset function for the Q15 PID Control.
1922    * @param[in,out] *S points to an instance of the q15 PID Control structure
1923    * @return none
1924    */
1925   void arm_pid_reset_q15(
1926   arm_pid_instance_q15 * S);
1927 
1928 
1929   /**
1930    * @brief Instance structure for the floating-point Linear Interpolate function.
1931    */
1932   typedef struct
1933   {
1934     uint32_t nValues;           /**< nValues */
1935     float32_t x1;               /**< x1 */
1936     float32_t xSpacing;         /**< xSpacing */
1937     float32_t *pYData;          /**< pointer to the table of Y values */
1938   } arm_linear_interp_instance_f32;
1939 
1940   /**
1941    * @brief Instance structure for the floating-point bilinear interpolation function.
1942    */
1943 
1944   typedef struct
1945   {
1946     uint16_t numRows;   /**< number of rows in the data table. */
1947     uint16_t numCols;   /**< number of columns in the data table. */
1948     float32_t *pData;   /**< points to the data table. */
1949   } arm_bilinear_interp_instance_f32;
1950 
1951    /**
1952    * @brief Instance structure for the Q31 bilinear interpolation function.
1953    */
1954 
1955   typedef struct
1956   {
1957     uint16_t numRows;   /**< number of rows in the data table. */
1958     uint16_t numCols;   /**< number of columns in the data table. */
1959     q31_t *pData;       /**< points to the data table. */
1960   } arm_bilinear_interp_instance_q31;
1961 
1962    /**
1963    * @brief Instance structure for the Q15 bilinear interpolation function.
1964    */
1965 
1966   typedef struct
1967   {
1968     uint16_t numRows;   /**< number of rows in the data table. */
1969     uint16_t numCols;   /**< number of columns in the data table. */
1970     q15_t *pData;       /**< points to the data table. */
1971   } arm_bilinear_interp_instance_q15;
1972 
1973    /**
1974    * @brief Instance structure for the Q15 bilinear interpolation function.
1975    */
1976 
1977   typedef struct
1978   {
1979     uint16_t numRows;   /**< number of rows in the data table. */
1980     uint16_t numCols;   /**< number of columns in the data table. */
1981     q7_t *pData;                /**< points to the data table. */
1982   } arm_bilinear_interp_instance_q7;
1983 
1984 
1985   /**
1986    * @brief Q7 vector multiplication.
1987    * @param[in]       *pSrcA points to the first input vector
1988    * @param[in]       *pSrcB points to the second input vector
1989    * @param[out]      *pDst  points to the output vector
1990    * @param[in]       blockSize number of samples in each vector
1991    * @return none.
1992    */
1993 
1994   void arm_mult_q7(
1995   q7_t * pSrcA,
1996   q7_t * pSrcB,
1997   q7_t * pDst,
1998   uint32_t blockSize);
1999 
2000   /**
2001    * @brief Q15 vector multiplication.
2002    * @param[in]       *pSrcA points to the first input vector
2003    * @param[in]       *pSrcB points to the second input vector
2004    * @param[out]      *pDst  points to the output vector
2005    * @param[in]       blockSize number of samples in each vector
2006    * @return none.
2007    */
2008 
2009   void arm_mult_q15(
2010   q15_t * pSrcA,
2011   q15_t * pSrcB,
2012   q15_t * pDst,
2013   uint32_t blockSize);
2014 
2015   /**
2016    * @brief Q31 vector multiplication.
2017    * @param[in]       *pSrcA points to the first input vector
2018    * @param[in]       *pSrcB points to the second input vector
2019    * @param[out]      *pDst points to the output vector
2020    * @param[in]       blockSize number of samples in each vector
2021    * @return none.
2022    */
2023 
2024   void arm_mult_q31(
2025   q31_t * pSrcA,
2026   q31_t * pSrcB,
2027   q31_t * pDst,
2028   uint32_t blockSize);
2029 
2030   /**
2031    * @brief Floating-point vector multiplication.
2032    * @param[in]       *pSrcA points to the first input vector
2033    * @param[in]       *pSrcB points to the second input vector
2034    * @param[out]      *pDst points to the output vector
2035    * @param[in]       blockSize number of samples in each vector
2036    * @return none.
2037    */
2038 
2039   void arm_mult_f32(
2040   float32_t * pSrcA,
2041   float32_t * pSrcB,
2042   float32_t * pDst,
2043   uint32_t blockSize);
2044 
2045 
2046 
2047 
2048 
2049 
2050   /**
2051    * @brief Instance structure for the Q15 CFFT/CIFFT function.
2052    */
2053 
2054   typedef struct
2055   {
2056     uint16_t fftLen;                 /**< length of the FFT. */
2057     uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2058     uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2059     q15_t *pTwiddle;                     /**< points to the Sin twiddle factor table. */
2060     uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
2061     uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2062     uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2063   } arm_cfft_radix2_instance_q15;
2064 
2065 /* Deprecated */
2066   arm_status arm_cfft_radix2_init_q15(
2067   arm_cfft_radix2_instance_q15 * S,
2068   uint16_t fftLen,
2069   uint8_t ifftFlag,
2070   uint8_t bitReverseFlag);
2071 
2072 /* Deprecated */
2073   void arm_cfft_radix2_q15(
2074   const arm_cfft_radix2_instance_q15 * S,
2075   q15_t * pSrc);
2076 
2077 
2078 
2079   /**
2080    * @brief Instance structure for the Q15 CFFT/CIFFT function.
2081    */
2082 
2083   typedef struct
2084   {
2085     uint16_t fftLen;                 /**< length of the FFT. */
2086     uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2087     uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2088     q15_t *pTwiddle;                 /**< points to the twiddle factor table. */
2089     uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
2090     uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2091     uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2092   } arm_cfft_radix4_instance_q15;
2093 
2094 /* Deprecated */
2095   arm_status arm_cfft_radix4_init_q15(
2096   arm_cfft_radix4_instance_q15 * S,
2097   uint16_t fftLen,
2098   uint8_t ifftFlag,
2099   uint8_t bitReverseFlag);
2100 
2101 /* Deprecated */
2102   void arm_cfft_radix4_q15(
2103   const arm_cfft_radix4_instance_q15 * S,
2104   q15_t * pSrc);
2105 
2106   /**
2107    * @brief Instance structure for the Radix-2 Q31 CFFT/CIFFT function.
2108    */
2109 
2110   typedef struct
2111   {
2112     uint16_t fftLen;                 /**< length of the FFT. */
2113     uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2114     uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2115     q31_t *pTwiddle;                     /**< points to the Twiddle factor table. */
2116     uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
2117     uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2118     uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2119   } arm_cfft_radix2_instance_q31;
2120 
2121 /* Deprecated */
2122   arm_status arm_cfft_radix2_init_q31(
2123   arm_cfft_radix2_instance_q31 * S,
2124   uint16_t fftLen,
2125   uint8_t ifftFlag,
2126   uint8_t bitReverseFlag);
2127 
2128 /* Deprecated */
2129   void arm_cfft_radix2_q31(
2130   const arm_cfft_radix2_instance_q31 * S,
2131   q31_t * pSrc);
2132 
2133   /**
2134    * @brief Instance structure for the Q31 CFFT/CIFFT function.
2135    */
2136 
2137   typedef struct
2138   {
2139     uint16_t fftLen;                 /**< length of the FFT. */
2140     uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2141     uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2142     q31_t *pTwiddle;                 /**< points to the twiddle factor table. */
2143     uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
2144     uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2145     uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2146   } arm_cfft_radix4_instance_q31;
2147 
2148 /* Deprecated */
2149   void arm_cfft_radix4_q31(
2150   const arm_cfft_radix4_instance_q31 * S,
2151   q31_t * pSrc);
2152 
2153 /* Deprecated */
2154   arm_status arm_cfft_radix4_init_q31(
2155   arm_cfft_radix4_instance_q31 * S,
2156   uint16_t fftLen,
2157   uint8_t ifftFlag,
2158   uint8_t bitReverseFlag);
2159 
2160   /**
2161    * @brief Instance structure for the floating-point CFFT/CIFFT function.
2162    */
2163 
2164   typedef struct
2165   {
2166     uint16_t fftLen;                   /**< length of the FFT. */
2167     uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2168     uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2169     float32_t *pTwiddle;               /**< points to the Twiddle factor table. */
2170     uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
2171     uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2172     uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2173     float32_t onebyfftLen;                 /**< value of 1/fftLen. */
2174   } arm_cfft_radix2_instance_f32;
2175 
2176 /* Deprecated */
2177   arm_status arm_cfft_radix2_init_f32(
2178   arm_cfft_radix2_instance_f32 * S,
2179   uint16_t fftLen,
2180   uint8_t ifftFlag,
2181   uint8_t bitReverseFlag);
2182 
2183 /* Deprecated */
2184   void arm_cfft_radix2_f32(
2185   const arm_cfft_radix2_instance_f32 * S,
2186   float32_t * pSrc);
2187 
2188   /**
2189    * @brief Instance structure for the floating-point CFFT/CIFFT function.
2190    */
2191 
2192   typedef struct
2193   {
2194     uint16_t fftLen;                   /**< length of the FFT. */
2195     uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2196     uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2197     float32_t *pTwiddle;               /**< points to the Twiddle factor table. */
2198     uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
2199     uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2200     uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2201     float32_t onebyfftLen;                 /**< value of 1/fftLen. */
2202   } arm_cfft_radix4_instance_f32;
2203 
2204 /* Deprecated */
2205   arm_status arm_cfft_radix4_init_f32(
2206   arm_cfft_radix4_instance_f32 * S,
2207   uint16_t fftLen,
2208   uint8_t ifftFlag,
2209   uint8_t bitReverseFlag);
2210 
2211 /* Deprecated */
2212   void arm_cfft_radix4_f32(
2213   const arm_cfft_radix4_instance_f32 * S,
2214   float32_t * pSrc);
2215 
2216   /**
2217    * @brief Instance structure for the fixed-point CFFT/CIFFT function.
2218    */
2219 
2220   typedef struct
2221   {
2222     uint16_t fftLen;                   /**< length of the FFT. */
2223     const q15_t *pTwiddle;             /**< points to the Twiddle factor table. */
2224     const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
2225     uint16_t bitRevLength;             /**< bit reversal table length. */
2226   } arm_cfft_instance_q15;
2227 
2228 void arm_cfft_q15(
2229     const arm_cfft_instance_q15 * S,
2230     q15_t * p1,
2231     uint8_t ifftFlag,
2232     uint8_t bitReverseFlag);
2233 
2234   /**
2235    * @brief Instance structure for the fixed-point CFFT/CIFFT function.
2236    */
2237 
2238   typedef struct
2239   {
2240     uint16_t fftLen;                   /**< length of the FFT. */
2241     const q31_t *pTwiddle;             /**< points to the Twiddle factor table. */
2242     const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
2243     uint16_t bitRevLength;             /**< bit reversal table length. */
2244   } arm_cfft_instance_q31;
2245 
2246 void arm_cfft_q31(
2247     const arm_cfft_instance_q31 * S,
2248     q31_t * p1,
2249     uint8_t ifftFlag,
2250     uint8_t bitReverseFlag);
2251 
2252   /**
2253    * @brief Instance structure for the floating-point CFFT/CIFFT function.
2254    */
2255 
2256   typedef struct
2257   {
2258     uint16_t fftLen;                   /**< length of the FFT. */
2259     const float32_t *pTwiddle;         /**< points to the Twiddle factor table. */
2260     const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
2261     uint16_t bitRevLength;             /**< bit reversal table length. */
2262   } arm_cfft_instance_f32;
2263 
2264   void arm_cfft_f32(
2265   const arm_cfft_instance_f32 * S,
2266   float32_t * p1,
2267   uint8_t ifftFlag,
2268   uint8_t bitReverseFlag);
2269 
2270   /**
2271    * @brief Instance structure for the Q15 RFFT/RIFFT function.
2272    */
2273 
2274   typedef struct
2275   {
2276     uint32_t fftLenReal;                      /**< length of the real FFT. */
2277     uint8_t ifftFlagR;                        /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
2278     uint8_t bitReverseFlagR;                  /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
2279     uint32_t twidCoefRModifier;               /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2280     q15_t *pTwiddleAReal;                     /**< points to the real twiddle factor table. */
2281     q15_t *pTwiddleBReal;                     /**< points to the imag twiddle factor table. */
2282     const arm_cfft_instance_q15 *pCfft;       /**< points to the complex FFT instance. */
2283   } arm_rfft_instance_q15;
2284 
2285   arm_status arm_rfft_init_q15(
2286   arm_rfft_instance_q15 * S,
2287   uint32_t fftLenReal,
2288   uint32_t ifftFlagR,
2289   uint32_t bitReverseFlag);
2290 
2291   void arm_rfft_q15(
2292   const arm_rfft_instance_q15 * S,
2293   q15_t * pSrc,
2294   q15_t * pDst);
2295 
2296   /**
2297    * @brief Instance structure for the Q31 RFFT/RIFFT function.
2298    */
2299 
2300   typedef struct
2301   {
2302     uint32_t fftLenReal;                        /**< length of the real FFT. */
2303     uint8_t ifftFlagR;                          /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
2304     uint8_t bitReverseFlagR;                    /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
2305     uint32_t twidCoefRModifier;                 /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2306     q31_t *pTwiddleAReal;                       /**< points to the real twiddle factor table. */
2307     q31_t *pTwiddleBReal;                       /**< points to the imag twiddle factor table. */
2308     const arm_cfft_instance_q31 *pCfft;         /**< points to the complex FFT instance. */
2309   } arm_rfft_instance_q31;
2310 
2311   arm_status arm_rfft_init_q31(
2312   arm_rfft_instance_q31 * S,
2313   uint32_t fftLenReal,
2314   uint32_t ifftFlagR,
2315   uint32_t bitReverseFlag);
2316 
2317   void arm_rfft_q31(
2318   const arm_rfft_instance_q31 * S,
2319   q31_t * pSrc,
2320   q31_t * pDst);
2321 
2322   /**
2323    * @brief Instance structure for the floating-point RFFT/RIFFT function.
2324    */
2325 
2326   typedef struct
2327   {
2328     uint32_t fftLenReal;                        /**< length of the real FFT. */
2329     uint16_t fftLenBy2;                         /**< length of the complex FFT. */
2330     uint8_t ifftFlagR;                          /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
2331     uint8_t bitReverseFlagR;                    /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
2332     uint32_t twidCoefRModifier;                     /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2333     float32_t *pTwiddleAReal;                   /**< points to the real twiddle factor table. */
2334     float32_t *pTwiddleBReal;                   /**< points to the imag twiddle factor table. */
2335     arm_cfft_radix4_instance_f32 *pCfft;        /**< points to the complex FFT instance. */
2336   } arm_rfft_instance_f32;
2337 
2338   arm_status arm_rfft_init_f32(
2339   arm_rfft_instance_f32 * S,
2340   arm_cfft_radix4_instance_f32 * S_CFFT,
2341   uint32_t fftLenReal,
2342   uint32_t ifftFlagR,
2343   uint32_t bitReverseFlag);
2344 
2345   void arm_rfft_f32(
2346   const arm_rfft_instance_f32 * S,
2347   float32_t * pSrc,
2348   float32_t * pDst);
2349 
2350   /**
2351    * @brief Instance structure for the floating-point RFFT/RIFFT function.
2352    */
2353 
2354 typedef struct
2355   {
2356     arm_cfft_instance_f32 Sint;      /**< Internal CFFT structure. */
2357     uint16_t fftLenRFFT;                        /**< length of the real sequence */
2358 	float32_t * pTwiddleRFFT;					/**< Twiddle factors real stage  */
2359   } arm_rfft_fast_instance_f32 ;
2360 
2361 arm_status arm_rfft_fast_init_f32 (
2362 	arm_rfft_fast_instance_f32 * S,
2363 	uint16_t fftLen);
2364 
2365 void arm_rfft_fast_f32(
2366   arm_rfft_fast_instance_f32 * S,
2367   float32_t * p, float32_t * pOut,
2368   uint8_t ifftFlag);
2369 
2370   /**
2371    * @brief Instance structure for the floating-point DCT4/IDCT4 function.
2372    */
2373 
2374   typedef struct
2375   {
2376     uint16_t N;                         /**< length of the DCT4. */
2377     uint16_t Nby2;                      /**< half of the length of the DCT4. */
2378     float32_t normalize;                /**< normalizing factor. */
2379     float32_t *pTwiddle;                /**< points to the twiddle factor table. */
2380     float32_t *pCosFactor;              /**< points to the cosFactor table. */
2381     arm_rfft_instance_f32 *pRfft;        /**< points to the real FFT instance. */
2382     arm_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */
2383   } arm_dct4_instance_f32;
2384 
2385   /**
2386    * @brief  Initialization function for the floating-point DCT4/IDCT4.
2387    * @param[in,out] *S         points to an instance of floating-point DCT4/IDCT4 structure.
2388    * @param[in]     *S_RFFT    points to an instance of floating-point RFFT/RIFFT structure.
2389    * @param[in]     *S_CFFT    points to an instance of floating-point CFFT/CIFFT structure.
2390    * @param[in]     N          length of the DCT4.
2391    * @param[in]     Nby2       half of the length of the DCT4.
2392    * @param[in]     normalize  normalizing factor.
2393    * @return		arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLenReal</code> is not a supported transform length.
2394    */
2395 
2396   arm_status arm_dct4_init_f32(
2397   arm_dct4_instance_f32 * S,
2398   arm_rfft_instance_f32 * S_RFFT,
2399   arm_cfft_radix4_instance_f32 * S_CFFT,
2400   uint16_t N,
2401   uint16_t Nby2,
2402   float32_t normalize);
2403 
2404   /**
2405    * @brief Processing function for the floating-point DCT4/IDCT4.
2406    * @param[in]       *S             points to an instance of the floating-point DCT4/IDCT4 structure.
2407    * @param[in]       *pState        points to state buffer.
2408    * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.
2409    * @return none.
2410    */
2411 
2412   void arm_dct4_f32(
2413   const arm_dct4_instance_f32 * S,
2414   float32_t * pState,
2415   float32_t * pInlineBuffer);
2416 
2417   /**
2418    * @brief Instance structure for the Q31 DCT4/IDCT4 function.
2419    */
2420 
2421   typedef struct
2422   {
2423     uint16_t N;                         /**< length of the DCT4. */
2424     uint16_t Nby2;                      /**< half of the length of the DCT4. */
2425     q31_t normalize;                    /**< normalizing factor. */
2426     q31_t *pTwiddle;                    /**< points to the twiddle factor table. */
2427     q31_t *pCosFactor;                  /**< points to the cosFactor table. */
2428     arm_rfft_instance_q31 *pRfft;        /**< points to the real FFT instance. */
2429     arm_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance. */
2430   } arm_dct4_instance_q31;
2431 
2432   /**
2433    * @brief  Initialization function for the Q31 DCT4/IDCT4.
2434    * @param[in,out] *S         points to an instance of Q31 DCT4/IDCT4 structure.
2435    * @param[in]     *S_RFFT    points to an instance of Q31 RFFT/RIFFT structure
2436    * @param[in]     *S_CFFT    points to an instance of Q31 CFFT/CIFFT structure
2437    * @param[in]     N          length of the DCT4.
2438    * @param[in]     Nby2       half of the length of the DCT4.
2439    * @param[in]     normalize  normalizing factor.
2440    * @return		arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
2441    */
2442 
2443   arm_status arm_dct4_init_q31(
2444   arm_dct4_instance_q31 * S,
2445   arm_rfft_instance_q31 * S_RFFT,
2446   arm_cfft_radix4_instance_q31 * S_CFFT,
2447   uint16_t N,
2448   uint16_t Nby2,
2449   q31_t normalize);
2450 
2451   /**
2452    * @brief Processing function for the Q31 DCT4/IDCT4.
2453    * @param[in]       *S             points to an instance of the Q31 DCT4 structure.
2454    * @param[in]       *pState        points to state buffer.
2455    * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.
2456    * @return none.
2457    */
2458 
2459   void arm_dct4_q31(
2460   const arm_dct4_instance_q31 * S,
2461   q31_t * pState,
2462   q31_t * pInlineBuffer);
2463 
2464   /**
2465    * @brief Instance structure for the Q15 DCT4/IDCT4 function.
2466    */
2467 
2468   typedef struct
2469   {
2470     uint16_t N;                         /**< length of the DCT4. */
2471     uint16_t Nby2;                      /**< half of the length of the DCT4. */
2472     q15_t normalize;                    /**< normalizing factor. */
2473     q15_t *pTwiddle;                    /**< points to the twiddle factor table. */
2474     q15_t *pCosFactor;                  /**< points to the cosFactor table. */
2475     arm_rfft_instance_q15 *pRfft;        /**< points to the real FFT instance. */
2476     arm_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance. */
2477   } arm_dct4_instance_q15;
2478 
2479   /**
2480    * @brief  Initialization function for the Q15 DCT4/IDCT4.
2481    * @param[in,out] *S         points to an instance of Q15 DCT4/IDCT4 structure.
2482    * @param[in]     *S_RFFT    points to an instance of Q15 RFFT/RIFFT structure.
2483    * @param[in]     *S_CFFT    points to an instance of Q15 CFFT/CIFFT structure.
2484    * @param[in]     N          length of the DCT4.
2485    * @param[in]     Nby2       half of the length of the DCT4.
2486    * @param[in]     normalize  normalizing factor.
2487    * @return		arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
2488    */
2489 
2490   arm_status arm_dct4_init_q15(
2491   arm_dct4_instance_q15 * S,
2492   arm_rfft_instance_q15 * S_RFFT,
2493   arm_cfft_radix4_instance_q15 * S_CFFT,
2494   uint16_t N,
2495   uint16_t Nby2,
2496   q15_t normalize);
2497 
2498   /**
2499    * @brief Processing function for the Q15 DCT4/IDCT4.
2500    * @param[in]       *S             points to an instance of the Q15 DCT4 structure.
2501    * @param[in]       *pState        points to state buffer.
2502    * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.
2503    * @return none.
2504    */
2505 
2506   void arm_dct4_q15(
2507   const arm_dct4_instance_q15 * S,
2508   q15_t * pState,
2509   q15_t * pInlineBuffer);
2510 
2511   /**
2512    * @brief Floating-point vector addition.
2513    * @param[in]       *pSrcA points to the first input vector
2514    * @param[in]       *pSrcB points to the second input vector
2515    * @param[out]      *pDst points to the output vector
2516    * @param[in]       blockSize number of samples in each vector
2517    * @return none.
2518    */
2519 
2520   void arm_add_f32(
2521   float32_t * pSrcA,
2522   float32_t * pSrcB,
2523   float32_t * pDst,
2524   uint32_t blockSize);
2525 
2526   /**
2527    * @brief Q7 vector addition.
2528    * @param[in]       *pSrcA points to the first input vector
2529    * @param[in]       *pSrcB points to the second input vector
2530    * @param[out]      *pDst points to the output vector
2531    * @param[in]       blockSize number of samples in each vector
2532    * @return none.
2533    */
2534 
2535   void arm_add_q7(
2536   q7_t * pSrcA,
2537   q7_t * pSrcB,
2538   q7_t * pDst,
2539   uint32_t blockSize);
2540 
2541   /**
2542    * @brief Q15 vector addition.
2543    * @param[in]       *pSrcA points to the first input vector
2544    * @param[in]       *pSrcB points to the second input vector
2545    * @param[out]      *pDst points to the output vector
2546    * @param[in]       blockSize number of samples in each vector
2547    * @return none.
2548    */
2549 
2550   void arm_add_q15(
2551   q15_t * pSrcA,
2552   q15_t * pSrcB,
2553   q15_t * pDst,
2554   uint32_t blockSize);
2555 
2556   /**
2557    * @brief Q31 vector addition.
2558    * @param[in]       *pSrcA points to the first input vector
2559    * @param[in]       *pSrcB points to the second input vector
2560    * @param[out]      *pDst points to the output vector
2561    * @param[in]       blockSize number of samples in each vector
2562    * @return none.
2563    */
2564 
2565   void arm_add_q31(
2566   q31_t * pSrcA,
2567   q31_t * pSrcB,
2568   q31_t * pDst,
2569   uint32_t blockSize);
2570 
  /**
   * @brief Floating-point vector subtraction.
   * @param[in]  pSrcA      points to the first input vector
   * @param[in]  pSrcB      points to the second input vector
   * @param[out] pDst       points to the output vector
   * @param[in]  blockSize  number of samples in each vector
   * @return none.
   */

  void arm_sub_f32(
  float32_t * pSrcA,
  float32_t * pSrcB,
  float32_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Q7 vector subtraction.
   * @param[in]  pSrcA      points to the first input vector
   * @param[in]  pSrcB      points to the second input vector
   * @param[out] pDst       points to the output vector
   * @param[in]  blockSize  number of samples in each vector
   * @return none.
   */

  void arm_sub_q7(
  q7_t * pSrcA,
  q7_t * pSrcB,
  q7_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Q15 vector subtraction.
   * @param[in]  pSrcA      points to the first input vector
   * @param[in]  pSrcB      points to the second input vector
   * @param[out] pDst       points to the output vector
   * @param[in]  blockSize  number of samples in each vector
   * @return none.
   */

  void arm_sub_q15(
  q15_t * pSrcA,
  q15_t * pSrcB,
  q15_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Q31 vector subtraction.
   * @param[in]  pSrcA      points to the first input vector
   * @param[in]  pSrcB      points to the second input vector
   * @param[out] pDst       points to the output vector
   * @param[in]  blockSize  number of samples in each vector
   * @return none.
   */

  void arm_sub_q31(
  q31_t * pSrcA,
  q31_t * pSrcB,
  q31_t * pDst,
  uint32_t blockSize);
2630 
  /**
   * @brief Multiplies a floating-point vector by a scalar.
   * @param[in]  pSrc       points to the input vector
   * @param[in]  scale      scale factor to be applied
   * @param[out] pDst       points to the output vector
   * @param[in]  blockSize  number of samples in the vector
   * @return none.
   */

  void arm_scale_f32(
  float32_t * pSrc,
  float32_t scale,
  float32_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Multiplies a Q7 vector by a scalar.
   * @param[in]  pSrc        points to the input vector
   * @param[in]  scaleFract  fractional portion of the scale value
   * @param[in]  shift       number of bits to shift the result by
   * @param[out] pDst        points to the output vector
   * @param[in]  blockSize   number of samples in the vector
   * @return none.
   */

  void arm_scale_q7(
  q7_t * pSrc,
  q7_t scaleFract,
  int8_t shift,
  q7_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Multiplies a Q15 vector by a scalar.
   * @param[in]  pSrc        points to the input vector
   * @param[in]  scaleFract  fractional portion of the scale value
   * @param[in]  shift       number of bits to shift the result by
   * @param[out] pDst        points to the output vector
   * @param[in]  blockSize   number of samples in the vector
   * @return none.
   */

  void arm_scale_q15(
  q15_t * pSrc,
  q15_t scaleFract,
  int8_t shift,
  q15_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Multiplies a Q31 vector by a scalar.
   * @param[in]  pSrc        points to the input vector
   * @param[in]  scaleFract  fractional portion of the scale value
   * @param[in]  shift       number of bits to shift the result by
   * @param[out] pDst        points to the output vector
   * @param[in]  blockSize   number of samples in the vector
   * @return none.
   */

  void arm_scale_q31(
  q31_t * pSrc,
  q31_t scaleFract,
  int8_t shift,
  q31_t * pDst,
  uint32_t blockSize);
2696 
  /**
   * @brief Q7 vector absolute value.
   * @param[in]  pSrc       points to the input buffer
   * @param[out] pDst       points to the output buffer
   * @param[in]  blockSize  number of samples in each vector
   * @return none.
   */

  void arm_abs_q7(
  q7_t * pSrc,
  q7_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Floating-point vector absolute value.
   * @param[in]  pSrc       points to the input buffer
   * @param[out] pDst       points to the output buffer
   * @param[in]  blockSize  number of samples in each vector
   * @return none.
   */

  void arm_abs_f32(
  float32_t * pSrc,
  float32_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Q15 vector absolute value.
   * @param[in]  pSrc       points to the input buffer
   * @param[out] pDst       points to the output buffer
   * @param[in]  blockSize  number of samples in each vector
   * @return none.
   */

  void arm_abs_q15(
  q15_t * pSrc,
  q15_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Q31 vector absolute value.
   * @param[in]  pSrc       points to the input buffer
   * @param[out] pDst       points to the output buffer
   * @param[in]  blockSize  number of samples in each vector
   * @return none.
   */

  void arm_abs_q31(
  q31_t * pSrc,
  q31_t * pDst,
  uint32_t blockSize);
2748 
  /**
   * @brief Dot product of floating-point vectors.
   * @param[in]  pSrcA      points to the first input vector
   * @param[in]  pSrcB      points to the second input vector
   * @param[in]  blockSize  number of samples in each vector
   * @param[out] result     output result returned here (float32_t)
   * @return none.
   */

  void arm_dot_prod_f32(
  float32_t * pSrcA,
  float32_t * pSrcB,
  uint32_t blockSize,
  float32_t * result);

  /**
   * @brief Dot product of Q7 vectors.
   * @param[in]  pSrcA      points to the first input vector
   * @param[in]  pSrcB      points to the second input vector
   * @param[in]  blockSize  number of samples in each vector
   * @param[out] result     output result returned here (q31_t, wider than the q7_t inputs)
   * @return none.
   */

  void arm_dot_prod_q7(
  q7_t * pSrcA,
  q7_t * pSrcB,
  uint32_t blockSize,
  q31_t * result);

  /**
   * @brief Dot product of Q15 vectors.
   * @param[in]  pSrcA      points to the first input vector
   * @param[in]  pSrcB      points to the second input vector
   * @param[in]  blockSize  number of samples in each vector
   * @param[out] result     output result returned here (q63_t, wider than the q15_t inputs)
   * @return none.
   */

  void arm_dot_prod_q15(
  q15_t * pSrcA,
  q15_t * pSrcB,
  uint32_t blockSize,
  q63_t * result);

  /**
   * @brief Dot product of Q31 vectors.
   * @param[in]  pSrcA      points to the first input vector
   * @param[in]  pSrcB      points to the second input vector
   * @param[in]  blockSize  number of samples in each vector
   * @param[out] result     output result returned here (q63_t, wider than the q31_t inputs)
   * @return none.
   */

  void arm_dot_prod_q31(
  q31_t * pSrcA,
  q31_t * pSrcB,
  uint32_t blockSize,
  q63_t * result);
2808 
  /**
   * @brief  Shifts the elements of a Q7 vector a specified number of bits.
   * @param[in]  pSrc       points to the input vector
   * @param[in]  shiftBits  number of bits to shift.  A positive value shifts left; a negative value shifts right.
   * @param[out] pDst       points to the output vector
   * @param[in]  blockSize  number of samples in the vector
   * @return none.
   */

  void arm_shift_q7(
  q7_t * pSrc,
  int8_t shiftBits,
  q7_t * pDst,
  uint32_t blockSize);

  /**
   * @brief  Shifts the elements of a Q15 vector a specified number of bits.
   * @param[in]  pSrc       points to the input vector
   * @param[in]  shiftBits  number of bits to shift.  A positive value shifts left; a negative value shifts right.
   * @param[out] pDst       points to the output vector
   * @param[in]  blockSize  number of samples in the vector
   * @return none.
   */

  void arm_shift_q15(
  q15_t * pSrc,
  int8_t shiftBits,
  q15_t * pDst,
  uint32_t blockSize);

  /**
   * @brief  Shifts the elements of a Q31 vector a specified number of bits.
   * @param[in]  pSrc       points to the input vector
   * @param[in]  shiftBits  number of bits to shift.  A positive value shifts left; a negative value shifts right.
   * @param[out] pDst       points to the output vector
   * @param[in]  blockSize  number of samples in the vector
   * @return none.
   */

  void arm_shift_q31(
  q31_t * pSrc,
  int8_t shiftBits,
  q31_t * pDst,
  uint32_t blockSize);
2853 
  /**
   * @brief  Adds a constant offset to a floating-point vector.
   * @param[in]  pSrc       points to the input vector
   * @param[in]  offset     is the offset to be added
   * @param[out] pDst       points to the output vector
   * @param[in]  blockSize  number of samples in the vector
   * @return none.
   */

  void arm_offset_f32(
  float32_t * pSrc,
  float32_t offset,
  float32_t * pDst,
  uint32_t blockSize);

  /**
   * @brief  Adds a constant offset to a Q7 vector.
   * @param[in]  pSrc       points to the input vector
   * @param[in]  offset     is the offset to be added
   * @param[out] pDst       points to the output vector
   * @param[in]  blockSize  number of samples in the vector
   * @return none.
   */

  void arm_offset_q7(
  q7_t * pSrc,
  q7_t offset,
  q7_t * pDst,
  uint32_t blockSize);

  /**
   * @brief  Adds a constant offset to a Q15 vector.
   * @param[in]  pSrc       points to the input vector
   * @param[in]  offset     is the offset to be added
   * @param[out] pDst       points to the output vector
   * @param[in]  blockSize  number of samples in the vector
   * @return none.
   */

  void arm_offset_q15(
  q15_t * pSrc,
  q15_t offset,
  q15_t * pDst,
  uint32_t blockSize);

  /**
   * @brief  Adds a constant offset to a Q31 vector.
   * @param[in]  pSrc       points to the input vector
   * @param[in]  offset     is the offset to be added
   * @param[out] pDst       points to the output vector
   * @param[in]  blockSize  number of samples in the vector
   * @return none.
   */

  void arm_offset_q31(
  q31_t * pSrc,
  q31_t offset,
  q31_t * pDst,
  uint32_t blockSize);
2913 
  /**
   * @brief  Negates the elements of a floating-point vector.
   * @param[in]  pSrc       points to the input vector
   * @param[out] pDst       points to the output vector
   * @param[in]  blockSize  number of samples in the vector
   * @return none.
   */

  void arm_negate_f32(
  float32_t * pSrc,
  float32_t * pDst,
  uint32_t blockSize);

  /**
   * @brief  Negates the elements of a Q7 vector.
   * @param[in]  pSrc       points to the input vector
   * @param[out] pDst       points to the output vector
   * @param[in]  blockSize  number of samples in the vector
   * @return none.
   */

  void arm_negate_q7(
  q7_t * pSrc,
  q7_t * pDst,
  uint32_t blockSize);

  /**
   * @brief  Negates the elements of a Q15 vector.
   * @param[in]  pSrc       points to the input vector
   * @param[out] pDst       points to the output vector
   * @param[in]  blockSize  number of samples in the vector
   * @return none.
   */

  void arm_negate_q15(
  q15_t * pSrc,
  q15_t * pDst,
  uint32_t blockSize);

  /**
   * @brief  Negates the elements of a Q31 vector.
   * @param[in]  pSrc       points to the input vector
   * @param[out] pDst       points to the output vector
   * @param[in]  blockSize  number of samples in the vector
   * @return none.
   */

  void arm_negate_q31(
  q31_t * pSrc,
  q31_t * pDst,
  uint32_t blockSize);
  /**
   * @brief  Copies the elements of a floating-point vector.
   * @param[in]  pSrc       input pointer
   * @param[out] pDst       output pointer
   * @param[in]  blockSize  number of samples to process
   * @return none.
   */
  void arm_copy_f32(
  float32_t * pSrc,
  float32_t * pDst,
  uint32_t blockSize);

  /**
   * @brief  Copies the elements of a Q7 vector.
   * @param[in]  pSrc       input pointer
   * @param[out] pDst       output pointer
   * @param[in]  blockSize  number of samples to process
   * @return none.
   */
  void arm_copy_q7(
  q7_t * pSrc,
  q7_t * pDst,
  uint32_t blockSize);

  /**
   * @brief  Copies the elements of a Q15 vector.
   * @param[in]  pSrc       input pointer
   * @param[out] pDst       output pointer
   * @param[in]  blockSize  number of samples to process
   * @return none.
   */
  void arm_copy_q15(
  q15_t * pSrc,
  q15_t * pDst,
  uint32_t blockSize);

  /**
   * @brief  Copies the elements of a Q31 vector.
   * @param[in]  pSrc       input pointer
   * @param[out] pDst       output pointer
   * @param[in]  blockSize  number of samples to process
   * @return none.
   */
  void arm_copy_q31(
  q31_t * pSrc,
  q31_t * pDst,
  uint32_t blockSize);
  /**
   * @brief  Fills a constant value into a floating-point vector.
   * @param[in]  value      input value to be filled
   * @param[out] pDst       output pointer
   * @param[in]  blockSize  number of samples to process
   * @return none.
   */
  void arm_fill_f32(
  float32_t value,
  float32_t * pDst,
  uint32_t blockSize);

  /**
   * @brief  Fills a constant value into a Q7 vector.
   * @param[in]  value      input value to be filled
   * @param[out] pDst       output pointer
   * @param[in]  blockSize  number of samples to process
   * @return none.
   */
  void arm_fill_q7(
  q7_t value,
  q7_t * pDst,
  uint32_t blockSize);

  /**
   * @brief  Fills a constant value into a Q15 vector.
   * @param[in]  value      input value to be filled
   * @param[out] pDst       output pointer
   * @param[in]  blockSize  number of samples to process
   * @return none.
   */
  void arm_fill_q15(
  q15_t value,
  q15_t * pDst,
  uint32_t blockSize);

  /**
   * @brief  Fills a constant value into a Q31 vector.
   * @param[in]  value      input value to be filled
   * @param[out] pDst       output pointer
   * @param[in]  blockSize  number of samples to process
   * @return none.
   */
  void arm_fill_q31(
  q31_t value,
  q31_t * pDst,
  uint32_t blockSize);
3059 
/**
 * @brief Convolution of floating-point sequences.
 * @param[in]  pSrcA    points to the first input sequence.
 * @param[in]  srcALen  length of the first input sequence.
 * @param[in]  pSrcB    points to the second input sequence.
 * @param[in]  srcBLen  length of the second input sequence.
 * @param[out] pDst     points to the location where the output result is written.  Length srcALen+srcBLen-1.
 * @return none.
 */

  void arm_conv_f32(
  float32_t * pSrcA,
  uint32_t srcALen,
  float32_t * pSrcB,
  uint32_t srcBLen,
  float32_t * pDst);
3076 
3077 
  /**
   * @brief Convolution of Q15 sequences (variant taking caller-supplied scratch buffers).
   * @param[in]  pSrcA      points to the first input sequence.
   * @param[in]  srcALen    length of the first input sequence.
   * @param[in]  pSrcB      points to the second input sequence.
   * @param[in]  srcBLen    length of the second input sequence.
   * @param[out] pDst       points to the block of output data.  Length srcALen+srcBLen-1.
   * @param[in]  pScratch1  points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
   * @param[in]  pScratch2  points to scratch buffer of size min(srcALen, srcBLen).
   * @return none.
   */


  void arm_conv_opt_q15(
  q15_t * pSrcA,
  uint32_t srcALen,
  q15_t * pSrcB,
  uint32_t srcBLen,
  q15_t * pDst,
  q15_t * pScratch1,
  q15_t * pScratch2);


  /**
   * @brief Convolution of Q15 sequences.
   * @param[in]  pSrcA    points to the first input sequence.
   * @param[in]  srcALen  length of the first input sequence.
   * @param[in]  pSrcB    points to the second input sequence.
   * @param[in]  srcBLen  length of the second input sequence.
   * @param[out] pDst     points to the location where the output result is written.  Length srcALen+srcBLen-1.
   * @return none.
   */

  void arm_conv_q15(
  q15_t * pSrcA,
  uint32_t srcALen,
  q15_t * pSrcB,
  uint32_t srcBLen,
  q15_t * pDst);

  /**
   * @brief Convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
   * @param[in]  pSrcA    points to the first input sequence.
   * @param[in]  srcALen  length of the first input sequence.
   * @param[in]  pSrcB    points to the second input sequence.
   * @param[in]  srcBLen  length of the second input sequence.
   * @param[out] pDst     points to the block of output data.  Length srcALen+srcBLen-1.
   * @return none.
   */

  void arm_conv_fast_q15(
			  q15_t * pSrcA,
			 uint32_t srcALen,
			  q15_t * pSrcB,
			 uint32_t srcBLen,
			 q15_t * pDst);

  /**
   * @brief Convolution of Q15 sequences (fast version, taking caller-supplied scratch buffers) for Cortex-M3 and Cortex-M4.
   * @param[in]  pSrcA      points to the first input sequence.
   * @param[in]  srcALen    length of the first input sequence.
   * @param[in]  pSrcB      points to the second input sequence.
   * @param[in]  srcBLen    length of the second input sequence.
   * @param[out] pDst       points to the block of output data.  Length srcALen+srcBLen-1.
   * @param[in]  pScratch1  points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
   * @param[in]  pScratch2  points to scratch buffer of size min(srcALen, srcBLen).
   * @return none.
   */

  void arm_conv_fast_opt_q15(
  q15_t * pSrcA,
  uint32_t srcALen,
  q15_t * pSrcB,
  uint32_t srcBLen,
  q15_t * pDst,
  q15_t * pScratch1,
  q15_t * pScratch2);
3155 
3156 
3157 
  /**
   * @brief Convolution of Q31 sequences.
   * @param[in]  pSrcA    points to the first input sequence.
   * @param[in]  srcALen  length of the first input sequence.
   * @param[in]  pSrcB    points to the second input sequence.
   * @param[in]  srcBLen  length of the second input sequence.
   * @param[out] pDst     points to the block of output data.  Length srcALen+srcBLen-1.
   * @return none.
   */

  void arm_conv_q31(
  q31_t * pSrcA,
  uint32_t srcALen,
  q31_t * pSrcB,
  uint32_t srcBLen,
  q31_t * pDst);

  /**
   * @brief Convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4.
   * @param[in]  pSrcA    points to the first input sequence.
   * @param[in]  srcALen  length of the first input sequence.
   * @param[in]  pSrcB    points to the second input sequence.
   * @param[in]  srcBLen  length of the second input sequence.
   * @param[out] pDst     points to the block of output data.  Length srcALen+srcBLen-1.
   * @return none.
   */

  void arm_conv_fast_q31(
  q31_t * pSrcA,
  uint32_t srcALen,
  q31_t * pSrcB,
  uint32_t srcBLen,
  q31_t * pDst);


  /**
   * @brief Convolution of Q7 sequences (variant taking caller-supplied scratch buffers).
   * @param[in]  pSrcA      points to the first input sequence.
   * @param[in]  srcALen    length of the first input sequence.
   * @param[in]  pSrcB      points to the second input sequence.
   * @param[in]  srcBLen    length of the second input sequence.
   * @param[out] pDst       points to the block of output data.  Length srcALen+srcBLen-1.
   * @param[in]  pScratch1  points to scratch buffer (of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
   * @param[in]  pScratch2  points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
   * @return none.
   */

  void arm_conv_opt_q7(
  q7_t * pSrcA,
  uint32_t srcALen,
  q7_t * pSrcB,
  uint32_t srcBLen,
  q7_t * pDst,
  q15_t * pScratch1,
  q15_t * pScratch2);



  /**
   * @brief Convolution of Q7 sequences.
   * @param[in]  pSrcA    points to the first input sequence.
   * @param[in]  srcALen  length of the first input sequence.
   * @param[in]  pSrcB    points to the second input sequence.
   * @param[in]  srcBLen  length of the second input sequence.
   * @param[out] pDst     points to the block of output data.  Length srcALen+srcBLen-1.
   * @return none.
   */

  void arm_conv_q7(
  q7_t * pSrcA,
  uint32_t srcALen,
  q7_t * pSrcB,
  uint32_t srcBLen,
  q7_t * pDst);
3232 
3233 
  /**
   * @brief Partial convolution of floating-point sequences.
   * @param[in]  pSrcA       points to the first input sequence.
   * @param[in]  srcALen     length of the first input sequence.
   * @param[in]  pSrcB       points to the second input sequence.
   * @param[in]  srcBLen     length of the second input sequence.
   * @param[out] pDst        points to the block of output data
   * @param[in]  firstIndex  is the first output sample to start with.
   * @param[in]  numPoints   is the number of output points to be computed.
   * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
   */

  arm_status arm_conv_partial_f32(
  float32_t * pSrcA,
  uint32_t srcALen,
  float32_t * pSrcB,
  uint32_t srcBLen,
  float32_t * pDst,
  uint32_t firstIndex,
  uint32_t numPoints);

  /**
   * @brief Partial convolution of Q15 sequences (variant taking caller-supplied scratch buffers).
   * @param[in]  pSrcA       points to the first input sequence.
   * @param[in]  srcALen     length of the first input sequence.
   * @param[in]  pSrcB       points to the second input sequence.
   * @param[in]  srcBLen     length of the second input sequence.
   * @param[out] pDst        points to the block of output data
   * @param[in]  firstIndex  is the first output sample to start with.
   * @param[in]  numPoints   is the number of output points to be computed.
   * @param[in]  pScratch1   points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
   * @param[in]  pScratch2   points to scratch buffer of size min(srcALen, srcBLen).
   * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
   */

  arm_status arm_conv_partial_opt_q15(
  q15_t * pSrcA,
  uint32_t srcALen,
  q15_t * pSrcB,
  uint32_t srcBLen,
  q15_t * pDst,
  uint32_t firstIndex,
  uint32_t numPoints,
  q15_t * pScratch1,
  q15_t * pScratch2);


  /**
   * @brief Partial convolution of Q15 sequences.
   * @param[in]  pSrcA       points to the first input sequence.
   * @param[in]  srcALen     length of the first input sequence.
   * @param[in]  pSrcB       points to the second input sequence.
   * @param[in]  srcBLen     length of the second input sequence.
   * @param[out] pDst        points to the block of output data
   * @param[in]  firstIndex  is the first output sample to start with.
   * @param[in]  numPoints   is the number of output points to be computed.
   * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
   */

  arm_status arm_conv_partial_q15(
  q15_t * pSrcA,
  uint32_t srcALen,
  q15_t * pSrcB,
  uint32_t srcBLen,
  q15_t * pDst,
  uint32_t firstIndex,
  uint32_t numPoints);
3301 
  /**
   * @brief Partial convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
   * @param[in]  pSrcA       points to the first input sequence.
   * @param[in]  srcALen     length of the first input sequence.
   * @param[in]  pSrcB       points to the second input sequence.
   * @param[in]  srcBLen     length of the second input sequence.
   * @param[out] pDst        points to the block of output data
   * @param[in]  firstIndex  is the first output sample to start with.
   * @param[in]  numPoints   is the number of output points to be computed.
   * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
   */

  arm_status arm_conv_partial_fast_q15(
				        q15_t * pSrcA,
				       uint32_t srcALen,
				        q15_t * pSrcB,
				       uint32_t srcBLen,
				       q15_t * pDst,
				       uint32_t firstIndex,
				       uint32_t numPoints);


  /**
   * @brief Partial convolution of Q15 sequences (fast version, taking caller-supplied scratch buffers) for Cortex-M3 and Cortex-M4.
   * @param[in]  pSrcA       points to the first input sequence.
   * @param[in]  srcALen     length of the first input sequence.
   * @param[in]  pSrcB       points to the second input sequence.
   * @param[in]  srcBLen     length of the second input sequence.
   * @param[out] pDst        points to the block of output data
   * @param[in]  firstIndex  is the first output sample to start with.
   * @param[in]  numPoints   is the number of output points to be computed.
   * @param[in]  pScratch1   points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
   * @param[in]  pScratch2   points to scratch buffer of size min(srcALen, srcBLen).
   * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
   */

  arm_status arm_conv_partial_fast_opt_q15(
  q15_t * pSrcA,
  uint32_t srcALen,
  q15_t * pSrcB,
  uint32_t srcBLen,
  q15_t * pDst,
  uint32_t firstIndex,
  uint32_t numPoints,
  q15_t * pScratch1,
  q15_t * pScratch2);
3348 
3349 
  /**
   * @brief Partial convolution of Q31 sequences.
   * @param[in]  pSrcA       points to the first input sequence.
   * @param[in]  srcALen     length of the first input sequence.
   * @param[in]  pSrcB       points to the second input sequence.
   * @param[in]  srcBLen     length of the second input sequence.
   * @param[out] pDst        points to the block of output data
   * @param[in]  firstIndex  is the first output sample to start with.
   * @param[in]  numPoints   is the number of output points to be computed.
   * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
   */

  arm_status arm_conv_partial_q31(
  q31_t * pSrcA,
  uint32_t srcALen,
  q31_t * pSrcB,
  uint32_t srcBLen,
  q31_t * pDst,
  uint32_t firstIndex,
  uint32_t numPoints);


  /**
   * @brief Partial convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4.
   * @param[in]  pSrcA       points to the first input sequence.
   * @param[in]  srcALen     length of the first input sequence.
   * @param[in]  pSrcB       points to the second input sequence.
   * @param[in]  srcBLen     length of the second input sequence.
   * @param[out] pDst        points to the block of output data
   * @param[in]  firstIndex  is the first output sample to start with.
   * @param[in]  numPoints   is the number of output points to be computed.
   * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
   */

  arm_status arm_conv_partial_fast_q31(
  q31_t * pSrcA,
  uint32_t srcALen,
  q31_t * pSrcB,
  uint32_t srcBLen,
  q31_t * pDst,
  uint32_t firstIndex,
  uint32_t numPoints);
3392 
3393 
  /**
   * @brief Partial convolution of Q7 sequences (variant taking caller-supplied scratch buffers).
   * @param[in]  pSrcA       points to the first input sequence.
   * @param[in]  srcALen     length of the first input sequence.
   * @param[in]  pSrcB       points to the second input sequence.
   * @param[in]  srcBLen     length of the second input sequence.
   * @param[out] pDst        points to the block of output data
   * @param[in]  firstIndex  is the first output sample to start with.
   * @param[in]  numPoints   is the number of output points to be computed.
   * @param[in]  pScratch1   points to scratch buffer (of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
   * @param[in]  pScratch2   points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
   * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
   */

  arm_status arm_conv_partial_opt_q7(
  q7_t * pSrcA,
  uint32_t srcALen,
  q7_t * pSrcB,
  uint32_t srcBLen,
  q7_t * pDst,
  uint32_t firstIndex,
  uint32_t numPoints,
  q15_t * pScratch1,
  q15_t * pScratch2);


  /**
   * @brief Partial convolution of Q7 sequences.
   * @param[in]  pSrcA       points to the first input sequence.
   * @param[in]  srcALen     length of the first input sequence.
   * @param[in]  pSrcB       points to the second input sequence.
   * @param[in]  srcBLen     length of the second input sequence.
   * @param[out] pDst        points to the block of output data
   * @param[in]  firstIndex  is the first output sample to start with.
   * @param[in]  numPoints   is the number of output points to be computed.
   * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
   */

  arm_status arm_conv_partial_q7(
  q7_t * pSrcA,
  uint32_t srcALen,
  q7_t * pSrcB,
  uint32_t srcBLen,
  q7_t * pDst,
  uint32_t firstIndex,
  uint32_t numPoints);
3440 
3441 
3442 
  /**
   * @brief Instance structure for the Q15 FIR decimator.
   *
   * Holds the decimation factor, the tap count, and pointers to the
   * coefficient and state arrays.  No blockSize member is stored here,
   * although the documented length of the state array depends on it.
   */

  typedef struct
  {
    uint8_t M;                      /**< decimation factor (stored as uint8_t). */
    uint16_t numTaps;               /**< number of coefficients in the filter. */
    q15_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numTaps. */
    q15_t *pState;                   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
  } arm_fir_decimate_instance_q15;
3454 
3455   /**
3456    * @brief Instance structure for the Q31 FIR decimator. Filled in by arm_fir_decimate_init_q31().
3457    */
3458 
3459   typedef struct
3460   {
3461     uint8_t M;                  /**< decimation factor; the init function requires blockSize to be a multiple of M. */
3462     uint16_t numTaps;           /**< number of coefficients in the filter. */
3463     q31_t *pCoeffs;              /**< points to the coefficient array. The array is of length numTaps. */
3464     q31_t *pState;               /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
3465 
3466   } arm_fir_decimate_instance_q31;
3467 
3468   /**
3469    * @brief Instance structure for the floating-point FIR decimator. Filled in by arm_fir_decimate_init_f32().
3470    */
3471 
3472   typedef struct
3473   {
3474     uint8_t M;                          /**< decimation factor; the init function requires blockSize to be a multiple of M. */
3475     uint16_t numTaps;                   /**< number of coefficients in the filter. */
3476     float32_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numTaps. */
3477     float32_t *pState;                   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
3478 
3479   } arm_fir_decimate_instance_f32;
3480 
3481 
3482 
3483   /**
3484    * @brief Processing function for the floating-point FIR decimator.
3485    * @param[in] *S points to an instance of the floating-point FIR decimator structure (see arm_fir_decimate_init_f32()).
3486    * @param[in] *pSrc points to the block of input data.
3487    * @param[out] *pDst points to the block of output data.
3488    * @param[in] blockSize number of input samples to process per call; arm_fir_decimate_init_f32() requires it to be a multiple of M.
3489    * @return none.
3490    */
3491 
3492   void arm_fir_decimate_f32(
3493   const arm_fir_decimate_instance_f32 * S,
3494   float32_t * pSrc,
3495   float32_t * pDst,
3496   uint32_t blockSize);
3497 
3498 
3499   /**
3500    * @brief  Initialization function for the floating-point FIR decimator.
3501    * @param[in,out] *S points to an instance of the floating-point FIR decimator structure.
3502    * @param[in] numTaps  number of coefficients in the filter.
3503    * @param[in] M  decimation factor.
3504    * @param[in] *pCoeffs points to the filter coefficients. The array is of length numTaps.
3505    * @param[in] *pState points to the state buffer. The array is of length numTaps+blockSize-1.
3506    * @param[in] blockSize number of input samples to process per call; must be a multiple of <code>M</code>.
3507    * @return    ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
3508    * <code>blockSize</code> is not a multiple of <code>M</code>.
3509    */
3510 
3511   arm_status arm_fir_decimate_init_f32(
3512   arm_fir_decimate_instance_f32 * S,
3513   uint16_t numTaps,
3514   uint8_t M,
3515   float32_t * pCoeffs,
3516   float32_t * pState,
3517   uint32_t blockSize);
3518 
3519   /**
3520    * @brief Processing function for the Q15 FIR decimator.
3521    * @param[in] *S points to an instance of the Q15 FIR decimator structure (see arm_fir_decimate_init_q15()).
3522    * @param[in] *pSrc points to the block of input data.
3523    * @param[out] *pDst points to the block of output data.
3524    * @param[in] blockSize number of input samples to process per call; arm_fir_decimate_init_q15() requires it to be a multiple of M.
3525    * @return none.
3526    */
3527 
3528   void arm_fir_decimate_q15(
3529   const arm_fir_decimate_instance_q15 * S,
3530   q15_t * pSrc,
3531   q15_t * pDst,
3532   uint32_t blockSize);
3533 
3534   /**
3535    * @brief Processing function for the Q15 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.
3536    * @param[in] *S points to an instance of the Q15 FIR decimator structure (same instance type as arm_fir_decimate_q15()).
3537    * @param[in] *pSrc points to the block of input data.
3538    * @param[out] *pDst points to the block of output data.
3539    * @param[in] blockSize number of input samples to process per call.
3540    * @return none.
3541    */
3542 
3543   void arm_fir_decimate_fast_q15(
3544   const arm_fir_decimate_instance_q15 * S,
3545   q15_t * pSrc,
3546   q15_t * pDst,
3547   uint32_t blockSize);
3548 
3549 
3550 
3551   /**
3552    * @brief  Initialization function for the Q15 FIR decimator.
3553    * @param[in,out] *S points to an instance of the Q15 FIR decimator structure.
3554    * @param[in] numTaps  number of coefficients in the filter.
3555    * @param[in] M  decimation factor.
3556    * @param[in] *pCoeffs points to the filter coefficients. The array is of length numTaps.
3557    * @param[in] *pState points to the state buffer. The array is of length numTaps+blockSize-1.
3558    * @param[in] blockSize number of input samples to process per call; must be a multiple of <code>M</code>.
3559    * @return    ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
3560    * <code>blockSize</code> is not a multiple of <code>M</code>.
3561    */
3562 
3563   arm_status arm_fir_decimate_init_q15(
3564   arm_fir_decimate_instance_q15 * S,
3565   uint16_t numTaps,
3566   uint8_t M,
3567   q15_t * pCoeffs,
3568   q15_t * pState,
3569   uint32_t blockSize);
3570 
3571   /**
3572    * @brief Processing function for the Q31 FIR decimator.
3573    * @param[in] *S points to an instance of the Q31 FIR decimator structure (see arm_fir_decimate_init_q31()).
3574    * @param[in] *pSrc points to the block of input data.
3575    * @param[out] *pDst points to the block of output data.
3576    * @param[in] blockSize number of input samples to process per call; arm_fir_decimate_init_q31() requires it to be a multiple of M.
3577    * @return none.
3578    */
3579 
3580   void arm_fir_decimate_q31(
3581   const arm_fir_decimate_instance_q31 * S,
3582   q31_t * pSrc,
3583   q31_t * pDst,
3584   uint32_t blockSize);
3585 
3586   /**
3587    * @brief Processing function for the Q31 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.
3588    * @param[in] *S points to an instance of the Q31 FIR decimator structure. NOTE(review): unlike arm_fir_decimate_fast_q15(), S is not const-qualified here; confirm against the definition before changing.
3589    * @param[in] *pSrc points to the block of input data.
3590    * @param[out] *pDst points to the block of output data.
3591    * @param[in] blockSize number of input samples to process per call.
3592    * @return none.
3593    */
3594 
3595   void arm_fir_decimate_fast_q31(
3596   arm_fir_decimate_instance_q31 * S,
3597   q31_t * pSrc,
3598   q31_t * pDst,
3599   uint32_t blockSize);
3600 
3601 
3602   /**
3603    * @brief  Initialization function for the Q31 FIR decimator.
3604    * @param[in,out] *S points to an instance of the Q31 FIR decimator structure.
3605    * @param[in] numTaps  number of coefficients in the filter.
3606    * @param[in] M  decimation factor.
3607    * @param[in] *pCoeffs points to the filter coefficients. The array is of length numTaps.
3608    * @param[in] *pState points to the state buffer. The array is of length numTaps+blockSize-1.
3609    * @param[in] blockSize number of input samples to process per call; must be a multiple of <code>M</code>.
3610    * @return    ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
3611    * <code>blockSize</code> is not a multiple of <code>M</code>.
3612    */
3613 
3614   arm_status arm_fir_decimate_init_q31(
3615   arm_fir_decimate_instance_q31 * S,
3616   uint16_t numTaps,
3617   uint8_t M,
3618   q31_t * pCoeffs,
3619   q31_t * pState,
3620   uint32_t blockSize);
3621 
3622 
3623 
3624   /**
3625    * @brief Instance structure for the Q15 FIR interpolator. Filled in by arm_fir_interpolate_init_q15().
3626    */
3627 
3628   typedef struct
3629   {
3630     uint8_t L;                      /**< upsample (interpolation) factor. */
3631     uint16_t phaseLength;           /**< length of each polyphase filter component. */
3632     q15_t *pCoeffs;                 /**< points to the coefficient array. The array is of length L*phaseLength. */
3633     q15_t *pState;                  /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
3634   } arm_fir_interpolate_instance_q15;
3635 
3636   /**
3637    * @brief Instance structure for the Q31 FIR interpolator. Filled in by arm_fir_interpolate_init_q31().
3638    */
3639 
3640   typedef struct
3641   {
3642     uint8_t L;                      /**< upsample (interpolation) factor. */
3643     uint16_t phaseLength;           /**< length of each polyphase filter component. */
3644     q31_t *pCoeffs;                  /**< points to the coefficient array. The array is of length L*phaseLength. */
3645     q31_t *pState;                   /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
3646   } arm_fir_interpolate_instance_q31;
3647 
3648   /**
3649    * @brief Instance structure for the floating-point FIR interpolator. Filled in by arm_fir_interpolate_init_f32().
3650    */
3651 
3652   typedef struct
3653   {
3654     uint8_t L;                     /**< upsample (interpolation) factor. */
3655     uint16_t phaseLength;          /**< length of each polyphase filter component. */
3656     float32_t *pCoeffs;             /**< points to the coefficient array. The array is of length L*phaseLength. */
3657     float32_t *pState;              /**< points to the state variable array. The array is of length blockSize+phaseLength-1 (same layout as the Q15 and Q31 instance structures). */
3658   } arm_fir_interpolate_instance_f32;
3659 
3660 
3661   /**
3662    * @brief Processing function for the Q15 FIR interpolator.
3663    * @param[in] *S        points to an instance of the Q15 FIR interpolator structure (see arm_fir_interpolate_init_q15()).
3664    * @param[in] *pSrc     points to the block of input data.
3665    * @param[out] *pDst    points to the block of output data.
3666    * @param[in] blockSize number of input samples to process per call.
3667    * @return none.
3668    */
3669 
3670   void arm_fir_interpolate_q15(
3671   const arm_fir_interpolate_instance_q15 * S,
3672   q15_t * pSrc,
3673   q15_t * pDst,
3674   uint32_t blockSize);
3675 
3676 
3677   /**
3678    * @brief  Initialization function for the Q15 FIR interpolator.
3679    * @param[in,out] *S        points to an instance of the Q15 FIR interpolator structure.
3680    * @param[in]     L         upsample factor.
3681    * @param[in]     numTaps   number of coefficients in the filter; must be a multiple of <code>L</code>.
3682    * @param[in]     *pCoeffs  points to the filter coefficient buffer.
3683    * @param[in]     *pState   points to the state buffer.
3684    * @param[in]     blockSize number of input samples to process per call.
3685    * @return        ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
3686    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
3687    */
3688 
3689   arm_status arm_fir_interpolate_init_q15(
3690   arm_fir_interpolate_instance_q15 * S,
3691   uint8_t L,
3692   uint16_t numTaps,
3693   q15_t * pCoeffs,
3694   q15_t * pState,
3695   uint32_t blockSize);
3696 
3697   /**
3698    * @brief Processing function for the Q31 FIR interpolator.
3699    * @param[in] *S        points to an instance of the Q31 FIR interpolator structure.
3700    * @param[in] *pSrc     points to the block of input data.
3701    * @param[out] *pDst    points to the block of output data.
3702    * @param[in] blockSize number of input samples to process per call.
3703    * @return none.
3704    */
3705 
3706   void arm_fir_interpolate_q31(
3707   const arm_fir_interpolate_instance_q31 * S,
3708   q31_t * pSrc,
3709   q31_t * pDst,
3710   uint32_t blockSize);
3711 
3712   /**
3713    * @brief  Initialization function for the Q31 FIR interpolator.
3714    * @param[in,out] *S        points to an instance of the Q31 FIR interpolator structure.
3715    * @param[in]     L         upsample factor.
3716    * @param[in]     numTaps   number of coefficients in the filter; must be a multiple of <code>L</code>.
3717    * @param[in]     *pCoeffs  points to the filter coefficient buffer.
3718    * @param[in]     *pState   points to the state buffer.
3719    * @param[in]     blockSize number of input samples to process per call.
3720    * @return        ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
3721    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
3722    */
3723 
3724   arm_status arm_fir_interpolate_init_q31(
3725   arm_fir_interpolate_instance_q31 * S,
3726   uint8_t L,
3727   uint16_t numTaps,
3728   q31_t * pCoeffs,
3729   q31_t * pState,
3730   uint32_t blockSize);
3731 
3732 
3733   /**
3734    * @brief Processing function for the floating-point FIR interpolator.
3735    * @param[in] *S        points to an instance of the floating-point FIR interpolator structure (see arm_fir_interpolate_init_f32()).
3736    * @param[in] *pSrc     points to the block of input data.
3737    * @param[out] *pDst    points to the block of output data.
3738    * @param[in] blockSize number of input samples to process per call.
3739    * @return none.
3740    */
3741 
3742   void arm_fir_interpolate_f32(
3743   const arm_fir_interpolate_instance_f32 * S,
3744   float32_t * pSrc,
3745   float32_t * pDst,
3746   uint32_t blockSize);
3747 
3748   /**
3749    * @brief  Initialization function for the floating-point FIR interpolator.
3750    * @param[in,out] *S        points to an instance of the floating-point FIR interpolator structure.
3751    * @param[in]     L         upsample factor.
3752    * @param[in]     numTaps   number of coefficients in the filter; must be a multiple of <code>L</code>.
3753    * @param[in]     *pCoeffs  points to the filter coefficient buffer.
3754    * @param[in]     *pState   points to the state buffer.
3755    * @param[in]     blockSize number of input samples to process per call.
3756    * @return        ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
3757    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
3758    */
3759 
3760   arm_status arm_fir_interpolate_init_f32(
3761   arm_fir_interpolate_instance_f32 * S,
3762   uint8_t L,
3763   uint16_t numTaps,
3764   float32_t * pCoeffs,
3765   float32_t * pState,
3766   uint32_t blockSize);
3767 
3768   /**
3769    * @brief Instance structure for the high precision Q31 Biquad cascade filter. Filled in by arm_biquad_cas_df1_32x64_init_q31().
3770    */
3771 
3772   typedef struct
3773   {
3774     uint8_t numStages;       /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
3775     q63_t *pState;           /**< points to the array of state variables (q63_t).  The array is of length 4*numStages. */
3776     q31_t *pCoeffs;          /**< points to the array of coefficients.  The array is of length 5*numStages. */
3777     uint8_t postShift;       /**< additional shift, in bits, applied to each output sample. */
3778 
3779   } arm_biquad_cas_df1_32x64_ins_q31;
3780 
3781 
3782   /** @brief Processing function for the high precision Q31 Biquad cascade filter.
3783    * @param[in]  *S        points to an instance of the high precision Q31 Biquad cascade filter structure.
3784    * @param[in]  *pSrc     points to the block of input data.
3785    * @param[out] *pDst     points to the block of output data.
3786    * @param[in]  blockSize number of samples to process.
3787    * @return none.
3788    */
3789 
3790   void arm_biquad_cas_df1_32x64_q31(
3791   const arm_biquad_cas_df1_32x64_ins_q31 * S,
3792   q31_t * pSrc,
3793   q31_t * pDst,
3794   uint32_t blockSize);
3795 
3796 
3797   /** @brief Initialization function for the high precision Q31 Biquad cascade filter.
3798    * @param[in,out] *S           points to an instance of the high precision Q31 Biquad cascade filter structure.
3799    * @param[in]     numStages    number of 2nd order stages in the filter.
3800    * @param[in]     *pCoeffs     points to the filter coefficients. The array is of length 5*numStages.
3801    * @param[in]     *pState      points to the state buffer (q63_t). The array is of length 4*numStages.
3802    * @param[in]     postShift    shift, in bits, to be applied to the output. Varies according to the coefficients format.
3803    * @return        none.
3804    */
3805 
3806   void arm_biquad_cas_df1_32x64_init_q31(
3807   arm_biquad_cas_df1_32x64_ins_q31 * S,
3808   uint8_t numStages,
3809   q31_t * pCoeffs,
3810   q63_t * pState,
3811   uint8_t postShift);
3812 
3813 
3814 
3815   /**
3816    * @brief Instance structure for the floating-point (float32_t) transposed direct form II Biquad cascade filter.
3817    */
3818 
3819   typedef struct
3820   {
3821     uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
3822     float32_t *pState;         /**< points to the array of state variables.  The array is of length 2*numStages. */
3823     float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
3824   } arm_biquad_cascade_df2T_instance_f32;
3825 
3826 
3827 
3828   /**
3829    * @brief Instance structure for the stereo (2-channel) floating-point transposed direct form II Biquad cascade filter.
3830    */
3831 
3832   typedef struct
3833   {
3834     uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
3835     float32_t *pState;         /**< points to the array of state variables.  The array is of length 4*numStages (twice the single-channel structure). */
3836     float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
3837   } arm_biquad_cascade_stereo_df2T_instance_f32;
3838 
3839 
3840 
3841   /**
3842    * @brief Instance structure for the 64-bit floating-point (float64_t) transposed direct form II Biquad cascade filter.
3843    */
3844 
3845   typedef struct
3846   {
3847     uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
3848     float64_t *pState;         /**< points to the array of state variables.  The array is of length 2*numStages. */
3849     float64_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
3850   } arm_biquad_cascade_df2T_instance_f64;
3851 
3852 
3853   /**
3854    * @brief Processing function for the floating-point (float32_t) transposed direct form II Biquad cascade filter.
3855    * @param[in]  *S        points to an instance of the filter data structure.
3856    * @param[in]  *pSrc     points to the block of input data.
3857    * @param[out] *pDst     points to the block of output data.
3858    * @param[in]  blockSize number of samples to process.
3859    * @return none.
3860    */
3861 
3862   void arm_biquad_cascade_df2T_f32(
3863   const arm_biquad_cascade_df2T_instance_f32 * S,
3864   float32_t * pSrc,
3865   float32_t * pDst,
3866   uint32_t blockSize);
3867 
3868 
3869   /**
3870    * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter, 2-channel (stereo) variant.
3871    * @param[in]  *S        points to an instance of the stereo filter data structure.
3872    * @param[in]  *pSrc     points to the block of input data.
3873    * @param[out] *pDst     points to the block of output data.
3874    * @param[in]  blockSize number of samples to process.
3875    * @return none.
3876    */
3877 
3878   void arm_biquad_cascade_stereo_df2T_f32(
3879   const arm_biquad_cascade_stereo_df2T_instance_f32 * S,
3880   float32_t * pSrc,
3881   float32_t * pDst,
3882   uint32_t blockSize);
3883 
3884   /**
3885    * @brief Processing function for the 64-bit floating-point (float64_t) transposed direct form II Biquad cascade filter.
3886    * @param[in]  *S        points to an instance of the filter data structure.
3887    * @param[in]  *pSrc     points to the block of input data.
3888    * @param[out] *pDst     points to the block of output data.
3889    * @param[in]  blockSize number of samples to process.
3890    * @return none.
3891    */
3892 
3893   void arm_biquad_cascade_df2T_f64(
3894   const arm_biquad_cascade_df2T_instance_f64 * S,
3895   float64_t * pSrc,
3896   float64_t * pDst,
3897   uint32_t blockSize);
3898 
3899 
3900   /**
3901    * @brief  Initialization function for the floating-point (float32_t) transposed direct form II Biquad cascade filter.
3902    * @param[in,out] *S           points to an instance of the filter data structure.
3903    * @param[in]     numStages    number of 2nd order stages in the filter.
3904    * @param[in]     *pCoeffs     points to the filter coefficients. The array is of length 5*numStages.
3905    * @param[in]     *pState      points to the state buffer. The array is of length 2*numStages.
3906    * @return        none.
3907    */
3908 
3909   void arm_biquad_cascade_df2T_init_f32(
3910   arm_biquad_cascade_df2T_instance_f32 * S,
3911   uint8_t numStages,
3912   float32_t * pCoeffs,
3913   float32_t * pState);
3914 
3915 
3916   /**
3917    * @brief  Initialization function for the stereo (2-channel) floating-point transposed direct form II Biquad cascade filter.
3918    * @param[in,out] *S           points to an instance of the stereo filter data structure.
3919    * @param[in]     numStages    number of 2nd order stages in the filter.
3920    * @param[in]     *pCoeffs     points to the filter coefficients. The array is of length 5*numStages.
3921    * @param[in]     *pState      points to the state buffer. The array is of length 4*numStages.
3922    * @return        none.
3923    */
3924 
3925   void arm_biquad_cascade_stereo_df2T_init_f32(
3926   arm_biquad_cascade_stereo_df2T_instance_f32 * S,
3927   uint8_t numStages,
3928   float32_t * pCoeffs,
3929   float32_t * pState);
3930 
3931 
3932   /**
3933    * @brief  Initialization function for the 64-bit floating-point (float64_t) transposed direct form II Biquad cascade filter.
3934    * @param[in,out] *S           points to an instance of the filter data structure.
3935    * @param[in]     numStages    number of 2nd order stages in the filter.
3936    * @param[in]     *pCoeffs     points to the filter coefficients. The array is of length 5*numStages.
3937    * @param[in]     *pState      points to the state buffer. The array is of length 2*numStages.
3938    * @return        none.
3939    */
3940 
3941   void arm_biquad_cascade_df2T_init_f64(
3942   arm_biquad_cascade_df2T_instance_f64 * S,
3943   uint8_t numStages,
3944   float64_t * pCoeffs,
3945   float64_t * pState);
3946 
3947 
3948 
3949   /**
3950    * @brief Instance structure for the Q15 FIR lattice filter. Filled in by arm_fir_lattice_init_q15().
3951    */
3952 
3953   typedef struct
3954   {
3955     uint16_t numStages;                          /**< number of filter stages. */
3956     q15_t *pState;                               /**< points to the state variable array. The array is of length numStages. */
3957     q15_t *pCoeffs;                              /**< points to the coefficient array. The array is of length numStages. */
3958   } arm_fir_lattice_instance_q15;
3959 
3960   /**
3961    * @brief Instance structure for the Q31 FIR lattice filter. Filled in by arm_fir_lattice_init_q31().
3962    */
3963 
3964   typedef struct
3965   {
3966     uint16_t numStages;                          /**< number of filter stages. */
3967     q31_t *pState;                               /**< points to the state variable array. The array is of length numStages. */
3968     q31_t *pCoeffs;                              /**< points to the coefficient array. The array is of length numStages. */
3969   } arm_fir_lattice_instance_q31;
3970 
3971   /**
3972    * @brief Instance structure for the floating-point FIR lattice filter. Filled in by arm_fir_lattice_init_f32().
3973    */
3974 
3975   typedef struct
3976   {
3977     uint16_t numStages;                  /**< number of filter stages. */
3978     float32_t *pState;                   /**< points to the state variable array. The array is of length numStages. */
3979     float32_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numStages. */
3980   } arm_fir_lattice_instance_f32;
3981 
3982   /**
3983    * @brief Initialization function for the Q15 FIR lattice filter.
3984    * @param[in,out] *S points to an instance of the Q15 FIR lattice structure.
3985    * @param[in] numStages  number of filter stages.
3986    * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
3987    * @param[in] *pState points to the state buffer.  The array is of length numStages.
3988    * @return none.
3989    */
3990 
3991   void arm_fir_lattice_init_q15(
3992   arm_fir_lattice_instance_q15 * S,
3993   uint16_t numStages,
3994   q15_t * pCoeffs,
3995   q15_t * pState);
3996 
3997 
3998   /**
3999    * @brief Processing function for the Q15 FIR lattice filter.
4000    * @param[in] *S points to an instance of the Q15 FIR lattice structure (see arm_fir_lattice_init_q15()).
4001    * @param[in] *pSrc points to the block of input data.
4002    * @param[out] *pDst points to the block of output data.
4003    * @param[in] blockSize number of samples to process.
4004    * @return none.
4005    */
4006   void arm_fir_lattice_q15(
4007   const arm_fir_lattice_instance_q15 * S,
4008   q15_t * pSrc,
4009   q15_t * pDst,
4010   uint32_t blockSize);
4011 
4012   /**
4013    * @brief Initialization function for the Q31 FIR lattice filter.
4014    * @param[in,out] *S points to an instance of the Q31 FIR lattice structure.
4015    * @param[in] numStages  number of filter stages.
4016    * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
4017    * @param[in] *pState points to the state buffer.  The array is of length numStages.
4018    * @return none.
4019    */
4020 
4021   void arm_fir_lattice_init_q31(
4022   arm_fir_lattice_instance_q31 * S,
4023   uint16_t numStages,
4024   q31_t * pCoeffs,
4025   q31_t * pState);
4026 
4027 
4028   /**
4029    * @brief Processing function for the Q31 FIR lattice filter.
4030    * @param[in]  *S        points to an instance of the Q31 FIR lattice structure (see arm_fir_lattice_init_q31()).
4031    * @param[in]  *pSrc     points to the block of input data.
4032    * @param[out] *pDst     points to the block of output data.
4033    * @param[in]  blockSize number of samples to process.
4034    * @return none.
4035    */
4036 
4037   void arm_fir_lattice_q31(
4038   const arm_fir_lattice_instance_q31 * S,
4039   q31_t * pSrc,
4040   q31_t * pDst,
4041   uint32_t blockSize);
4042 
4043 /**
4044  * @brief Initialization function for the floating-point FIR lattice filter.
4045  * @param[in,out] *S points to an instance of the floating-point FIR lattice structure.
4046  * @param[in] numStages  number of filter stages.
4047  * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
4048  * @param[in] *pState points to the state buffer.  The array is of length numStages.
4049  * @return none.
4050  */
4051 
4052   void arm_fir_lattice_init_f32(
4053   arm_fir_lattice_instance_f32 * S,
4054   uint16_t numStages,
4055   float32_t * pCoeffs,
4056   float32_t * pState);
4057 
4058   /**
4059    * @brief Processing function for the floating-point FIR lattice filter.
4060    * @param[in]  *S        points to an instance of the floating-point FIR lattice structure (see arm_fir_lattice_init_f32()).
4061    * @param[in]  *pSrc     points to the block of input data.
4062    * @param[out] *pDst     points to the block of output data.
4063    * @param[in]  blockSize number of samples to process.
4064    * @return none.
4065    */
4066 
4067   void arm_fir_lattice_f32(
4068   const arm_fir_lattice_instance_f32 * S,
4069   float32_t * pSrc,
4070   float32_t * pDst,
4071   uint32_t blockSize);
4072 
4073   /**
4074    * @brief Instance structure for the Q15 IIR lattice filter. Filled in by arm_iir_lattice_init_q15().
4075    */
4076   typedef struct
4077   {
4078     uint16_t numStages;                         /**< number of stages in the filter. */
4079     q15_t *pState;                              /**< points to the state variable array. The array is of length numStages+blockSize. */
4080     q15_t *pkCoeffs;                            /**< points to the reflection (k) coefficient array. The array is of length numStages. */
4081     q15_t *pvCoeffs;                            /**< points to the ladder (v) coefficient array. The array is of length numStages+1. */
4082   } arm_iir_lattice_instance_q15;
4083 
4084   /**
4085    * @brief Instance structure for the Q31 IIR lattice filter. Filled in by arm_iir_lattice_init_q31().
4086    */
4087   typedef struct
4088   {
4089     uint16_t numStages;                         /**< number of stages in the filter. */
4090     q31_t *pState;                              /**< points to the state variable array. The array is of length numStages+blockSize. */
4091     q31_t *pkCoeffs;                            /**< points to the reflection (k) coefficient array. The array is of length numStages. */
4092     q31_t *pvCoeffs;                            /**< points to the ladder (v) coefficient array. The array is of length numStages+1. */
4093   } arm_iir_lattice_instance_q31;
4094 
4095   /**
4096    * @brief Instance structure for the floating-point IIR lattice filter. Filled in by arm_iir_lattice_init_f32().
4097    */
4098   typedef struct
4099   {
4100     uint16_t numStages;                         /**< number of stages in the filter. */
4101     float32_t *pState;                          /**< points to the state variable array. The array is of length numStages+blockSize. */
4102     float32_t *pkCoeffs;                        /**< points to the reflection (k) coefficient array. The array is of length numStages. */
4103     float32_t *pvCoeffs;                        /**< points to the ladder (v) coefficient array. The array is of length numStages+1. */
4104   } arm_iir_lattice_instance_f32;
4105 
4106   /**
4107    * @brief Processing function for the floating-point IIR lattice filter.
4108    * @param[in] *S points to an instance of the floating-point IIR lattice structure (see arm_iir_lattice_init_f32()).
4109    * @param[in] *pSrc points to the block of input data.
4110    * @param[out] *pDst points to the block of output data.
4111    * @param[in] blockSize number of samples to process.
4112    * @return none.
4113    */
4114 
4115   void arm_iir_lattice_f32(
4116   const arm_iir_lattice_instance_f32 * S,
4117   float32_t * pSrc,
4118   float32_t * pDst,
4119   uint32_t blockSize);
4120 
4121   /**
4122    * @brief Initialization function for the floating-point IIR lattice filter.
4123    * @param[in,out] *S points to an instance of the floating-point IIR lattice structure.
4124    * @param[in] numStages number of stages in the filter.
4125    * @param[in] *pkCoeffs points to the reflection coefficient buffer.  The array is of length numStages.
4126    * @param[in] *pvCoeffs points to the ladder coefficient buffer.  The array is of length numStages+1.
4127    * @param[in] *pState points to the state buffer.  The array is of length numStages+blockSize (as documented on arm_iir_lattice_instance_f32).
4128    * @param[in] blockSize number of samples to process.
4129    * @return none.
4130    */
4131 
4132   void arm_iir_lattice_init_f32(
4133   arm_iir_lattice_instance_f32 * S,
4134   uint16_t numStages,
4135   float32_t * pkCoeffs,
4136   float32_t * pvCoeffs,
4137   float32_t * pState,
4138   uint32_t blockSize);
4139 
4140 
4141   /**
4142    * @brief Processing function for the Q31 IIR lattice filter.
4143    * @param[in] *S points to an instance of the Q31 IIR lattice structure (see arm_iir_lattice_init_q31()).
4144    * @param[in] *pSrc points to the block of input data.
4145    * @param[out] *pDst points to the block of output data.
4146    * @param[in] blockSize number of samples to process.
4147    * @return none.
4148    */
4149 
4150   void arm_iir_lattice_q31(
4151   const arm_iir_lattice_instance_q31 * S,
4152   q31_t * pSrc,
4153   q31_t * pDst,
4154   uint32_t blockSize);
4155 
4156 
4157   /**
4158    * @brief Initialization function for the Q31 IIR lattice filter.
4159    * @param[in,out] *S points to an instance of the Q31 IIR lattice structure.
4160    * @param[in] numStages number of stages in the filter.
4161    * @param[in] *pkCoeffs points to the reflection coefficient buffer.  The array is of length numStages.
4162    * @param[in] *pvCoeffs points to the ladder coefficient buffer.  The array is of length numStages+1.
4163    * @param[in] *pState points to the state buffer.  The array is of length numStages+blockSize.
4164    * @param[in] blockSize number of samples to process.
4165    * @return none.
4166    */
4167 
4168   void arm_iir_lattice_init_q31(
4169   arm_iir_lattice_instance_q31 * S,
4170   uint16_t numStages,
4171   q31_t * pkCoeffs,
4172   q31_t * pvCoeffs,
4173   q31_t * pState,
4174   uint32_t blockSize);
4175 
4176 
4177   /**
4178    * @brief Processing function for the Q15 IIR lattice filter.
4179    * @param[in] *S points to an instance of the Q15 IIR lattice structure (see arm_iir_lattice_init_q15()).
4180    * @param[in] *pSrc points to the block of input data.
4181    * @param[out] *pDst points to the block of output data.
4182    * @param[in] blockSize number of samples to process.
4183    * @return none.
4184    */
4185 
4186   void arm_iir_lattice_q15(
4187   const arm_iir_lattice_instance_q15 * S,
4188   q15_t * pSrc,
4189   q15_t * pDst,
4190   uint32_t blockSize);
4191 
4192 
4193 /**
4194  * @brief Initialization function for the Q15 IIR lattice filter.
4195  * @param[in] *S points to an instance of the fixed-point Q15 IIR lattice structure.
4196  * @param[in] numStages  number of stages in the filter.
4197  * @param[in] *pkCoeffs points to reflection coefficient buffer.  The array is of length numStages.
4198  * @param[in] *pvCoeffs points to ladder coefficient buffer.  The array is of length numStages+1.
4199  * @param[in] *pState points to state buffer.  The array is of length numStages+blockSize.
4200  * @param[in] blockSize number of samples to process per call.
4201  * @return none.
4202  */
4203 
4204   void arm_iir_lattice_init_q15(
4205   arm_iir_lattice_instance_q15 * S,
4206   uint16_t numStages,
4207   q15_t * pkCoeffs,
4208   q15_t * pvCoeffs,
4209   q15_t * pState,
4210   uint32_t blockSize);
4211 
4212   /**
4213    * @brief Instance structure for the floating-point LMS filter.
4214    */
4215 
4216   typedef struct
4217   {
4218     uint16_t numTaps;    /**< number of coefficients in the filter. */
4219     float32_t *pState;   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4220     float32_t *pCoeffs;  /**< points to the coefficient array. The array is of length numTaps. */
4221     float32_t mu;        /**< step size that controls filter coefficient updates. */
4222   } arm_lms_instance_f32;
4223 
4224   /**
4225    * @brief Processing function for floating-point LMS filter.
4226    * @param[in]  *S points to an instance of the floating-point LMS filter structure.
4227    * @param[in]  *pSrc points to the block of input data.
4228    * @param[in]  *pRef points to the block of reference data.
4229    * @param[out] *pOut points to the block of output data.
4230    * @param[out] *pErr points to the block of error data.
4231    * @param[in]  blockSize number of samples to process.
4232    * @return     none.
4233    */
4234 
4235   void arm_lms_f32(
4236   const arm_lms_instance_f32 * S,
4237   float32_t * pSrc,
4238   float32_t * pRef,
4239   float32_t * pOut,
4240   float32_t * pErr,
4241   uint32_t blockSize);
4242 
4243   /**
4244    * @brief Initialization function for floating-point LMS filter.
4245    * @param[in] *S points to an instance of the floating-point LMS filter structure.
4246    * @param[in] numTaps  number of filter coefficients.
4247    * @param[in] *pCoeffs points to the coefficient buffer.
4248    * @param[in] *pState points to state buffer.
4249    * @param[in] mu step size that controls filter coefficient updates.
4250    * @param[in] blockSize number of samples to process.
4251    * @return none.
4252    */
4253 
4254   void arm_lms_init_f32(
4255   arm_lms_instance_f32 * S,
4256   uint16_t numTaps,
4257   float32_t * pCoeffs,
4258   float32_t * pState,
4259   float32_t mu,
4260   uint32_t blockSize);
4261 
4262   /**
4263    * @brief Instance structure for the Q15 LMS filter.
4264    */
4265 
4266   typedef struct
4267   {
4268     uint16_t numTaps;    /**< number of coefficients in the filter. */
4269     q15_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4270     q15_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
4271     q15_t mu;            /**< step size that controls filter coefficient updates. */
4272     uint32_t postShift;  /**< bit shift applied to coefficients. */
4273   } arm_lms_instance_q15;
4274 
4275 
4276   /**
4277    * @brief Initialization function for the Q15 LMS filter.
4278    * @param[in] *S points to an instance of the Q15 LMS filter structure.
4279    * @param[in] numTaps  number of filter coefficients.
4280    * @param[in] *pCoeffs points to the coefficient buffer.
4281    * @param[in] *pState points to the state buffer.
4282    * @param[in] mu step size that controls filter coefficient updates.
4283    * @param[in] blockSize number of samples to process.
4284    * @param[in] postShift bit shift applied to coefficients.
4285    * @return    none.
4286    */
4287 
4288   void arm_lms_init_q15(
4289   arm_lms_instance_q15 * S,
4290   uint16_t numTaps,
4291   q15_t * pCoeffs,
4292   q15_t * pState,
4293   q15_t mu,
4294   uint32_t blockSize,
4295   uint32_t postShift);
4296 
4297   /**
4298    * @brief Processing function for Q15 LMS filter.
4299    * @param[in] *S points to an instance of the Q15 LMS filter structure.
4300    * @param[in] *pSrc points to the block of input data.
4301    * @param[in] *pRef points to the block of reference data.
4302    * @param[out] *pOut points to the block of output data.
4303    * @param[out] *pErr points to the block of error data.
4304    * @param[in] blockSize number of samples to process.
4305    * @return none.
4306    */
4307 
4308   void arm_lms_q15(
4309   const arm_lms_instance_q15 * S,
4310   q15_t * pSrc,
4311   q15_t * pRef,
4312   q15_t * pOut,
4313   q15_t * pErr,
4314   uint32_t blockSize);
4315 
4316 
4317   /**
4318    * @brief Instance structure for the Q31 LMS filter.
4319    */
4320 
4321   typedef struct
4322   {
4323     uint16_t numTaps;    /**< number of coefficients in the filter. */
4324     q31_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4325     q31_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
4326     q31_t mu;            /**< step size that controls filter coefficient updates. */
4327     uint32_t postShift;  /**< bit shift applied to coefficients. */
4328 
4329   } arm_lms_instance_q31;
4330 
4331   /**
4332    * @brief Processing function for Q31 LMS filter.
4333    * @param[in]  *S points to an instance of the Q15 LMS filter structure.
4334    * @param[in]  *pSrc points to the block of input data.
4335    * @param[in]  *pRef points to the block of reference data.
4336    * @param[out] *pOut points to the block of output data.
4337    * @param[out] *pErr points to the block of error data.
4338    * @param[in]  blockSize number of samples to process.
4339    * @return     none.
4340    */
4341 
4342   void arm_lms_q31(
4343   const arm_lms_instance_q31 * S,
4344   q31_t * pSrc,
4345   q31_t * pRef,
4346   q31_t * pOut,
4347   q31_t * pErr,
4348   uint32_t blockSize);
4349 
4350   /**
4351    * @brief Initialization function for Q31 LMS filter.
4352    * @param[in] *S points to an instance of the Q31 LMS filter structure.
4353    * @param[in] numTaps  number of filter coefficients.
4354    * @param[in] *pCoeffs points to coefficient buffer.
4355    * @param[in] *pState points to state buffer.
4356    * @param[in] mu step size that controls filter coefficient updates.
4357    * @param[in] blockSize number of samples to process.
4358    * @param[in] postShift bit shift applied to coefficients.
4359    * @return none.
4360    */
4361 
4362   void arm_lms_init_q31(
4363   arm_lms_instance_q31 * S,
4364   uint16_t numTaps,
4365   q31_t * pCoeffs,
4366   q31_t * pState,
4367   q31_t mu,
4368   uint32_t blockSize,
4369   uint32_t postShift);
4370 
4371   /**
4372    * @brief Instance structure for the floating-point normalized LMS filter.
4373    */
4374 
4375   typedef struct
4376   {
4377     uint16_t numTaps;     /**< number of coefficients in the filter. */
4378     float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4379     float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
4380     float32_t mu;        /**< step size that control filter coefficient updates. */
4381     float32_t energy;    /**< saves previous frame energy. */
4382     float32_t x0;        /**< saves previous input sample. */
4383   } arm_lms_norm_instance_f32;
4384 
4385   /**
4386    * @brief Processing function for floating-point normalized LMS filter.
4387    * @param[in] *S points to an instance of the floating-point normalized LMS filter structure.
4388    * @param[in] *pSrc points to the block of input data.
4389    * @param[in] *pRef points to the block of reference data.
4390    * @param[out] *pOut points to the block of output data.
4391    * @param[out] *pErr points to the block of error data.
4392    * @param[in] blockSize number of samples to process.
4393    * @return none.
4394    */
4395 
4396   void arm_lms_norm_f32(
4397   arm_lms_norm_instance_f32 * S,
4398   float32_t * pSrc,
4399   float32_t * pRef,
4400   float32_t * pOut,
4401   float32_t * pErr,
4402   uint32_t blockSize);
4403 
4404   /**
4405    * @brief Initialization function for floating-point normalized LMS filter.
4406    * @param[in] *S points to an instance of the floating-point LMS filter structure.
4407    * @param[in] numTaps  number of filter coefficients.
4408    * @param[in] *pCoeffs points to coefficient buffer.
4409    * @param[in] *pState points to state buffer.
4410    * @param[in] mu step size that controls filter coefficient updates.
4411    * @param[in] blockSize number of samples to process.
4412    * @return none.
4413    */
4414 
4415   void arm_lms_norm_init_f32(
4416   arm_lms_norm_instance_f32 * S,
4417   uint16_t numTaps,
4418   float32_t * pCoeffs,
4419   float32_t * pState,
4420   float32_t mu,
4421   uint32_t blockSize);
4422 
4423 
4424   /**
4425    * @brief Instance structure for the Q31 normalized LMS filter.
4426    */
4427   typedef struct
4428   {
4429     uint16_t numTaps;     /**< number of coefficients in the filter. */
4430     q31_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4431     q31_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
4432     q31_t mu;             /**< step size that controls filter coefficient updates. */
4433     uint8_t postShift;    /**< bit shift applied to coefficients. */
4434     q31_t *recipTable;    /**< points to the reciprocal initial value table. */
4435     q31_t energy;         /**< saves previous frame energy. */
4436     q31_t x0;             /**< saves previous input sample. */
4437   } arm_lms_norm_instance_q31;
4438 
4439   /**
4440    * @brief Processing function for Q31 normalized LMS filter.
4441    * @param[in] *S points to an instance of the Q31 normalized LMS filter structure.
4442    * @param[in] *pSrc points to the block of input data.
4443    * @param[in] *pRef points to the block of reference data.
4444    * @param[out] *pOut points to the block of output data.
4445    * @param[out] *pErr points to the block of error data.
4446    * @param[in] blockSize number of samples to process.
4447    * @return none.
4448    */
4449 
4450   void arm_lms_norm_q31(
4451   arm_lms_norm_instance_q31 * S,
4452   q31_t * pSrc,
4453   q31_t * pRef,
4454   q31_t * pOut,
4455   q31_t * pErr,
4456   uint32_t blockSize);
4457 
4458   /**
4459    * @brief Initialization function for Q31 normalized LMS filter.
4460    * @param[in] *S points to an instance of the Q31 normalized LMS filter structure.
4461    * @param[in] numTaps  number of filter coefficients.
4462    * @param[in] *pCoeffs points to coefficient buffer.
4463    * @param[in] *pState points to state buffer.
4464    * @param[in] mu step size that controls filter coefficient updates.
4465    * @param[in] blockSize number of samples to process.
4466    * @param[in] postShift bit shift applied to coefficients.
4467    * @return none.
4468    */
4469 
4470   void arm_lms_norm_init_q31(
4471   arm_lms_norm_instance_q31 * S,
4472   uint16_t numTaps,
4473   q31_t * pCoeffs,
4474   q31_t * pState,
4475   q31_t mu,
4476   uint32_t blockSize,
4477   uint8_t postShift);
4478 
4479   /**
4480    * @brief Instance structure for the Q15 normalized LMS filter.
4481    */
4482 
4483   typedef struct
4484   {
4485     uint16_t numTaps;    /**< Number of coefficients in the filter. */
4486     q15_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4487     q15_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
4488     q15_t mu;            /**< step size that controls filter coefficient updates. */
4489     uint8_t postShift;   /**< bit shift applied to coefficients. */
4490     q15_t *recipTable;   /**< Points to the reciprocal initial value table. */
4491     q15_t energy;        /**< saves previous frame energy. */
4492     q15_t x0;            /**< saves previous input sample. */
4493   } arm_lms_norm_instance_q15;
4494 
4495   /**
4496    * @brief Processing function for Q15 normalized LMS filter.
4497    * @param[in] *S points to an instance of the Q15 normalized LMS filter structure.
4498    * @param[in] *pSrc points to the block of input data.
4499    * @param[in] *pRef points to the block of reference data.
4500    * @param[out] *pOut points to the block of output data.
4501    * @param[out] *pErr points to the block of error data.
4502    * @param[in] blockSize number of samples to process.
4503    * @return none.
4504    */
4505 
4506   void arm_lms_norm_q15(
4507   arm_lms_norm_instance_q15 * S,
4508   q15_t * pSrc,
4509   q15_t * pRef,
4510   q15_t * pOut,
4511   q15_t * pErr,
4512   uint32_t blockSize);
4513 
4514 
4515   /**
4516    * @brief Initialization function for Q15 normalized LMS filter.
4517    * @param[in] *S points to an instance of the Q15 normalized LMS filter structure.
4518    * @param[in] numTaps  number of filter coefficients.
4519    * @param[in] *pCoeffs points to coefficient buffer.
4520    * @param[in] *pState points to state buffer.
4521    * @param[in] mu step size that controls filter coefficient updates.
4522    * @param[in] blockSize number of samples to process.
4523    * @param[in] postShift bit shift applied to coefficients.
4524    * @return none.
4525    */
4526 
4527   void arm_lms_norm_init_q15(
4528   arm_lms_norm_instance_q15 * S,
4529   uint16_t numTaps,
4530   q15_t * pCoeffs,
4531   q15_t * pState,
4532   q15_t mu,
4533   uint32_t blockSize,
4534   uint8_t postShift);
4535 
4536   /**
4537    * @brief Correlation of floating-point sequences.
4538    * @param[in] *pSrcA points to the first input sequence.
4539    * @param[in] srcALen length of the first input sequence.
4540    * @param[in] *pSrcB points to the second input sequence.
4541    * @param[in] srcBLen length of the second input sequence.
4542    * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
4543    * @return none.
4544    */
4545 
4546   void arm_correlate_f32(
4547   float32_t * pSrcA,
4548   uint32_t srcALen,
4549   float32_t * pSrcB,
4550   uint32_t srcBLen,
4551   float32_t * pDst);
4552 
4553 
4554    /**
4555    * @brief Correlation of Q15 sequences
4556    * @param[in] *pSrcA points to the first input sequence.
4557    * @param[in] srcALen length of the first input sequence.
4558    * @param[in] *pSrcB points to the second input sequence.
4559    * @param[in] srcBLen length of the second input sequence.
4560    * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
4561    * @param[in]  *pScratch points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
4562    * @return none.
4563    */
4564   void arm_correlate_opt_q15(
4565   q15_t * pSrcA,
4566   uint32_t srcALen,
4567   q15_t * pSrcB,
4568   uint32_t srcBLen,
4569   q15_t * pDst,
4570   q15_t * pScratch);
4571 
4572 
4573   /**
4574    * @brief Correlation of Q15 sequences.
4575    * @param[in] *pSrcA points to the first input sequence.
4576    * @param[in] srcALen length of the first input sequence.
4577    * @param[in] *pSrcB points to the second input sequence.
4578    * @param[in] srcBLen length of the second input sequence.
4579    * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
4580    * @return none.
4581    */
4582 
4583   void arm_correlate_q15(
4584   q15_t * pSrcA,
4585   uint32_t srcALen,
4586   q15_t * pSrcB,
4587   uint32_t srcBLen,
4588   q15_t * pDst);
4589 
4590   /**
4591    * @brief Correlation of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
4592    * @param[in] *pSrcA points to the first input sequence.
4593    * @param[in] srcALen length of the first input sequence.
4594    * @param[in] *pSrcB points to the second input sequence.
4595    * @param[in] srcBLen length of the second input sequence.
4596    * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
4597    * @return none.
4598    */
4599 
4600   void arm_correlate_fast_q15(
4601 			       q15_t * pSrcA,
4602 			      uint32_t srcALen,
4603 			       q15_t * pSrcB,
4604 			      uint32_t srcBLen,
4605 			      q15_t * pDst);
4606 
4607 
4608 
4609   /**
4610    * @brief Correlation of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
4611    * @param[in] *pSrcA points to the first input sequence.
4612    * @param[in] srcALen length of the first input sequence.
4613    * @param[in] *pSrcB points to the second input sequence.
4614    * @param[in] srcBLen length of the second input sequence.
4615    * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
4616    * @param[in]  *pScratch points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
4617    * @return none.
4618    */
4619 
4620   void arm_correlate_fast_opt_q15(
4621   q15_t * pSrcA,
4622   uint32_t srcALen,
4623   q15_t * pSrcB,
4624   uint32_t srcBLen,
4625   q15_t * pDst,
4626   q15_t * pScratch);
4627 
4628   /**
4629    * @brief Correlation of Q31 sequences.
4630    * @param[in] *pSrcA points to the first input sequence.
4631    * @param[in] srcALen length of the first input sequence.
4632    * @param[in] *pSrcB points to the second input sequence.
4633    * @param[in] srcBLen length of the second input sequence.
4634    * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
4635    * @return none.
4636    */
4637 
4638   void arm_correlate_q31(
4639   q31_t * pSrcA,
4640   uint32_t srcALen,
4641   q31_t * pSrcB,
4642   uint32_t srcBLen,
4643   q31_t * pDst);
4644 
4645   /**
4646    * @brief Correlation of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4
4647    * @param[in] *pSrcA points to the first input sequence.
4648    * @param[in] srcALen length of the first input sequence.
4649    * @param[in] *pSrcB points to the second input sequence.
4650    * @param[in] srcBLen length of the second input sequence.
4651    * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
4652    * @return none.
4653    */
4654 
4655   void arm_correlate_fast_q31(
4656   q31_t * pSrcA,
4657   uint32_t srcALen,
4658   q31_t * pSrcB,
4659   uint32_t srcBLen,
4660   q31_t * pDst);
4661 
4662 
4663 
4664  /**
4665    * @brief Correlation of Q7 sequences.
4666    * @param[in] *pSrcA points to the first input sequence.
4667    * @param[in] srcALen length of the first input sequence.
4668    * @param[in] *pSrcB points to the second input sequence.
4669    * @param[in] srcBLen length of the second input sequence.
4670    * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
4671    * @param[in]  *pScratch1 points to scratch buffer(of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
4672    * @param[in]  *pScratch2 points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
4673    * @return none.
4674    */
4675 
4676   void arm_correlate_opt_q7(
4677   q7_t * pSrcA,
4678   uint32_t srcALen,
4679   q7_t * pSrcB,
4680   uint32_t srcBLen,
4681   q7_t * pDst,
4682   q15_t * pScratch1,
4683   q15_t * pScratch2);
4684 
4685 
4686   /**
4687    * @brief Correlation of Q7 sequences.
4688    * @param[in] *pSrcA points to the first input sequence.
4689    * @param[in] srcALen length of the first input sequence.
4690    * @param[in] *pSrcB points to the second input sequence.
4691    * @param[in] srcBLen length of the second input sequence.
4692    * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
4693    * @return none.
4694    */
4695 
4696   void arm_correlate_q7(
4697   q7_t * pSrcA,
4698   uint32_t srcALen,
4699   q7_t * pSrcB,
4700   uint32_t srcBLen,
4701   q7_t * pDst);
4702 
4703 
4704   /**
4705    * @brief Instance structure for the floating-point sparse FIR filter.
4706    */
4707   typedef struct
4708   {
4709     uint16_t numTaps;             /**< number of coefficients in the filter. */
4710     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4711     float32_t *pState;            /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4712     float32_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
4713     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4714     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4715   } arm_fir_sparse_instance_f32;
4716 
4717   /**
4718    * @brief Instance structure for the Q31 sparse FIR filter.
4719    */
4720 
4721   typedef struct
4722   {
4723     uint16_t numTaps;             /**< number of coefficients in the filter. */
4724     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4725     q31_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4726     q31_t *pCoeffs;               /**< points to the coefficient array. The array is of length numTaps.*/
4727     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4728     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4729   } arm_fir_sparse_instance_q31;
4730 
4731   /**
4732    * @brief Instance structure for the Q15 sparse FIR filter.
4733    */
4734 
4735   typedef struct
4736   {
4737     uint16_t numTaps;             /**< number of coefficients in the filter. */
4738     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4739     q15_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4740     q15_t *pCoeffs;               /**< points to the coefficient array. The array is of length numTaps.*/
4741     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4742     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4743   } arm_fir_sparse_instance_q15;
4744 
4745   /**
4746    * @brief Instance structure for the Q7 sparse FIR filter.
4747    */
4748 
4749   typedef struct
4750   {
4751     uint16_t numTaps;             /**< number of coefficients in the filter. */
4752     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4753     q7_t *pState;                 /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4754     q7_t *pCoeffs;                /**< points to the coefficient array. The array is of length numTaps.*/
4755     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4756     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4757   } arm_fir_sparse_instance_q7;
4758 
4759   /**
4760    * @brief Processing function for the floating-point sparse FIR filter.
4761    * @param[in]  *S          points to an instance of the floating-point sparse FIR structure.
4762    * @param[in]  *pSrc       points to the block of input data.
4763    * @param[out] *pDst       points to the block of output data
4764    * @param[in]  *pScratchIn points to a temporary buffer of size blockSize.
4765    * @param[in]  blockSize   number of input samples to process per call.
4766    * @return none.
4767    */
4768 
4769   void arm_fir_sparse_f32(
4770   arm_fir_sparse_instance_f32 * S,
4771   float32_t * pSrc,
4772   float32_t * pDst,
4773   float32_t * pScratchIn,
4774   uint32_t blockSize);
4775 
4776   /**
4777    * @brief  Initialization function for the floating-point sparse FIR filter.
4778    * @param[in,out] *S         points to an instance of the floating-point sparse FIR structure.
4779    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4780    * @param[in]     *pCoeffs   points to the array of filter coefficients.
4781    * @param[in]     *pState    points to the state buffer.
4782    * @param[in]     *pTapDelay points to the array of offset times.
4783    * @param[in]     maxDelay   maximum offset time supported.
4784    * @param[in]     blockSize  number of samples that will be processed per block.
4785    * @return none
4786    */
4787 
4788   void arm_fir_sparse_init_f32(
4789   arm_fir_sparse_instance_f32 * S,
4790   uint16_t numTaps,
4791   float32_t * pCoeffs,
4792   float32_t * pState,
4793   int32_t * pTapDelay,
4794   uint16_t maxDelay,
4795   uint32_t blockSize);
4796 
4797   /**
4798    * @brief Processing function for the Q31 sparse FIR filter.
4799    * @param[in]  *S          points to an instance of the Q31 sparse FIR structure.
4800    * @param[in]  *pSrc       points to the block of input data.
4801    * @param[out] *pDst       points to the block of output data
4802    * @param[in]  *pScratchIn points to a temporary buffer of size blockSize.
4803    * @param[in]  blockSize   number of input samples to process per call.
4804    * @return none.
4805    */
4806 
4807   void arm_fir_sparse_q31(
4808   arm_fir_sparse_instance_q31 * S,
4809   q31_t * pSrc,
4810   q31_t * pDst,
4811   q31_t * pScratchIn,
4812   uint32_t blockSize);
4813 
4814   /**
4815    * @brief  Initialization function for the Q31 sparse FIR filter.
4816    * @param[in,out] *S         points to an instance of the Q31 sparse FIR structure.
4817    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4818    * @param[in]     *pCoeffs   points to the array of filter coefficients.
4819    * @param[in]     *pState    points to the state buffer.
4820    * @param[in]     *pTapDelay points to the array of offset times.
4821    * @param[in]     maxDelay   maximum offset time supported.
4822    * @param[in]     blockSize  number of samples that will be processed per block.
4823    * @return none
4824    */
4825 
4826   void arm_fir_sparse_init_q31(
4827   arm_fir_sparse_instance_q31 * S,
4828   uint16_t numTaps,
4829   q31_t * pCoeffs,
4830   q31_t * pState,
4831   int32_t * pTapDelay,
4832   uint16_t maxDelay,
4833   uint32_t blockSize);
4834 
4835   /**
4836    * @brief Processing function for the Q15 sparse FIR filter.
4837    * @param[in]  *S           points to an instance of the Q15 sparse FIR structure.
4838    * @param[in]  *pSrc        points to the block of input data.
4839    * @param[out] *pDst        points to the block of output data
4840    * @param[in]  *pScratchIn  points to a temporary buffer of size blockSize.
4841    * @param[in]  *pScratchOut points to a temporary buffer of size blockSize.
4842    * @param[in]  blockSize    number of input samples to process per call.
4843    * @return none.
4844    */
4845 
4846   void arm_fir_sparse_q15(
4847   arm_fir_sparse_instance_q15 * S,
4848   q15_t * pSrc,
4849   q15_t * pDst,
4850   q15_t * pScratchIn,
4851   q31_t * pScratchOut,
4852   uint32_t blockSize);
4853 
4854 
4855   /**
4856    * @brief  Initialization function for the Q15 sparse FIR filter.
4857    * @param[in,out] *S         points to an instance of the Q15 sparse FIR structure.
4858    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4859    * @param[in]     *pCoeffs   points to the array of filter coefficients.
4860    * @param[in]     *pState    points to the state buffer.
4861    * @param[in]     *pTapDelay points to the array of offset times.
4862    * @param[in]     maxDelay   maximum offset time supported.
4863    * @param[in]     blockSize  number of samples that will be processed per block.
4864    * @return none
4865    */
4866 
4867   void arm_fir_sparse_init_q15(
4868   arm_fir_sparse_instance_q15 * S,
4869   uint16_t numTaps,
4870   q15_t * pCoeffs,
4871   q15_t * pState,
4872   int32_t * pTapDelay,
4873   uint16_t maxDelay,
4874   uint32_t blockSize);
4875 
4876   /**
4877    * @brief Processing function for the Q7 sparse FIR filter.
4878    * @param[in]  *S           points to an instance of the Q7 sparse FIR structure.
4879    * @param[in]  *pSrc        points to the block of input data.
4880    * @param[out] *pDst        points to the block of output data
4881    * @param[in]  *pScratchIn  points to a temporary buffer of size blockSize.
4882    * @param[in]  *pScratchOut points to a temporary buffer of size blockSize.
4883    * @param[in]  blockSize    number of input samples to process per call.
4884    * @return none.
4885    */
4886 
4887   void arm_fir_sparse_q7(
4888   arm_fir_sparse_instance_q7 * S,
4889   q7_t * pSrc,
4890   q7_t * pDst,
4891   q7_t * pScratchIn,
4892   q31_t * pScratchOut,
4893   uint32_t blockSize);
4894 
4895   /**
4896    * @brief  Initialization function for the Q7 sparse FIR filter.
4897    * @param[in,out] *S         points to an instance of the Q7 sparse FIR structure.
4898    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4899    * @param[in]     *pCoeffs   points to the array of filter coefficients.
4900    * @param[in]     *pState    points to the state buffer.
4901    * @param[in]     *pTapDelay points to the array of offset times.
4902    * @param[in]     maxDelay   maximum offset time supported.
4903    * @param[in]     blockSize  number of samples that will be processed per block.
4904    * @return none
4905    */
4906 
4907   void arm_fir_sparse_init_q7(
4908   arm_fir_sparse_instance_q7 * S,
4909   uint16_t numTaps,
4910   q7_t * pCoeffs,
4911   q7_t * pState,
4912   int32_t * pTapDelay,
4913   uint16_t maxDelay,
4914   uint32_t blockSize);
4915 
4916 
4917   /*
4918    * @brief  Floating-point sin_cos function.
4919    * @param[in]  theta    input value in degrees
4920    * @param[out] *pSinVal points to the processed sine output.
4921    * @param[out] *pCosVal points to the processed cos output.
4922    * @return none.
4923    */
4924 
4925   void arm_sin_cos_f32(
4926   float32_t theta,
4927   float32_t * pSinVal,
4928   float32_t * pCcosVal);
4929 
4930   /*
4931    * @brief  Q31 sin_cos function.
4932    * @param[in]  theta    scaled input value in degrees
4933    * @param[out] *pSinVal points to the processed sine output.
4934    * @param[out] *pCosVal points to the processed cosine output.
4935    * @return none.
4936    */
4937 
4938   void arm_sin_cos_q31(
4939   q31_t theta,
4940   q31_t * pSinVal,
4941   q31_t * pCosVal);
4942 
4943 
4944   /**
4945    * @brief  Floating-point complex conjugate.
4946    * @param[in]  *pSrc points to the input vector
4947    * @param[out]  *pDst points to the output vector
4948    * @param[in]  numSamples number of complex samples in each vector
4949    * @return none.
4950    */
4951 
4952   void arm_cmplx_conj_f32(
4953   float32_t * pSrc,
4954   float32_t * pDst,
4955   uint32_t numSamples);
4956 
4957   /**
4958    * @brief  Q31 complex conjugate.
4959    * @param[in]  *pSrc points to the input vector
4960    * @param[out]  *pDst points to the output vector
4961    * @param[in]  numSamples number of complex samples in each vector
4962    * @return none.
4963    */
4964 
4965   void arm_cmplx_conj_q31(
4966   q31_t * pSrc,
4967   q31_t * pDst,
4968   uint32_t numSamples);
4969 
4970   /**
4971    * @brief  Q15 complex conjugate.
4972    * @param[in]  *pSrc points to the input vector
4973    * @param[out]  *pDst points to the output vector
4974    * @param[in]  numSamples number of complex samples in each vector
4975    * @return none.
4976    */
4977 
4978   void arm_cmplx_conj_q15(
4979   q15_t * pSrc,
4980   q15_t * pDst,
4981   uint32_t numSamples);
4982 
4983 
4984 
4985   /**
4986    * @brief  Floating-point complex magnitude squared
4987    * @param[in]  *pSrc points to the complex input vector
4988    * @param[out]  *pDst points to the real output vector
4989    * @param[in]  numSamples number of complex samples in the input vector
4990    * @return none.
4991    */
4992 
4993   void arm_cmplx_mag_squared_f32(
4994   float32_t * pSrc,
4995   float32_t * pDst,
4996   uint32_t numSamples);
4997 
4998   /**
4999    * @brief  Q31 complex magnitude squared
5000    * @param[in]  *pSrc points to the complex input vector
5001    * @param[out]  *pDst points to the real output vector
5002    * @param[in]  numSamples number of complex samples in the input vector
5003    * @return none.
5004    */
5005 
5006   void arm_cmplx_mag_squared_q31(
5007   q31_t * pSrc,
5008   q31_t * pDst,
5009   uint32_t numSamples);
5010 
5011   /**
5012    * @brief  Q15 complex magnitude squared
5013    * @param[in]  *pSrc points to the complex input vector
5014    * @param[out]  *pDst points to the real output vector
5015    * @param[in]  numSamples number of complex samples in the input vector
5016    * @return none.
5017    */
5018 
5019   void arm_cmplx_mag_squared_q15(
5020   q15_t * pSrc,
5021   q15_t * pDst,
5022   uint32_t numSamples);
5023 
5024 
5025  /**
5026    * @ingroup groupController
5027    */
5028 
5029   /**
5030    * @defgroup PID PID Motor Control
5031    *
5032    * A Proportional Integral Derivative (PID) controller is a generic feedback control
5033    * loop mechanism widely used in industrial control systems.
5034    * A PID controller is the most commonly used type of feedback controller.
5035    *
5036    * This set of functions implements (PID) controllers
5037    * for Q15, Q31, and floating-point data types.  The functions operate on a single sample
5038    * of data and each call to the function returns a single processed value.
5039    * <code>S</code> points to an instance of the PID control data structure.  <code>in</code>
5040    * is the input sample value. The functions return the output value.
5041    *
5042    * \par Algorithm:
5043    * <pre>
5044    *    y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]
5045    *    A0 = Kp + Ki + Kd
5046    *    A1 = (-Kp ) - (2 * Kd )
5047    *    A2 = Kd  </pre>
5048    *
5049    * \par
   * where \c Kp is the proportional constant, \c Ki is the integral constant and \c Kd is the derivative constant
5051    *
5052    * \par
5053    * \image html PID.gif "Proportional Integral Derivative Controller"
5054    *
5055    * \par
5056    * The PID controller calculates an "error" value as the difference between
5057    * the measured output and the reference input.
5058    * The controller attempts to minimize the error by adjusting the process control inputs.
5059    * The proportional value determines the reaction to the current error,
5060    * the integral value determines the reaction based on the sum of recent errors,
5061    * and the derivative value determines the reaction based on the rate at which the error has been changing.
5062    *
5063    * \par Instance Structure
5064    * The Gains A0, A1, A2 and state variables for a PID controller are stored together in an instance data structure.
5065    * A separate instance structure must be defined for each PID Controller.
5066    * There are separate instance structure declarations for each of the 3 supported data types.
5067    *
5068    * \par Reset Functions
5069    * There is also an associated reset function for each data type which clears the state array.
5070    *
5071    * \par Initialization Functions
5072    * There is also an associated initialization function for each data type.
5073    * The initialization function performs the following operations:
5074    * - Initializes the Gains A0, A1, A2 from Kp,Ki, Kd gains.
5075    * - Zeros out the values in the state buffer.
5076    *
5077    * \par
5078    * Instance structure cannot be placed into a const data section and it is recommended to use the initialization function.
5079    *
5080    * \par Fixed-Point Behavior
5081    * Care must be taken when using the fixed-point versions of the PID Controller functions.
5082    * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
5083    * Refer to the function specific documentation below for usage guidelines.
5084    */
5085 
5086   /**
5087    * @addtogroup PID
5088    * @{
5089    */
5090 
5091   /**
5092    * @brief  Process function for the floating-point PID Control.
5093    * @param[in,out] *S is an instance of the floating-point PID Control structure
5094    * @param[in] in input sample to process
5095    * @return out processed output sample.
5096    */
5097 
5098 
arm_pid_f32(arm_pid_instance_f32 * S,float32_t in)5099   static __INLINE float32_t arm_pid_f32(
5100   arm_pid_instance_f32 * S,
5101   float32_t in)
5102   {
5103     float32_t out;
5104 
5105     /* y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]  */
5106     out = (S->A0 * in) +
5107       (S->A1 * S->state[0]) + (S->A2 * S->state[1]) + (S->state[2]);
5108 
5109     /* Update state */
5110     S->state[1] = S->state[0];
5111     S->state[0] = in;
5112     S->state[2] = out;
5113 
5114     /* return to application */
5115     return (out);
5116 
5117   }
5118 
5119   /**
5120    * @brief  Process function for the Q31 PID Control.
5121    * @param[in,out] *S points to an instance of the Q31 PID Control structure
5122    * @param[in] in input sample to process
5123    * @return out processed output sample.
5124    *
5125    * <b>Scaling and Overflow Behavior:</b>
5126    * \par
5127    * The function is implemented using an internal 64-bit accumulator.
5128    * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
5129    * Thus, if the accumulator result overflows it wraps around rather than clip.
5130    * In order to avoid overflows completely the input signal must be scaled down by 2 bits as there are four additions.
5131    * After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format.
5132    */
5133 
arm_pid_q31(arm_pid_instance_q31 * S,q31_t in)5134   static __INLINE q31_t arm_pid_q31(
5135   arm_pid_instance_q31 * S,
5136   q31_t in)
5137   {
5138     q63_t acc;
5139     q31_t out;
5140 
5141     /* acc = A0 * x[n]  */
5142     acc = (q63_t) S->A0 * in;
5143 
5144     /* acc += A1 * x[n-1] */
5145     acc += (q63_t) S->A1 * S->state[0];
5146 
5147     /* acc += A2 * x[n-2]  */
5148     acc += (q63_t) S->A2 * S->state[1];
5149 
5150     /* convert output to 1.31 format to add y[n-1] */
5151     out = (q31_t) (acc >> 31u);
5152 
5153     /* out += y[n-1] */
5154     out += S->state[2];
5155 
5156     /* Update state */
5157     S->state[1] = S->state[0];
5158     S->state[0] = in;
5159     S->state[2] = out;
5160 
5161     /* return to application */
5162     return (out);
5163 
5164   }
5165 
  /**
   * @brief  Process function for the Q15 PID Control.
   * @param[in,out] *S points to an instance of the Q15 PID Control structure;
   *                   its state history is updated on every call
   * @param[in] in input sample to process
   * @return out processed output sample.
   *
   * <b>Scaling and Overflow Behavior:</b>
   * \par
   * The function is implemented using a 64-bit internal accumulator.
   * Both Gains and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
   * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
   * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
   * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
   * Lastly, the accumulator is saturated to yield a result in 1.15 format.
   *
   * \par
   * Two code paths are selected at compile time by <code>ARM_MATH_CM0_FAMILY</code>:
   * when it is not defined the products are formed with the <code>__SMUAD</code> /
   * <code>__SMLALD</code> intrinsics, otherwise with plain C multiplications.
   * As used by this function, <code>state[0]</code> holds x[n-1], <code>state[1]</code> holds x[n-2]
   * and <code>state[2]</code> holds y[n-1].
   */

  static __INLINE q15_t arm_pid_q15(
  arm_pid_instance_q15 * S,
  q15_t in)
  {
    q63_t acc;                                   /* 64-bit accumulator */
    q15_t out;                                   /* saturated 1.15 result */

#ifndef ARM_MATH_CM0_FAMILY
    __SIMD32_TYPE *vstate;                       /* 32-bit view of the start of S->state */

    /* Implementation of PID controller */

    /* acc = A0 * x[n]  */
    acc = (q31_t) __SMUAD(S->A0, in);

    /* acc += A1 * x[n-1] + A2 * x[n-2]  */
    /* NOTE(review): S->A1 is passed as one 32-bit operand together with a single
     * 32-bit read covering state[0] and state[1]; presumably A1 packs both the A1
     * and A2 gains in this build - confirm against the instance structure. */
    vstate = __SIMD32_CONST(S->state);
    acc = __SMLALD(S->A1, (q31_t) *vstate, acc);

#else
    /* acc = A0 * x[n]  */
    acc = ((q31_t) S->A0) * in;

    /* acc += A1 * x[n-1] + A2 * x[n-2]  */
    /* Each product is formed in 32-bit arithmetic before being added to the 64-bit acc */
    acc += (q31_t) S->A1 * S->state[0];
    acc += (q31_t) S->A2 * S->state[1];

#endif

    /* acc += y[n-1] */
    /* The shift by 15 aligns y[n-1] with the products, which are shifted back by 15 below */
    acc += (q31_t) S->state[2] << 15;

    /* saturate the output */
    out = (q15_t) (__SSAT((acc >> 15), 16));

    /* Update state */
    S->state[1] = S->state[0];
    S->state[0] = in;
    S->state[2] = out;

    /* return to application */
    return (out);

  }
5226 
5227   /**
5228    * @} end of PID group
5229    */
5230 
5231 
5232   /**
5233    * @brief Floating-point matrix inverse.
5234    * @param[in]  *src points to the instance of the input floating-point matrix structure.
5235    * @param[out] *dst points to the instance of the output floating-point matrix structure.
5236    * @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match.
5237    * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status ARM_MATH_SINGULAR.
5238    */
5239 
5240   arm_status arm_mat_inverse_f32(
5241   const arm_matrix_instance_f32 * src,
5242   arm_matrix_instance_f32 * dst);
5243 
5244 
  /**
   * @brief 64-bit floating-point matrix inverse.
   * @param[in]  *src points to the instance of the input 64-bit floating-point matrix structure.
   * @param[out] *dst points to the instance of the output 64-bit floating-point matrix structure.
   * @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match.
   * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status ARM_MATH_SINGULAR.
   */
5252 
5253   arm_status arm_mat_inverse_f64(
5254   const arm_matrix_instance_f64 * src,
5255   arm_matrix_instance_f64 * dst);
5256 
5257 
5258 
5259   /**
5260    * @ingroup groupController
5261    */
5262 
5263 
5264   /**
5265    * @defgroup clarke Vector Clarke Transform
5266    * Forward Clarke transform converts the instantaneous stator phases into a two-coordinate time invariant vector.
5267    * Generally the Clarke transform uses three-phase currents <code>Ia, Ib and Ic</code> to calculate currents
5268    * in the two-phase orthogonal stator axis <code>Ialpha</code> and <code>Ibeta</code>.
5269    * When <code>Ialpha</code> is superposed with <code>Ia</code> as shown in the figure below
5270    * \image html clarke.gif Stator current space vector and its components in (a,b).
5271    * and <code>Ia + Ib + Ic = 0</code>, in this condition <code>Ialpha</code> and <code>Ibeta</code>
5272    * can be calculated using only <code>Ia</code> and <code>Ib</code>.
5273    *
5274    * The function operates on a single sample of data and each call to the function returns the processed output.
5275    * The library provides separate functions for Q31 and floating-point data types.
5276    * \par Algorithm
5277    * \image html clarkeFormula.gif
5278    * where <code>Ia</code> and <code>Ib</code> are the instantaneous stator phases and
5279    * <code>pIalpha</code> and <code>pIbeta</code> are the two coordinates of time invariant vector.
5280    * \par Fixed-Point Behavior
5281    * Care must be taken when using the Q31 version of the Clarke transform.
5282    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5283    * Refer to the function specific documentation below for usage guidelines.
5284    */
5285 
5286   /**
5287    * @addtogroup clarke
5288    * @{
5289    */
5290 
5291   /**
5292    *
5293    * @brief  Floating-point Clarke transform
5294    * @param[in]       Ia       input three-phase coordinate <code>a</code>
5295    * @param[in]       Ib       input three-phase coordinate <code>b</code>
5296    * @param[out]      *pIalpha points to output two-phase orthogonal vector axis alpha
5297    * @param[out]      *pIbeta  points to output two-phase orthogonal vector axis beta
5298    * @return none.
5299    */
5300 
arm_clarke_f32(float32_t Ia,float32_t Ib,float32_t * pIalpha,float32_t * pIbeta)5301   static __INLINE void arm_clarke_f32(
5302   float32_t Ia,
5303   float32_t Ib,
5304   float32_t * pIalpha,
5305   float32_t * pIbeta)
5306   {
5307     /* Calculate pIalpha using the equation, pIalpha = Ia */
5308     *pIalpha = Ia;
5309 
5310     /* Calculate pIbeta using the equation, pIbeta = (1/sqrt(3)) * Ia + (2/sqrt(3)) * Ib */
5311     *pIbeta =
5312       ((float32_t) 0.57735026919 * Ia + (float32_t) 1.15470053838 * Ib);
5313 
5314   }
5315 
5316   /**
5317    * @brief  Clarke transform for Q31 version
5318    * @param[in]       Ia       input three-phase coordinate <code>a</code>
5319    * @param[in]       Ib       input three-phase coordinate <code>b</code>
5320    * @param[out]      *pIalpha points to output two-phase orthogonal vector axis alpha
5321    * @param[out]      *pIbeta  points to output two-phase orthogonal vector axis beta
5322    * @return none.
5323    *
5324    * <b>Scaling and Overflow Behavior:</b>
5325    * \par
5326    * The function is implemented using an internal 32-bit accumulator.
5327    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5328    * There is saturation on the addition, hence there is no risk of overflow.
5329    */
5330 
arm_clarke_q31(q31_t Ia,q31_t Ib,q31_t * pIalpha,q31_t * pIbeta)5331   static __INLINE void arm_clarke_q31(
5332   q31_t Ia,
5333   q31_t Ib,
5334   q31_t * pIalpha,
5335   q31_t * pIbeta)
5336   {
5337     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5338 
5339     /* Calculating pIalpha from Ia by equation pIalpha = Ia */
5340     *pIalpha = Ia;
5341 
5342     /* Intermediate product is calculated by (1/(sqrt(3)) * Ia) */
5343     product1 = (q31_t) (((q63_t) Ia * 0x24F34E8B) >> 30);
5344 
5345     /* Intermediate product is calculated by (2/sqrt(3) * Ib) */
5346     product2 = (q31_t) (((q63_t) Ib * 0x49E69D16) >> 30);
5347 
5348     /* pIbeta is calculated by adding the intermediate products */
5349     *pIbeta = __QADD(product1, product2);
5350   }
5351 
5352   /**
5353    * @} end of clarke group
5354    */
5355 
5356   /**
5357    * @brief  Converts the elements of the Q7 vector to Q31 vector.
5358    * @param[in]  *pSrc     input pointer
5359    * @param[out]  *pDst    output pointer
5360    * @param[in]  blockSize number of samples to process
5361    * @return none.
5362    */
5363   void arm_q7_to_q31(
5364   q7_t * pSrc,
5365   q31_t * pDst,
5366   uint32_t blockSize);
5367 
5368 
5369 
5370 
5371   /**
5372    * @ingroup groupController
5373    */
5374 
5375   /**
5376    * @defgroup inv_clarke Vector Inverse Clarke Transform
5377    * Inverse Clarke transform converts the two-coordinate time invariant vector into instantaneous stator phases.
5378    *
5379    * The function operates on a single sample of data and each call to the function returns the processed output.
5380    * The library provides separate functions for Q31 and floating-point data types.
5381    * \par Algorithm
5382    * \image html clarkeInvFormula.gif
5383    * where <code>pIa</code> and <code>pIb</code> are the instantaneous stator phases and
5384    * <code>Ialpha</code> and <code>Ibeta</code> are the two coordinates of time invariant vector.
5385    * \par Fixed-Point Behavior
   * Care must be taken when using the Q31 version of the inverse Clarke transform.
5387    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5388    * Refer to the function specific documentation below for usage guidelines.
5389    */
5390 
5391   /**
5392    * @addtogroup inv_clarke
5393    * @{
5394    */
5395 
5396    /**
5397    * @brief  Floating-point Inverse Clarke transform
5398    * @param[in]       Ialpha  input two-phase orthogonal vector axis alpha
5399    * @param[in]       Ibeta   input two-phase orthogonal vector axis beta
5400    * @param[out]      *pIa    points to output three-phase coordinate <code>a</code>
5401    * @param[out]      *pIb    points to output three-phase coordinate <code>b</code>
5402    * @return none.
5403    */
5404 
5405 
arm_inv_clarke_f32(float32_t Ialpha,float32_t Ibeta,float32_t * pIa,float32_t * pIb)5406   static __INLINE void arm_inv_clarke_f32(
5407   float32_t Ialpha,
5408   float32_t Ibeta,
5409   float32_t * pIa,
5410   float32_t * pIb)
5411   {
5412     /* Calculating pIa from Ialpha by equation pIa = Ialpha */
5413     *pIa = Ialpha;
5414 
5415     /* Calculating pIb from Ialpha and Ibeta by equation pIb = -(1/2) * Ialpha + (sqrt(3)/2) * Ibeta */
5416     *pIb = -0.5 * Ialpha + (float32_t) 0.8660254039 *Ibeta;
5417 
5418   }
5419 
5420   /**
5421    * @brief  Inverse Clarke transform for Q31 version
5422    * @param[in]       Ialpha  input two-phase orthogonal vector axis alpha
5423    * @param[in]       Ibeta   input two-phase orthogonal vector axis beta
5424    * @param[out]      *pIa    points to output three-phase coordinate <code>a</code>
5425    * @param[out]      *pIb    points to output three-phase coordinate <code>b</code>
5426    * @return none.
5427    *
5428    * <b>Scaling and Overflow Behavior:</b>
5429    * \par
5430    * The function is implemented using an internal 32-bit accumulator.
5431    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5432    * There is saturation on the subtraction, hence there is no risk of overflow.
5433    */
5434 
arm_inv_clarke_q31(q31_t Ialpha,q31_t Ibeta,q31_t * pIa,q31_t * pIb)5435   static __INLINE void arm_inv_clarke_q31(
5436   q31_t Ialpha,
5437   q31_t Ibeta,
5438   q31_t * pIa,
5439   q31_t * pIb)
5440   {
5441     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5442 
5443     /* Calculating pIa from Ialpha by equation pIa = Ialpha */
5444     *pIa = Ialpha;
5445 
5446     /* Intermediate product is calculated by (1/(2*sqrt(3)) * Ia) */
5447     product1 = (q31_t) (((q63_t) (Ialpha) * (0x40000000)) >> 31);
5448 
5449     /* Intermediate product is calculated by (1/sqrt(3) * pIb) */
5450     product2 = (q31_t) (((q63_t) (Ibeta) * (0x6ED9EBA1)) >> 31);
5451 
5452     /* pIb is calculated by subtracting the products */
5453     *pIb = __QSUB(product2, product1);
5454 
5455   }
5456 
5457   /**
5458    * @} end of inv_clarke group
5459    */
5460 
5461   /**
5462    * @brief  Converts the elements of the Q7 vector to Q15 vector.
5463    * @param[in]  *pSrc     input pointer
5464    * @param[out] *pDst     output pointer
5465    * @param[in]  blockSize number of samples to process
5466    * @return none.
5467    */
5468   void arm_q7_to_q15(
5469   q7_t * pSrc,
5470   q15_t * pDst,
5471   uint32_t blockSize);
5472 
5473 
5474 
5475   /**
5476    * @ingroup groupController
5477    */
5478 
5479   /**
5480    * @defgroup park Vector Park Transform
5481    *
5482    * Forward Park transform converts the input two-coordinate vector to flux and torque components.
5483    * The Park transform can be used to realize the transformation of the <code>Ialpha</code> and the <code>Ibeta</code> currents
5484    * from the stationary to the moving reference frame and control the spatial relationship between
5485    * the stator vector current and rotor flux vector.
5486    * If we consider the d axis aligned with the rotor flux, the diagram below shows the
5487    * current vector and the relationship from the two reference frames:
5488    * \image html park.gif "Stator current space vector and its component in (a,b) and in the d,q rotating reference frame"
5489    *
5490    * The function operates on a single sample of data and each call to the function returns the processed output.
5491    * The library provides separate functions for Q31 and floating-point data types.
5492    * \par Algorithm
5493    * \image html parkFormula.gif
5494    * where <code>Ialpha</code> and <code>Ibeta</code> are the stator vector components,
5495    * <code>pId</code> and <code>pIq</code> are rotor vector components and <code>cosVal</code> and <code>sinVal</code> are the
5496    * cosine and sine values of theta (rotor flux position).
5497    * \par Fixed-Point Behavior
5498    * Care must be taken when using the Q31 version of the Park transform.
5499    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5500    * Refer to the function specific documentation below for usage guidelines.
5501    */
5502 
5503   /**
5504    * @addtogroup park
5505    * @{
5506    */
5507 
5508   /**
5509    * @brief Floating-point Park transform
5510    * @param[in]       Ialpha input two-phase vector coordinate alpha
5511    * @param[in]       Ibeta  input two-phase vector coordinate beta
5512    * @param[out]      *pId   points to output	rotor reference frame d
5513    * @param[out]      *pIq   points to output	rotor reference frame q
5514    * @param[in]       sinVal sine value of rotation angle theta
5515    * @param[in]       cosVal cosine value of rotation angle theta
5516    * @return none.
5517    *
5518    * The function implements the forward Park transform.
5519    *
5520    */
5521 
arm_park_f32(float32_t Ialpha,float32_t Ibeta,float32_t * pId,float32_t * pIq,float32_t sinVal,float32_t cosVal)5522   static __INLINE void arm_park_f32(
5523   float32_t Ialpha,
5524   float32_t Ibeta,
5525   float32_t * pId,
5526   float32_t * pIq,
5527   float32_t sinVal,
5528   float32_t cosVal)
5529   {
5530     /* Calculate pId using the equation, pId = Ialpha * cosVal + Ibeta * sinVal */
5531     *pId = Ialpha * cosVal + Ibeta * sinVal;
5532 
5533     /* Calculate pIq using the equation, pIq = - Ialpha * sinVal + Ibeta * cosVal */
5534     *pIq = -Ialpha * sinVal + Ibeta * cosVal;
5535 
5536   }
5537 
5538   /**
5539    * @brief  Park transform for Q31 version
5540    * @param[in]       Ialpha input two-phase vector coordinate alpha
5541    * @param[in]       Ibeta  input two-phase vector coordinate beta
5542    * @param[out]      *pId   points to output rotor reference frame d
5543    * @param[out]      *pIq   points to output rotor reference frame q
5544    * @param[in]       sinVal sine value of rotation angle theta
5545    * @param[in]       cosVal cosine value of rotation angle theta
5546    * @return none.
5547    *
5548    * <b>Scaling and Overflow Behavior:</b>
5549    * \par
5550    * The function is implemented using an internal 32-bit accumulator.
5551    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5552    * There is saturation on the addition and subtraction, hence there is no risk of overflow.
5553    */
5554 
5555 
arm_park_q31(q31_t Ialpha,q31_t Ibeta,q31_t * pId,q31_t * pIq,q31_t sinVal,q31_t cosVal)5556   static __INLINE void arm_park_q31(
5557   q31_t Ialpha,
5558   q31_t Ibeta,
5559   q31_t * pId,
5560   q31_t * pIq,
5561   q31_t sinVal,
5562   q31_t cosVal)
5563   {
5564     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5565     q31_t product3, product4;                    /* Temporary variables used to store intermediate results */
5566 
5567     /* Intermediate product is calculated by (Ialpha * cosVal) */
5568     product1 = (q31_t) (((q63_t) (Ialpha) * (cosVal)) >> 31);
5569 
5570     /* Intermediate product is calculated by (Ibeta * sinVal) */
5571     product2 = (q31_t) (((q63_t) (Ibeta) * (sinVal)) >> 31);
5572 
5573 
5574     /* Intermediate product is calculated by (Ialpha * sinVal) */
5575     product3 = (q31_t) (((q63_t) (Ialpha) * (sinVal)) >> 31);
5576 
5577     /* Intermediate product is calculated by (Ibeta * cosVal) */
5578     product4 = (q31_t) (((q63_t) (Ibeta) * (cosVal)) >> 31);
5579 
5580     /* Calculate pId by adding the two intermediate products 1 and 2 */
5581     *pId = __QADD(product1, product2);
5582 
5583     /* Calculate pIq by subtracting the two intermediate products 3 from 4 */
5584     *pIq = __QSUB(product4, product3);
5585   }
5586 
5587   /**
5588    * @} end of park group
5589    */
5590 
5591   /**
5592    * @brief  Converts the elements of the Q7 vector to floating-point vector.
5593    * @param[in]  *pSrc is input pointer
5594    * @param[out]  *pDst is output pointer
5595    * @param[in]  blockSize is the number of samples to process
5596    * @return none.
5597    */
5598   void arm_q7_to_float(
5599   q7_t * pSrc,
5600   float32_t * pDst,
5601   uint32_t blockSize);
5602 
5603 
5604   /**
5605    * @ingroup groupController
5606    */
5607 
5608   /**
5609    * @defgroup inv_park Vector Inverse Park transform
5610    * Inverse Park transform converts the input flux and torque components to two-coordinate vector.
5611    *
5612    * The function operates on a single sample of data and each call to the function returns the processed output.
5613    * The library provides separate functions for Q31 and floating-point data types.
5614    * \par Algorithm
5615    * \image html parkInvFormula.gif
5616    * where <code>pIalpha</code> and <code>pIbeta</code> are the stator vector components,
5617    * <code>Id</code> and <code>Iq</code> are rotor vector components and <code>cosVal</code> and <code>sinVal</code> are the
5618    * cosine and sine values of theta (rotor flux position).
5619    * \par Fixed-Point Behavior
   * Care must be taken when using the Q31 version of the inverse Park transform.
5621    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5622    * Refer to the function specific documentation below for usage guidelines.
5623    */
5624 
5625   /**
5626    * @addtogroup inv_park
5627    * @{
5628    */
5629 
5630    /**
5631    * @brief  Floating-point Inverse Park transform
5632    * @param[in]       Id        input coordinate of rotor reference frame d
5633    * @param[in]       Iq        input coordinate of rotor reference frame q
5634    * @param[out]      *pIalpha  points to output two-phase orthogonal vector axis alpha
5635    * @param[out]      *pIbeta   points to output two-phase orthogonal vector axis beta
5636    * @param[in]       sinVal    sine value of rotation angle theta
5637    * @param[in]       cosVal    cosine value of rotation angle theta
5638    * @return none.
5639    */
5640 
arm_inv_park_f32(float32_t Id,float32_t Iq,float32_t * pIalpha,float32_t * pIbeta,float32_t sinVal,float32_t cosVal)5641   static __INLINE void arm_inv_park_f32(
5642   float32_t Id,
5643   float32_t Iq,
5644   float32_t * pIalpha,
5645   float32_t * pIbeta,
5646   float32_t sinVal,
5647   float32_t cosVal)
5648   {
5649     /* Calculate pIalpha using the equation, pIalpha = Id * cosVal - Iq * sinVal */
5650     *pIalpha = Id * cosVal - Iq * sinVal;
5651 
5652     /* Calculate pIbeta using the equation, pIbeta = Id * sinVal + Iq * cosVal */
5653     *pIbeta = Id * sinVal + Iq * cosVal;
5654 
5655   }
5656 
5657 
5658   /**
5659    * @brief  Inverse Park transform for	Q31 version
5660    * @param[in]       Id        input coordinate of rotor reference frame d
5661    * @param[in]       Iq        input coordinate of rotor reference frame q
5662    * @param[out]      *pIalpha  points to output two-phase orthogonal vector axis alpha
5663    * @param[out]      *pIbeta   points to output two-phase orthogonal vector axis beta
5664    * @param[in]       sinVal    sine value of rotation angle theta
5665    * @param[in]       cosVal    cosine value of rotation angle theta
5666    * @return none.
5667    *
5668    * <b>Scaling and Overflow Behavior:</b>
5669    * \par
5670    * The function is implemented using an internal 32-bit accumulator.
5671    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5672    * There is saturation on the addition, hence there is no risk of overflow.
5673    */
5674 
5675 
arm_inv_park_q31(q31_t Id,q31_t Iq,q31_t * pIalpha,q31_t * pIbeta,q31_t sinVal,q31_t cosVal)5676   static __INLINE void arm_inv_park_q31(
5677   q31_t Id,
5678   q31_t Iq,
5679   q31_t * pIalpha,
5680   q31_t * pIbeta,
5681   q31_t sinVal,
5682   q31_t cosVal)
5683   {
5684     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5685     q31_t product3, product4;                    /* Temporary variables used to store intermediate results */
5686 
5687     /* Intermediate product is calculated by (Id * cosVal) */
5688     product1 = (q31_t) (((q63_t) (Id) * (cosVal)) >> 31);
5689 
5690     /* Intermediate product is calculated by (Iq * sinVal) */
5691     product2 = (q31_t) (((q63_t) (Iq) * (sinVal)) >> 31);
5692 
5693 
5694     /* Intermediate product is calculated by (Id * sinVal) */
5695     product3 = (q31_t) (((q63_t) (Id) * (sinVal)) >> 31);
5696 
5697     /* Intermediate product is calculated by (Iq * cosVal) */
5698     product4 = (q31_t) (((q63_t) (Iq) * (cosVal)) >> 31);
5699 
5700     /* Calculate pIalpha by using the two intermediate products 1 and 2 */
5701     *pIalpha = __QSUB(product1, product2);
5702 
5703     /* Calculate pIbeta by using the two intermediate products 3 and 4 */
5704     *pIbeta = __QADD(product4, product3);
5705 
5706   }
5707 
5708   /**
5709    * @} end of Inverse park group
5710    */
5711 
5712 
5713   /**
5714    * @brief  Converts the elements of the Q31 vector to floating-point vector.
5715    * @param[in]  *pSrc is input pointer
5716    * @param[out]  *pDst is output pointer
5717    * @param[in]  blockSize is the number of samples to process
5718    * @return none.
5719    */
5720   void arm_q31_to_float(
5721   q31_t * pSrc,
5722   float32_t * pDst,
5723   uint32_t blockSize);
5724 
5725   /**
5726    * @ingroup groupInterpolation
5727    */
5728 
5729   /**
5730    * @defgroup LinearInterpolate Linear Interpolation
5731    *
5732    * Linear interpolation is a method of curve fitting using linear polynomials.
5733    * Linear interpolation works by effectively drawing a straight line between two neighboring samples and returning the appropriate point along that line
5734    *
5735    * \par
5736    * \image html LinearInterp.gif "Linear interpolation"
5737    *
5738    * \par
   * A linear interpolation function calculates an output value (y) for the input (x)
   * using linear interpolation of the input values x0, x1 (nearest input values) and the output values y0 and y1 (nearest output values).
5741    *
5742    * \par Algorithm:
5743    * <pre>
5744    *       y = y0 + (x - x0) * ((y1 - y0)/(x1-x0))
5745    *       where x0, x1 are nearest values of input x
5746    *             y0, y1 are nearest values to output y
5747    * </pre>
5748    *
5749    * \par
5750    * This set of functions implements Linear interpolation process
5751    * for Q7, Q15, Q31, and floating-point data types.  The functions operate on a single
5752    * sample of data and each call to the function returns a single processed value.
5753    * <code>S</code> points to an instance of the Linear Interpolate function data structure.
   * <code>x</code> is the input sample value. The function returns the output value.
5755    *
5756    * \par
   * If x is outside of the table boundary, linear interpolation returns the first value of the table
   * if x is below the input range, and the last value of the table if x is above the range.
5759    */
5760 
5761   /**
5762    * @addtogroup LinearInterpolate
5763    * @{
5764    */
5765 
5766   /**
5767    * @brief  Process function for the floating-point Linear Interpolation Function.
5768    * @param[in,out] *S is an instance of the floating-point Linear Interpolation structure
5769    * @param[in] x input sample to process
5770    * @return y processed output sample.
5771    *
5772    */
5773 
arm_linear_interp_f32(arm_linear_interp_instance_f32 * S,float32_t x)5774   static __INLINE float32_t arm_linear_interp_f32(
5775   arm_linear_interp_instance_f32 * S,
5776   float32_t x)
5777   {
5778 
5779     float32_t y;
5780     float32_t x0, x1;                            /* Nearest input values */
5781     float32_t y0, y1;                            /* Nearest output values */
5782     float32_t xSpacing = S->xSpacing;            /* spacing between input values */
5783     int32_t i;                                   /* Index variable */
5784     float32_t *pYData = S->pYData;               /* pointer to output table */
5785 
5786     /* Calculation of index */
5787     i = (int32_t) ((x - S->x1) / xSpacing);
5788 
5789     if(i < 0)
5790     {
5791       /* Iniatilize output for below specified range as least output value of table */
5792       y = pYData[0];
5793     }
5794     else if((uint32_t)i >= S->nValues)
5795     {
5796       /* Iniatilize output for above specified range as last output value of table */
5797       y = pYData[S->nValues - 1];
5798     }
5799     else
5800     {
5801       /* Calculation of nearest input values */
5802       x0 = S->x1 + i * xSpacing;
5803       x1 = S->x1 + (i + 1) * xSpacing;
5804 
5805       /* Read of nearest output values */
5806       y0 = pYData[i];
5807       y1 = pYData[i + 1];
5808 
5809       /* Calculation of output */
5810       y = y0 + (x - x0) * ((y1 - y0) / (x1 - x0));
5811 
5812     }
5813 
5814     /* returns output value */
5815     return (y);
5816   }
5817 
5818    /**
5819    *
5820    * @brief  Process function for the Q31 Linear Interpolation Function.
5821    * @param[in] *pYData  pointer to Q31 Linear Interpolation table
5822    * @param[in] x input sample to process
5823    * @param[in] nValues number of table values
5824    * @return y processed output sample.
5825    *
5826    * \par
5827    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
5828    * This function can support maximum of table size 2^12.
5829    *
5830    */
5831 
5832 
arm_linear_interp_q31(q31_t * pYData,q31_t x,uint32_t nValues)5833   static __INLINE q31_t arm_linear_interp_q31(
5834   q31_t * pYData,
5835   q31_t x,
5836   uint32_t nValues)
5837   {
5838     q31_t y;                                     /* output */
5839     q31_t y0, y1;                                /* Nearest output values */
5840     q31_t fract;                                 /* fractional part */
5841     int32_t index;                               /* Index to read nearest output values */
5842 
5843     /* Input is in 12.20 format */
5844     /* 12 bits for the table index */
5845     /* Index value calculation */
5846     index = ((x & 0xFFF00000) >> 20);
5847 
5848     if(index >= (int32_t)(nValues - 1))
5849     {
5850       return (pYData[nValues - 1]);
5851     }
5852     else if(index < 0)
5853     {
5854       return (pYData[0]);
5855     }
5856     else
5857     {
5858 
5859       /* 20 bits for the fractional part */
5860       /* shift left by 11 to keep fract in 1.31 format */
5861       fract = (x & 0x000FFFFF) << 11;
5862 
5863       /* Read two nearest output values from the index in 1.31(q31) format */
5864       y0 = pYData[index];
5865       y1 = pYData[index + 1u];
5866 
5867       /* Calculation of y0 * (1-fract) and y is in 2.30 format */
5868       y = ((q31_t) ((q63_t) y0 * (0x7FFFFFFF - fract) >> 32));
5869 
5870       /* Calculation of y0 * (1-fract) + y1 *fract and y is in 2.30 format */
5871       y += ((q31_t) (((q63_t) y1 * fract) >> 32));
5872 
5873       /* Convert y to 1.31 format */
5874       return (y << 1u);
5875 
5876     }
5877 
5878   }
5879 
5880   /**
5881    *
5882    * @brief  Process function for the Q15 Linear Interpolation Function.
5883    * @param[in] *pYData  pointer to Q15 Linear Interpolation table
5884    * @param[in] x input sample to process
5885    * @param[in] nValues number of table values
5886    * @return y processed output sample.
5887    *
5888    * \par
5889    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
5890    * This function can support maximum of table size 2^12.
5891    *
5892    */
5893 
5894 
arm_linear_interp_q15(q15_t * pYData,q31_t x,uint32_t nValues)5895   static __INLINE q15_t arm_linear_interp_q15(
5896   q15_t * pYData,
5897   q31_t x,
5898   uint32_t nValues)
5899   {
5900     q63_t y;                                     /* output */
5901     q15_t y0, y1;                                /* Nearest output values */
5902     q31_t fract;                                 /* fractional part */
5903     int32_t index;                               /* Index to read nearest output values */
5904 
5905     /* Input is in 12.20 format */
5906     /* 12 bits for the table index */
5907     /* Index value calculation */
5908     index = ((x & 0xFFF00000) >> 20u);
5909 
5910     if(index >= (int32_t)(nValues - 1))
5911     {
5912       return (pYData[nValues - 1]);
5913     }
5914     else if(index < 0)
5915     {
5916       return (pYData[0]);
5917     }
5918     else
5919     {
5920       /* 20 bits for the fractional part */
5921       /* fract is in 12.20 format */
5922       fract = (x & 0x000FFFFF);
5923 
5924       /* Read two nearest output values from the index */
5925       y0 = pYData[index];
5926       y1 = pYData[index + 1u];
5927 
5928       /* Calculation of y0 * (1-fract) and y is in 13.35 format */
5929       y = ((q63_t) y0 * (0xFFFFF - fract));
5930 
5931       /* Calculation of (y0 * (1-fract) + y1 * fract) and y is in 13.35 format */
5932       y += ((q63_t) y1 * (fract));
5933 
5934       /* convert y to 1.15 format */
5935       return (y >> 20);
5936     }
5937 
5938 
5939   }
5940 
5941   /**
5942    *
5943    * @brief  Process function for the Q7 Linear Interpolation Function.
5944    * @param[in] *pYData  pointer to Q7 Linear Interpolation table
5945    * @param[in] x input sample to process
5946    * @param[in] nValues number of table values
5947    * @return y processed output sample.
5948    *
5949    * \par
5950    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
5951    * This function can support maximum of table size 2^12.
5952    */
5953 
5954 
arm_linear_interp_q7(q7_t * pYData,q31_t x,uint32_t nValues)5955   static __INLINE q7_t arm_linear_interp_q7(
5956   q7_t * pYData,
5957   q31_t x,
5958   uint32_t nValues)
5959   {
5960     q31_t y;                                     /* output */
5961     q7_t y0, y1;                                 /* Nearest output values */
5962     q31_t fract;                                 /* fractional part */
5963     uint32_t index;                              /* Index to read nearest output values */
5964 
5965     /* Input is in 12.20 format */
5966     /* 12 bits for the table index */
5967     /* Index value calculation */
5968     if (x < 0)
5969     {
5970       return (pYData[0]);
5971     }
5972     index = (x >> 20) & 0xfff;
5973 
5974 
5975     if(index >= (nValues - 1))
5976     {
5977       return (pYData[nValues - 1]);
5978     }
5979     else
5980     {
5981 
5982       /* 20 bits for the fractional part */
5983       /* fract is in 12.20 format */
5984       fract = (x & 0x000FFFFF);
5985 
5986       /* Read two nearest output values from the index and are in 1.7(q7) format */
5987       y0 = pYData[index];
5988       y1 = pYData[index + 1u];
5989 
5990       /* Calculation of y0 * (1-fract ) and y is in 13.27(q27) format */
5991       y = ((y0 * (0xFFFFF - fract)));
5992 
5993       /* Calculation of y1 * fract + y0 * (1-fract) and y is in 13.27(q27) format */
5994       y += (y1 * fract);
5995 
5996       /* convert y to 1.7(q7) format */
5997       return (y >> 20u);
5998 
5999     }
6000 
6001   }
6002   /**
6003    * @} end of LinearInterpolate group
6004    */
6005 
6006   /**
6007    * @brief  Fast approximation to the trigonometric sine function for floating-point data.
6008    * @param[in] x input value in radians.
6009    * @return  sin(x).
6010    */
6011 
6012   float32_t arm_sin_f32(
6013   float32_t x);
6014 
6015   /**
6016    * @brief  Fast approximation to the trigonometric sine function for Q31 data.
6017    * @param[in] x Scaled input value in radians.
6018    * @return  sin(x).
6019    */
6020 
6021   q31_t arm_sin_q31(
6022   q31_t x);
6023 
6024   /**
6025    * @brief  Fast approximation to the trigonometric sine function for Q15 data.
6026    * @param[in] x Scaled input value in radians.
6027    * @return  sin(x).
6028    */
6029 
6030   q15_t arm_sin_q15(
6031   q15_t x);
6032 
6033   /**
6034    * @brief  Fast approximation to the trigonometric cosine function for floating-point data.
6035    * @param[in] x input value in radians.
6036    * @return  cos(x).
6037    */
6038 
6039   float32_t arm_cos_f32(
6040   float32_t x);
6041 
6042   /**
6043    * @brief Fast approximation to the trigonometric cosine function for Q31 data.
6044    * @param[in] x Scaled input value in radians.
6045    * @return  cos(x).
6046    */
6047 
6048   q31_t arm_cos_q31(
6049   q31_t x);
6050 
6051   /**
6052    * @brief  Fast approximation to the trigonometric cosine function for Q15 data.
6053    * @param[in] x Scaled input value in radians.
6054    * @return  cos(x).
6055    */
6056 
6057   q15_t arm_cos_q15(
6058   q15_t x);
6059 
6060 
6061   /**
6062    * @ingroup groupFastMath
6063    */
6064 
6065 
6066   /**
6067    * @defgroup SQRT Square Root
6068    *
6069    * Computes the square root of a number.
6070    * There are separate functions for Q15, Q31, and floating-point data types.
6071    * The square root function is computed using the Newton-Raphson algorithm.
6072    * This is an iterative algorithm of the form:
6073    * <pre>
6074    *      x1 = x0 - f(x0)/f'(x0)
6075    * </pre>
6076    * where <code>x1</code> is the current estimate,
6077    * <code>x0</code> is the previous estimate, and
6078    * <code>f'(x0)</code> is the derivative of <code>f()</code> evaluated at <code>x0</code>.
6079    * For the square root function, the algorithm reduces to:
6080    * <pre>
6081    *     x0 = in/2                         [initial guess]
6082    *     x1 = 1/2 * ( x0 + in / x0)        [each iteration]
6083    * </pre>
6084    */
6085 
6086 
6087   /**
6088    * @addtogroup SQRT
6089    * @{
6090    */
6091 
6092   /**
6093    * @brief  Floating-point square root function.
6094    * @param[in]  in     input value.
6095    * @param[out] *pOut  square root of input value.
   * @return The function returns ARM_MATH_SUCCESS if the input value is non-negative, or ARM_MATH_ARGUMENT_ERROR if
   * <code>in</code> is negative, in which case the output is set to zero.
6098    */
6099 
arm_sqrt_f32(float32_t in,float32_t * pOut)6100   static __INLINE arm_status arm_sqrt_f32(
6101   float32_t in,
6102   float32_t * pOut)
6103   {
6104     if(in >= 0.0f)
6105     {
6106 
6107 //      #if __FPU_USED
6108 #if (__FPU_USED == 1) && defined ( __CC_ARM   )
6109       *pOut = __sqrtf(in);
6110 #else
6111       *pOut = sqrtf(in);
6112 #endif
6113 
6114       return (ARM_MATH_SUCCESS);
6115     }
6116     else
6117     {
6118       *pOut = 0.0f;
6119       return (ARM_MATH_ARGUMENT_ERROR);
6120     }
6121 
6122   }
6123 
6124 
6125   /**
6126    * @brief Q31 square root function.
6127    * @param[in]   in    input value.  The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF.
6128    * @param[out]  *pOut square root of input value.
6129    * @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
6130    * <code>in</code> is negative value and returns zero output for negative values.
6131    */
6132   arm_status arm_sqrt_q31(
6133   q31_t in,
6134   q31_t * pOut);
6135 
6136   /**
6137    * @brief  Q15 square root function.
6138    * @param[in]   in     input value.  The range of the input value is [0 +1) or 0x0000 to 0x7FFF.
6139    * @param[out]  *pOut  square root of input value.
6140    * @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
6141    * <code>in</code> is negative value and returns zero output for negative values.
6142    */
6143   arm_status arm_sqrt_q15(
6144   q15_t in,
6145   q15_t * pOut);
6146 
6147   /**
6148    * @} end of SQRT group
6149    */
6150 
6151 
6152 
6153 
6154 
6155 
6156   /**
6157    * @brief floating-point Circular write function.
6158    */
6159 
arm_circularWrite_f32(int32_t * circBuffer,int32_t L,uint16_t * writeOffset,int32_t bufferInc,const int32_t * src,int32_t srcInc,uint32_t blockSize)6160   static __INLINE void arm_circularWrite_f32(
6161   int32_t * circBuffer,
6162   int32_t L,
6163   uint16_t * writeOffset,
6164   int32_t bufferInc,
6165   const int32_t * src,
6166   int32_t srcInc,
6167   uint32_t blockSize)
6168   {
6169     uint32_t i = 0u;
6170     int32_t wOffset;
6171 
6172     /* Copy the value of Index pointer that points
6173      * to the current location where the input samples to be copied */
6174     wOffset = *writeOffset;
6175 
6176     /* Loop over the blockSize */
6177     i = blockSize;
6178 
6179     while(i > 0u)
6180     {
6181       /* copy the input sample to the circular buffer */
6182       circBuffer[wOffset] = *src;
6183 
6184       /* Update the input pointer */
6185       src += srcInc;
6186 
6187       /* Circularly update wOffset.  Watch out for positive and negative value */
6188       wOffset += bufferInc;
6189       if(wOffset >= L)
6190         wOffset -= L;
6191 
6192       /* Decrement the loop counter */
6193       i--;
6194     }
6195 
6196     /* Update the index pointer */
6197     *writeOffset = wOffset;
6198   }
6199 
6200 
6201 
6202   /**
6203    * @brief floating-point Circular Read function.
6204    */
arm_circularRead_f32(int32_t * circBuffer,int32_t L,int32_t * readOffset,int32_t bufferInc,int32_t * dst,int32_t * dst_base,int32_t dst_length,int32_t dstInc,uint32_t blockSize)6205   static __INLINE void arm_circularRead_f32(
6206   int32_t * circBuffer,
6207   int32_t L,
6208   int32_t * readOffset,
6209   int32_t bufferInc,
6210   int32_t * dst,
6211   int32_t * dst_base,
6212   int32_t dst_length,
6213   int32_t dstInc,
6214   uint32_t blockSize)
6215   {
6216     uint32_t i = 0u;
6217     int32_t rOffset, dst_end;
6218 
6219     /* Copy the value of Index pointer that points
6220      * to the current location from where the input samples to be read */
6221     rOffset = *readOffset;
6222     dst_end = (int32_t) (dst_base + dst_length);
6223 
6224     /* Loop over the blockSize */
6225     i = blockSize;
6226 
6227     while(i > 0u)
6228     {
6229       /* copy the sample from the circular buffer to the destination buffer */
6230       *dst = circBuffer[rOffset];
6231 
6232       /* Update the input pointer */
6233       dst += dstInc;
6234 
6235       if(dst == (int32_t *) dst_end)
6236       {
6237         dst = dst_base;
6238       }
6239 
6240       /* Circularly update rOffset.  Watch out for positive and negative value  */
6241       rOffset += bufferInc;
6242 
6243       if(rOffset >= L)
6244       {
6245         rOffset -= L;
6246       }
6247 
6248       /* Decrement the loop counter */
6249       i--;
6250     }
6251 
6252     /* Update the index pointer */
6253     *readOffset = rOffset;
6254   }
6255 
6256   /**
6257    * @brief Q15 Circular write function.
6258    */
6259 
arm_circularWrite_q15(q15_t * circBuffer,int32_t L,uint16_t * writeOffset,int32_t bufferInc,const q15_t * src,int32_t srcInc,uint32_t blockSize)6260   static __INLINE void arm_circularWrite_q15(
6261   q15_t * circBuffer,
6262   int32_t L,
6263   uint16_t * writeOffset,
6264   int32_t bufferInc,
6265   const q15_t * src,
6266   int32_t srcInc,
6267   uint32_t blockSize)
6268   {
6269     uint32_t i = 0u;
6270     int32_t wOffset;
6271 
6272     /* Copy the value of Index pointer that points
6273      * to the current location where the input samples to be copied */
6274     wOffset = *writeOffset;
6275 
6276     /* Loop over the blockSize */
6277     i = blockSize;
6278 
6279     while(i > 0u)
6280     {
6281       /* copy the input sample to the circular buffer */
6282       circBuffer[wOffset] = *src;
6283 
6284       /* Update the input pointer */
6285       src += srcInc;
6286 
6287       /* Circularly update wOffset.  Watch out for positive and negative value */
6288       wOffset += bufferInc;
6289       if(wOffset >= L)
6290         wOffset -= L;
6291 
6292       /* Decrement the loop counter */
6293       i--;
6294     }
6295 
6296     /* Update the index pointer */
6297     *writeOffset = wOffset;
6298   }
6299 
6300 
6301 
6302   /**
6303    * @brief Q15 Circular Read function.
6304    */
arm_circularRead_q15(q15_t * circBuffer,int32_t L,int32_t * readOffset,int32_t bufferInc,q15_t * dst,q15_t * dst_base,int32_t dst_length,int32_t dstInc,uint32_t blockSize)6305   static __INLINE void arm_circularRead_q15(
6306   q15_t * circBuffer,
6307   int32_t L,
6308   int32_t * readOffset,
6309   int32_t bufferInc,
6310   q15_t * dst,
6311   q15_t * dst_base,
6312   int32_t dst_length,
6313   int32_t dstInc,
6314   uint32_t blockSize)
6315   {
6316     uint32_t i = 0;
6317     int32_t rOffset, dst_end;
6318 
6319     /* Copy the value of Index pointer that points
6320      * to the current location from where the input samples to be read */
6321     rOffset = *readOffset;
6322 
6323     dst_end = (int32_t) (dst_base + dst_length);
6324 
6325     /* Loop over the blockSize */
6326     i = blockSize;
6327 
6328     while(i > 0u)
6329     {
6330       /* copy the sample from the circular buffer to the destination buffer */
6331       *dst = circBuffer[rOffset];
6332 
6333       /* Update the input pointer */
6334       dst += dstInc;
6335 
6336       if(dst == (q15_t *) dst_end)
6337       {
6338         dst = dst_base;
6339       }
6340 
6341       /* Circularly update wOffset.  Watch out for positive and negative value */
6342       rOffset += bufferInc;
6343 
6344       if(rOffset >= L)
6345       {
6346         rOffset -= L;
6347       }
6348 
6349       /* Decrement the loop counter */
6350       i--;
6351     }
6352 
6353     /* Update the index pointer */
6354     *readOffset = rOffset;
6355   }
6356 
6357 
6358   /**
6359    * @brief Q7 Circular write function.
6360    */
6361 
arm_circularWrite_q7(q7_t * circBuffer,int32_t L,uint16_t * writeOffset,int32_t bufferInc,const q7_t * src,int32_t srcInc,uint32_t blockSize)6362   static __INLINE void arm_circularWrite_q7(
6363   q7_t * circBuffer,
6364   int32_t L,
6365   uint16_t * writeOffset,
6366   int32_t bufferInc,
6367   const q7_t * src,
6368   int32_t srcInc,
6369   uint32_t blockSize)
6370   {
6371     uint32_t i = 0u;
6372     int32_t wOffset;
6373 
6374     /* Copy the value of Index pointer that points
6375      * to the current location where the input samples to be copied */
6376     wOffset = *writeOffset;
6377 
6378     /* Loop over the blockSize */
6379     i = blockSize;
6380 
6381     while(i > 0u)
6382     {
6383       /* copy the input sample to the circular buffer */
6384       circBuffer[wOffset] = *src;
6385 
6386       /* Update the input pointer */
6387       src += srcInc;
6388 
6389       /* Circularly update wOffset.  Watch out for positive and negative value */
6390       wOffset += bufferInc;
6391       if(wOffset >= L)
6392         wOffset -= L;
6393 
6394       /* Decrement the loop counter */
6395       i--;
6396     }
6397 
6398     /* Update the index pointer */
6399     *writeOffset = wOffset;
6400   }
6401 
6402 
6403 
6404   /**
6405    * @brief Q7 Circular Read function.
6406    */
arm_circularRead_q7(q7_t * circBuffer,int32_t L,int32_t * readOffset,int32_t bufferInc,q7_t * dst,q7_t * dst_base,int32_t dst_length,int32_t dstInc,uint32_t blockSize)6407   static __INLINE void arm_circularRead_q7(
6408   q7_t * circBuffer,
6409   int32_t L,
6410   int32_t * readOffset,
6411   int32_t bufferInc,
6412   q7_t * dst,
6413   q7_t * dst_base,
6414   int32_t dst_length,
6415   int32_t dstInc,
6416   uint32_t blockSize)
6417   {
6418     uint32_t i = 0;
6419     int32_t rOffset, dst_end;
6420 
6421     /* Copy the value of Index pointer that points
6422      * to the current location from where the input samples to be read */
6423     rOffset = *readOffset;
6424 
6425     dst_end = (int32_t) (dst_base + dst_length);
6426 
6427     /* Loop over the blockSize */
6428     i = blockSize;
6429 
6430     while(i > 0u)
6431     {
6432       /* copy the sample from the circular buffer to the destination buffer */
6433       *dst = circBuffer[rOffset];
6434 
6435       /* Update the input pointer */
6436       dst += dstInc;
6437 
6438       if(dst == (q7_t *) dst_end)
6439       {
6440         dst = dst_base;
6441       }
6442 
6443       /* Circularly update rOffset.  Watch out for positive and negative value */
6444       rOffset += bufferInc;
6445 
6446       if(rOffset >= L)
6447       {
6448         rOffset -= L;
6449       }
6450 
6451       /* Decrement the loop counter */
6452       i--;
6453     }
6454 
6455     /* Update the index pointer */
6456     *readOffset = rOffset;
6457   }
6458 
6459 
6460   /**
6461    * @brief  Sum of the squares of the elements of a Q31 vector.
6462    * @param[in]  *pSrc is input pointer
6463    * @param[in]  blockSize is the number of samples to process
6464    * @param[out]  *pResult is output value.
6465    * @return none.
6466    */
6467 
6468   void arm_power_q31(
6469   q31_t * pSrc,
6470   uint32_t blockSize,
6471   q63_t * pResult);
6472 
6473   /**
6474    * @brief  Sum of the squares of the elements of a floating-point vector.
6475    * @param[in]  *pSrc is input pointer
6476    * @param[in]  blockSize is the number of samples to process
6477    * @param[out]  *pResult is output value.
6478    * @return none.
6479    */
6480 
6481   void arm_power_f32(
6482   float32_t * pSrc,
6483   uint32_t blockSize,
6484   float32_t * pResult);
6485 
6486   /**
6487    * @brief  Sum of the squares of the elements of a Q15 vector.
6488    * @param[in]  *pSrc is input pointer
6489    * @param[in]  blockSize is the number of samples to process
6490    * @param[out]  *pResult is output value.
6491    * @return none.
6492    */
6493 
6494   void arm_power_q15(
6495   q15_t * pSrc,
6496   uint32_t blockSize,
6497   q63_t * pResult);
6498 
6499   /**
6500    * @brief  Sum of the squares of the elements of a Q7 vector.
6501    * @param[in]  *pSrc is input pointer
6502    * @param[in]  blockSize is the number of samples to process
6503    * @param[out]  *pResult is output value.
6504    * @return none.
6505    */
6506 
6507   void arm_power_q7(
6508   q7_t * pSrc,
6509   uint32_t blockSize,
6510   q31_t * pResult);
6511 
6512   /**
6513    * @brief  Mean value of a Q7 vector.
6514    * @param[in]  *pSrc is input pointer
6515    * @param[in]  blockSize is the number of samples to process
6516    * @param[out]  *pResult is output value.
6517    * @return none.
6518    */
6519 
6520   void arm_mean_q7(
6521   q7_t * pSrc,
6522   uint32_t blockSize,
6523   q7_t * pResult);
6524 
6525   /**
6526    * @brief  Mean value of a Q15 vector.
6527    * @param[in]  *pSrc is input pointer
6528    * @param[in]  blockSize is the number of samples to process
6529    * @param[out]  *pResult is output value.
6530    * @return none.
6531    */
6532   void arm_mean_q15(
6533   q15_t * pSrc,
6534   uint32_t blockSize,
6535   q15_t * pResult);
6536 
6537   /**
6538    * @brief  Mean value of a Q31 vector.
6539    * @param[in]  *pSrc is input pointer
6540    * @param[in]  blockSize is the number of samples to process
6541    * @param[out]  *pResult is output value.
6542    * @return none.
6543    */
6544   void arm_mean_q31(
6545   q31_t * pSrc,
6546   uint32_t blockSize,
6547   q31_t * pResult);
6548 
6549   /**
6550    * @brief  Mean value of a floating-point vector.
6551    * @param[in]  *pSrc is input pointer
6552    * @param[in]  blockSize is the number of samples to process
6553    * @param[out]  *pResult is output value.
6554    * @return none.
6555    */
6556   void arm_mean_f32(
6557   float32_t * pSrc,
6558   uint32_t blockSize,
6559   float32_t * pResult);
6560 
6561   /**
6562    * @brief  Variance of the elements of a floating-point vector.
6563    * @param[in]  *pSrc is input pointer
6564    * @param[in]  blockSize is the number of samples to process
6565    * @param[out]  *pResult is output value.
6566    * @return none.
6567    */
6568 
6569   void arm_var_f32(
6570   float32_t * pSrc,
6571   uint32_t blockSize,
6572   float32_t * pResult);
6573 
6574   /**
6575    * @brief  Variance of the elements of a Q31 vector.
6576    * @param[in]  *pSrc is input pointer
6577    * @param[in]  blockSize is the number of samples to process
6578    * @param[out]  *pResult is output value.
6579    * @return none.
6580    */
6581 
6582   void arm_var_q31(
6583   q31_t * pSrc,
6584   uint32_t blockSize,
6585   q31_t * pResult);
6586 
6587   /**
6588    * @brief  Variance of the elements of a Q15 vector.
6589    * @param[in]  *pSrc is input pointer
6590    * @param[in]  blockSize is the number of samples to process
6591    * @param[out]  *pResult is output value.
6592    * @return none.
6593    */
6594 
6595   void arm_var_q15(
6596   q15_t * pSrc,
6597   uint32_t blockSize,
6598   q15_t * pResult);
6599 
6600   /**
6601    * @brief  Root Mean Square of the elements of a floating-point vector.
6602    * @param[in]  *pSrc is input pointer
6603    * @param[in]  blockSize is the number of samples to process
6604    * @param[out]  *pResult is output value.
6605    * @return none.
6606    */
6607 
6608   void arm_rms_f32(
6609   float32_t * pSrc,
6610   uint32_t blockSize,
6611   float32_t * pResult);
6612 
6613   /**
6614    * @brief  Root Mean Square of the elements of a Q31 vector.
6615    * @param[in]  *pSrc is input pointer
6616    * @param[in]  blockSize is the number of samples to process
6617    * @param[out]  *pResult is output value.
6618    * @return none.
6619    */
6620 
6621   void arm_rms_q31(
6622   q31_t * pSrc,
6623   uint32_t blockSize,
6624   q31_t * pResult);
6625 
6626   /**
6627    * @brief  Root Mean Square of the elements of a Q15 vector.
6628    * @param[in]  *pSrc is input pointer
6629    * @param[in]  blockSize is the number of samples to process
6630    * @param[out]  *pResult is output value.
6631    * @return none.
6632    */
6633 
6634   void arm_rms_q15(
6635   q15_t * pSrc,
6636   uint32_t blockSize,
6637   q15_t * pResult);
6638 
6639   /**
6640    * @brief  Standard deviation of the elements of a floating-point vector.
6641    * @param[in]  *pSrc is input pointer
6642    * @param[in]  blockSize is the number of samples to process
6643    * @param[out]  *pResult is output value.
6644    * @return none.
6645    */
6646 
6647   void arm_std_f32(
6648   float32_t * pSrc,
6649   uint32_t blockSize,
6650   float32_t * pResult);
6651 
6652   /**
6653    * @brief  Standard deviation of the elements of a Q31 vector.
6654    * @param[in]  *pSrc is input pointer
6655    * @param[in]  blockSize is the number of samples to process
6656    * @param[out]  *pResult is output value.
6657    * @return none.
6658    */
6659 
6660   void arm_std_q31(
6661   q31_t * pSrc,
6662   uint32_t blockSize,
6663   q31_t * pResult);
6664 
6665   /**
6666    * @brief  Standard deviation of the elements of a Q15 vector.
6667    * @param[in]  *pSrc is input pointer
6668    * @param[in]  blockSize is the number of samples to process
6669    * @param[out]  *pResult is output value.
6670    * @return none.
6671    */
6672 
6673   void arm_std_q15(
6674   q15_t * pSrc,
6675   uint32_t blockSize,
6676   q15_t * pResult);
6677 
6678   /**
6679    * @brief  Floating-point complex magnitude
6680    * @param[in]  *pSrc points to the complex input vector
6681    * @param[out]  *pDst points to the real output vector
6682    * @param[in]  numSamples number of complex samples in the input vector
6683    * @return none.
6684    */
6685 
6686   void arm_cmplx_mag_f32(
6687   float32_t * pSrc,
6688   float32_t * pDst,
6689   uint32_t numSamples);
6690 
6691   /**
6692    * @brief  Q31 complex magnitude
6693    * @param[in]  *pSrc points to the complex input vector
6694    * @param[out]  *pDst points to the real output vector
6695    * @param[in]  numSamples number of complex samples in the input vector
6696    * @return none.
6697    */
6698 
6699   void arm_cmplx_mag_q31(
6700   q31_t * pSrc,
6701   q31_t * pDst,
6702   uint32_t numSamples);
6703 
6704   /**
6705    * @brief  Q15 complex magnitude
6706    * @param[in]  *pSrc points to the complex input vector
6707    * @param[out]  *pDst points to the real output vector
6708    * @param[in]  numSamples number of complex samples in the input vector
6709    * @return none.
6710    */
6711 
6712   void arm_cmplx_mag_q15(
6713   q15_t * pSrc,
6714   q15_t * pDst,
6715   uint32_t numSamples);
6716 
6717   /**
6718    * @brief  Q15 complex dot product
6719    * @param[in]  *pSrcA points to the first input vector
6720    * @param[in]  *pSrcB points to the second input vector
6721    * @param[in]  numSamples number of complex samples in each vector
6722    * @param[out]  *realResult real part of the result returned here
6723    * @param[out]  *imagResult imaginary part of the result returned here
6724    * @return none.
6725    */
6726 
6727   void arm_cmplx_dot_prod_q15(
6728   q15_t * pSrcA,
6729   q15_t * pSrcB,
6730   uint32_t numSamples,
6731   q31_t * realResult,
6732   q31_t * imagResult);
6733 
6734   /**
6735    * @brief  Q31 complex dot product
6736    * @param[in]  *pSrcA points to the first input vector
6737    * @param[in]  *pSrcB points to the second input vector
6738    * @param[in]  numSamples number of complex samples in each vector
6739    * @param[out]  *realResult real part of the result returned here
6740    * @param[out]  *imagResult imaginary part of the result returned here
6741    * @return none.
6742    */
6743 
6744   void arm_cmplx_dot_prod_q31(
6745   q31_t * pSrcA,
6746   q31_t * pSrcB,
6747   uint32_t numSamples,
6748   q63_t * realResult,
6749   q63_t * imagResult);
6750 
6751   /**
6752    * @brief  Floating-point complex dot product
6753    * @param[in]  *pSrcA points to the first input vector
6754    * @param[in]  *pSrcB points to the second input vector
6755    * @param[in]  numSamples number of complex samples in each vector
6756    * @param[out]  *realResult real part of the result returned here
6757    * @param[out]  *imagResult imaginary part of the result returned here
6758    * @return none.
6759    */
6760 
6761   void arm_cmplx_dot_prod_f32(
6762   float32_t * pSrcA,
6763   float32_t * pSrcB,
6764   uint32_t numSamples,
6765   float32_t * realResult,
6766   float32_t * imagResult);
6767 
6768   /**
6769    * @brief  Q15 complex-by-real multiplication
6770    * @param[in]  *pSrcCmplx points to the complex input vector
6771    * @param[in]  *pSrcReal points to the real input vector
6772    * @param[out]  *pCmplxDst points to the complex output vector
6773    * @param[in]  numSamples number of samples in each vector
6774    * @return none.
6775    */
6776 
6777   void arm_cmplx_mult_real_q15(
6778   q15_t * pSrcCmplx,
6779   q15_t * pSrcReal,
6780   q15_t * pCmplxDst,
6781   uint32_t numSamples);
6782 
6783   /**
6784    * @brief  Q31 complex-by-real multiplication
6785    * @param[in]  *pSrcCmplx points to the complex input vector
6786    * @param[in]  *pSrcReal points to the real input vector
6787    * @param[out]  *pCmplxDst points to the complex output vector
6788    * @param[in]  numSamples number of samples in each vector
6789    * @return none.
6790    */
6791 
6792   void arm_cmplx_mult_real_q31(
6793   q31_t * pSrcCmplx,
6794   q31_t * pSrcReal,
6795   q31_t * pCmplxDst,
6796   uint32_t numSamples);
6797 
6798   /**
6799    * @brief  Floating-point complex-by-real multiplication
6800    * @param[in]  *pSrcCmplx points to the complex input vector
6801    * @param[in]  *pSrcReal points to the real input vector
6802    * @param[out]  *pCmplxDst points to the complex output vector
6803    * @param[in]  numSamples number of samples in each vector
6804    * @return none.
6805    */
6806 
6807   void arm_cmplx_mult_real_f32(
6808   float32_t * pSrcCmplx,
6809   float32_t * pSrcReal,
6810   float32_t * pCmplxDst,
6811   uint32_t numSamples);
6812 
6813   /**
6814    * @brief  Minimum value of a Q7 vector.
6815    * @param[in]  *pSrc is input pointer
6816    * @param[in]  blockSize is the number of samples to process
6817    * @param[out]  *result is output pointer
   * @param[out]  *index is where the array index of the minimum value in the input buffer is returned.
6819    * @return none.
6820    */
6821 
6822   void arm_min_q7(
6823   q7_t * pSrc,
6824   uint32_t blockSize,
6825   q7_t * result,
6826   uint32_t * index);
6827 
6828   /**
6829    * @brief  Minimum value of a Q15 vector.
6830    * @param[in]  *pSrc is input pointer
6831    * @param[in]  blockSize is the number of samples to process
6832    * @param[out]  *pResult is output pointer
   * @param[out]  *pIndex is the array index of the minimum value in the input buffer.
6834    * @return none.
6835    */
6836 
6837   void arm_min_q15(
6838   q15_t * pSrc,
6839   uint32_t blockSize,
6840   q15_t * pResult,
6841   uint32_t * pIndex);
6842 
6843   /**
6844    * @brief  Minimum value of a Q31 vector.
6845    * @param[in]  *pSrc is input pointer
6846    * @param[in]  blockSize is the number of samples to process
6847    * @param[out]  *pResult is output pointer
6848    * @param[out]  *pIndex is the array index of the minimum value in the input buffer.
6849    * @return none.
6850    */
6851   void arm_min_q31(
6852   q31_t * pSrc,
6853   uint32_t blockSize,
6854   q31_t * pResult,
6855   uint32_t * pIndex);
6856 
6857   /**
6858    * @brief  Minimum value of a floating-point vector.
6859    * @param[in]  *pSrc is input pointer
6860    * @param[in]  blockSize is the number of samples to process
6861    * @param[out]  *pResult is output pointer
6862    * @param[out]  *pIndex is the array index of the minimum value in the input buffer.
6863    * @return none.
6864    */
6865 
6866   void arm_min_f32(
6867   float32_t * pSrc,
6868   uint32_t blockSize,
6869   float32_t * pResult,
6870   uint32_t * pIndex);
6871 
6872 /**
6873  * @brief Maximum value of a Q7 vector.
6874  * @param[in]       *pSrc points to the input buffer
6875  * @param[in]       blockSize length of the input vector
6876  * @param[out]      *pResult maximum value returned here
6877  * @param[out]      *pIndex index of maximum value returned here
6878  * @return none.
6879  */
6880 
6881   void arm_max_q7(
6882   q7_t * pSrc,
6883   uint32_t blockSize,
6884   q7_t * pResult,
6885   uint32_t * pIndex);
6886 
6887 /**
6888  * @brief Maximum value of a Q15 vector.
6889  * @param[in]       *pSrc points to the input buffer
6890  * @param[in]       blockSize length of the input vector
6891  * @param[out]      *pResult maximum value returned here
6892  * @param[out]      *pIndex index of maximum value returned here
6893  * @return none.
6894  */
6895 
6896   void arm_max_q15(
6897   q15_t * pSrc,
6898   uint32_t blockSize,
6899   q15_t * pResult,
6900   uint32_t * pIndex);
6901 
6902 /**
6903  * @brief Maximum value of a Q31 vector.
6904  * @param[in]       *pSrc points to the input buffer
6905  * @param[in]       blockSize length of the input vector
6906  * @param[out]      *pResult maximum value returned here
6907  * @param[out]      *pIndex index of maximum value returned here
6908  * @return none.
6909  */
6910 
6911   void arm_max_q31(
6912   q31_t * pSrc,
6913   uint32_t blockSize,
6914   q31_t * pResult,
6915   uint32_t * pIndex);
6916 
6917 /**
6918  * @brief Maximum value of a floating-point vector.
6919  * @param[in]       *pSrc points to the input buffer
6920  * @param[in]       blockSize length of the input vector
6921  * @param[out]      *pResult maximum value returned here
6922  * @param[out]      *pIndex index of maximum value returned here
6923  * @return none.
6924  */
6925 
6926   void arm_max_f32(
6927   float32_t * pSrc,
6928   uint32_t blockSize,
6929   float32_t * pResult,
6930   uint32_t * pIndex);
6931 
6932   /**
6933    * @brief  Q15 complex-by-complex multiplication
6934    * @param[in]  *pSrcA points to the first input vector
6935    * @param[in]  *pSrcB points to the second input vector
6936    * @param[out]  *pDst  points to the output vector
6937    * @param[in]  numSamples number of complex samples in each vector
6938    * @return none.
6939    */
6940 
6941   void arm_cmplx_mult_cmplx_q15(
6942   q15_t * pSrcA,
6943   q15_t * pSrcB,
6944   q15_t * pDst,
6945   uint32_t numSamples);
6946 
6947   /**
6948    * @brief  Q31 complex-by-complex multiplication
6949    * @param[in]  *pSrcA points to the first input vector
6950    * @param[in]  *pSrcB points to the second input vector
6951    * @param[out]  *pDst  points to the output vector
6952    * @param[in]  numSamples number of complex samples in each vector
6953    * @return none.
6954    */
6955 
6956   void arm_cmplx_mult_cmplx_q31(
6957   q31_t * pSrcA,
6958   q31_t * pSrcB,
6959   q31_t * pDst,
6960   uint32_t numSamples);
6961 
6962   /**
6963    * @brief  Floating-point complex-by-complex multiplication
6964    * @param[in]  *pSrcA points to the first input vector
6965    * @param[in]  *pSrcB points to the second input vector
6966    * @param[out]  *pDst  points to the output vector
6967    * @param[in]  numSamples number of complex samples in each vector
6968    * @return none.
6969    */
6970 
6971   void arm_cmplx_mult_cmplx_f32(
6972   float32_t * pSrcA,
6973   float32_t * pSrcB,
6974   float32_t * pDst,
6975   uint32_t numSamples);
6976 
6977   /**
6978    * @brief Converts the elements of the floating-point vector to Q31 vector.
6979    * @param[in]       *pSrc points to the floating-point input vector
6980    * @param[out]      *pDst points to the Q31 output vector
6981    * @param[in]       blockSize length of the input vector
6982    * @return none.
6983    */
6984   void arm_float_to_q31(
6985   float32_t * pSrc,
6986   q31_t * pDst,
6987   uint32_t blockSize);
6988 
6989   /**
6990    * @brief Converts the elements of the floating-point vector to Q15 vector.
6991    * @param[in]       *pSrc points to the floating-point input vector
6992    * @param[out]      *pDst points to the Q15 output vector
6993    * @param[in]       blockSize length of the input vector
6994    * @return          none
6995    */
6996   void arm_float_to_q15(
6997   float32_t * pSrc,
6998   q15_t * pDst,
6999   uint32_t blockSize);
7000 
7001   /**
7002    * @brief Converts the elements of the floating-point vector to Q7 vector.
7003    * @param[in]       *pSrc points to the floating-point input vector
7004    * @param[out]      *pDst points to the Q7 output vector
7005    * @param[in]       blockSize length of the input vector
7006    * @return          none
7007    */
7008   void arm_float_to_q7(
7009   float32_t * pSrc,
7010   q7_t * pDst,
7011   uint32_t blockSize);
7012 
7013 
7014   /**
7015    * @brief  Converts the elements of the Q31 vector to Q15 vector.
7016    * @param[in]  *pSrc is input pointer
7017    * @param[out]  *pDst is output pointer
7018    * @param[in]  blockSize is the number of samples to process
7019    * @return none.
7020    */
7021   void arm_q31_to_q15(
7022   q31_t * pSrc,
7023   q15_t * pDst,
7024   uint32_t blockSize);
7025 
7026   /**
7027    * @brief  Converts the elements of the Q31 vector to Q7 vector.
7028    * @param[in]  *pSrc is input pointer
7029    * @param[out]  *pDst is output pointer
7030    * @param[in]  blockSize is the number of samples to process
7031    * @return none.
7032    */
7033   void arm_q31_to_q7(
7034   q31_t * pSrc,
7035   q7_t * pDst,
7036   uint32_t blockSize);
7037 
7038   /**
7039    * @brief  Converts the elements of the Q15 vector to floating-point vector.
7040    * @param[in]  *pSrc is input pointer
7041    * @param[out]  *pDst is output pointer
7042    * @param[in]  blockSize is the number of samples to process
7043    * @return none.
7044    */
7045   void arm_q15_to_float(
7046   q15_t * pSrc,
7047   float32_t * pDst,
7048   uint32_t blockSize);
7049 
7050 
7051   /**
7052    * @brief  Converts the elements of the Q15 vector to Q31 vector.
7053    * @param[in]  *pSrc is input pointer
7054    * @param[out]  *pDst is output pointer
7055    * @param[in]  blockSize is the number of samples to process
7056    * @return none.
7057    */
7058   void arm_q15_to_q31(
7059   q15_t * pSrc,
7060   q31_t * pDst,
7061   uint32_t blockSize);
7062 
7063 
7064   /**
7065    * @brief  Converts the elements of the Q15 vector to Q7 vector.
7066    * @param[in]  *pSrc is input pointer
7067    * @param[out]  *pDst is output pointer
7068    * @param[in]  blockSize is the number of samples to process
7069    * @return none.
7070    */
7071   void arm_q15_to_q7(
7072   q15_t * pSrc,
7073   q7_t * pDst,
7074   uint32_t blockSize);
7075 
7076 
7077   /**
7078    * @ingroup groupInterpolation
7079    */
7080 
7081   /**
7082    * @defgroup BilinearInterpolate Bilinear Interpolation
7083    *
7084    * Bilinear interpolation is an extension of linear interpolation applied to a two dimensional grid.
7085    * The underlying function <code>f(x, y)</code> is sampled on a regular grid and the interpolation process
7086    * determines values between the grid points.
7087    * Bilinear interpolation is equivalent to two step linear interpolation, first in the x-dimension and then in the y-dimension.
7088    * Bilinear interpolation is often used in image processing to rescale images.
7089    * The CMSIS DSP library provides bilinear interpolation functions for Q7, Q15, Q31, and floating-point data types.
7090    *
7091    * <b>Algorithm</b>
7092    * \par
7093    * The instance structure used by the bilinear interpolation functions describes a two dimensional data table.
7094    * For floating-point, the instance structure is defined as:
7095    * <pre>
7096    *   typedef struct
7097    *   {
7098    *     uint16_t numRows;
7099    *     uint16_t numCols;
7100    *     float32_t *pData;
7101    * } arm_bilinear_interp_instance_f32;
7102    * </pre>
7103    *
7104    * \par
7105    * where <code>numRows</code> specifies the number of rows in the table;
7106    * <code>numCols</code> specifies the number of columns in the table;
   * and <code>pData</code> points to an array of size <code>numRows*numCols</code> values.
   * The data table <code>pData</code> is organized in row order and the supplied data values fall on integer indexes.
   * That is, table element (x,y) is located at <code>pData[x + y*numCols]</code> where x and y are integers.
7110    *
7111    * \par
7112    * Let <code>(x, y)</code> specify the desired interpolation point.  Then define:
7113    * <pre>
7114    *     XF = floor(x)
7115    *     YF = floor(y)
7116    * </pre>
7117    * \par
7118    * The interpolated output point is computed as:
7119    * <pre>
7120    *  f(x, y) = f(XF, YF) * (1-(x-XF)) * (1-(y-YF))
7121    *           + f(XF+1, YF) * (x-XF)*(1-(y-YF))
7122    *           + f(XF, YF+1) * (1-(x-XF))*(y-YF)
7123    *           + f(XF+1, YF+1) * (x-XF)*(y-YF)
7124    * </pre>
   * Note that the coordinates (x, y) contain integer and fractional components.
   * The integer components specify which portion of the table to use while the
   * fractional components control the interpolation process.
   *
   * \par
   * If (x, y) is outside of the table boundary, bilinear interpolation returns zero output.
7131    */
7132 
7133   /**
7134    * @addtogroup BilinearInterpolate
7135    * @{
7136    */
7137 
7138   /**
7139   *
7140   * @brief  Floating-point bilinear interpolation.
7141   * @param[in,out] *S points to an instance of the interpolation structure.
7142   * @param[in] X interpolation coordinate.
7143   * @param[in] Y interpolation coordinate.
7144   * @return out interpolated value.
7145   */
7146 
7147 
arm_bilinear_interp_f32(const arm_bilinear_interp_instance_f32 * S,float32_t X,float32_t Y)7148   static __INLINE float32_t arm_bilinear_interp_f32(
7149   const arm_bilinear_interp_instance_f32 * S,
7150   float32_t X,
7151   float32_t Y)
7152   {
7153     float32_t out;
7154     float32_t f00, f01, f10, f11;
7155     float32_t *pData = S->pData;
7156     int32_t xIndex, yIndex, index;
7157     float32_t xdiff, ydiff;
7158     float32_t b1, b2, b3, b4;
7159 
7160     xIndex = (int32_t) X;
7161     yIndex = (int32_t) Y;
7162 
7163     /* Care taken for table outside boundary */
7164     /* Returns zero output when values are outside table boundary */
7165     if(xIndex < 0 || xIndex > (S->numRows - 1) || yIndex < 0
7166        || yIndex > (S->numCols - 1))
7167     {
7168       return (0);
7169     }
7170 
7171     /* Calculation of index for two nearest points in X-direction */
7172     index = (xIndex - 1) + (yIndex - 1) * S->numCols;
7173 
7174 
7175     /* Read two nearest points in X-direction */
7176     f00 = pData[index];
7177     f01 = pData[index + 1];
7178 
7179     /* Calculation of index for two nearest points in Y-direction */
7180     index = (xIndex - 1) + (yIndex) * S->numCols;
7181 
7182 
7183     /* Read two nearest points in Y-direction */
7184     f10 = pData[index];
7185     f11 = pData[index + 1];
7186 
7187     /* Calculation of intermediate values */
7188     b1 = f00;
7189     b2 = f01 - f00;
7190     b3 = f10 - f00;
7191     b4 = f00 - f01 - f10 + f11;
7192 
7193     /* Calculation of fractional part in X */
7194     xdiff = X - xIndex;
7195 
7196     /* Calculation of fractional part in Y */
7197     ydiff = Y - yIndex;
7198 
7199     /* Calculation of bi-linear interpolated output */
7200     out = b1 + b2 * xdiff + b3 * ydiff + b4 * xdiff * ydiff;
7201 
7202     /* return to application */
7203     return (out);
7204 
7205   }
7206 
7207   /**
7208   *
7209   * @brief  Q31 bilinear interpolation.
7210   * @param[in,out] *S points to an instance of the interpolation structure.
7211   * @param[in] X interpolation coordinate in 12.20 format.
7212   * @param[in] Y interpolation coordinate in 12.20 format.
7213   * @return out interpolated value.
7214   */
7215 
arm_bilinear_interp_q31(arm_bilinear_interp_instance_q31 * S,q31_t X,q31_t Y)7216   static __INLINE q31_t arm_bilinear_interp_q31(
7217   arm_bilinear_interp_instance_q31 * S,
7218   q31_t X,
7219   q31_t Y)
7220   {
7221     q31_t out;                                   /* Temporary output */
7222     q31_t acc = 0;                               /* output */
7223     q31_t xfract, yfract;                        /* X, Y fractional parts */
7224     q31_t x1, x2, y1, y2;                        /* Nearest output values */
7225     int32_t rI, cI;                              /* Row and column indices */
7226     q31_t *pYData = S->pData;                    /* pointer to output table values */
7227     uint32_t nCols = S->numCols;                 /* num of rows */
7228 
7229 
7230     /* Input is in 12.20 format */
7231     /* 12 bits for the table index */
7232     /* Index value calculation */
7233     rI = ((X & 0xFFF00000) >> 20u);
7234 
7235     /* Input is in 12.20 format */
7236     /* 12 bits for the table index */
7237     /* Index value calculation */
7238     cI = ((Y & 0xFFF00000) >> 20u);
7239 
7240     /* Care taken for table outside boundary */
7241     /* Returns zero output when values are outside table boundary */
7242     if(rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1))
7243     {
7244       return (0);
7245     }
7246 
7247     /* 20 bits for the fractional part */
7248     /* shift left xfract by 11 to keep 1.31 format */
7249     xfract = (X & 0x000FFFFF) << 11u;
7250 
7251     /* Read two nearest output values from the index */
7252     x1 = pYData[(rI) + nCols * (cI)];
7253     x2 = pYData[(rI) + nCols * (cI) + 1u];
7254 
7255     /* 20 bits for the fractional part */
7256     /* shift left yfract by 11 to keep 1.31 format */
7257     yfract = (Y & 0x000FFFFF) << 11u;
7258 
7259     /* Read two nearest output values from the index */
7260     y1 = pYData[(rI) + nCols * (cI + 1)];
7261     y2 = pYData[(rI) + nCols * (cI + 1) + 1u];
7262 
7263     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 3.29(q29) format */
7264     out = ((q31_t) (((q63_t) x1 * (0x7FFFFFFF - xfract)) >> 32));
7265     acc = ((q31_t) (((q63_t) out * (0x7FFFFFFF - yfract)) >> 32));
7266 
7267     /* x2 * (xfract) * (1-yfract)  in 3.29(q29) and adding to acc */
7268     out = ((q31_t) ((q63_t) x2 * (0x7FFFFFFF - yfract) >> 32));
7269     acc += ((q31_t) ((q63_t) out * (xfract) >> 32));
7270 
7271     /* y1 * (1 - xfract) * (yfract)  in 3.29(q29) and adding to acc */
7272     out = ((q31_t) ((q63_t) y1 * (0x7FFFFFFF - xfract) >> 32));
7273     acc += ((q31_t) ((q63_t) out * (yfract) >> 32));
7274 
7275     /* y2 * (xfract) * (yfract)  in 3.29(q29) and adding to acc */
7276     out = ((q31_t) ((q63_t) y2 * (xfract) >> 32));
7277     acc += ((q31_t) ((q63_t) out * (yfract) >> 32));
7278 
7279     /* Convert acc to 1.31(q31) format */
7280     return (acc << 2u);
7281 
7282   }
7283 
7284   /**
7285   * @brief  Q15 bilinear interpolation.
7286   * @param[in,out] *S points to an instance of the interpolation structure.
7287   * @param[in] X interpolation coordinate in 12.20 format.
7288   * @param[in] Y interpolation coordinate in 12.20 format.
7289   * @return out interpolated value.
7290   */
7291 
arm_bilinear_interp_q15(arm_bilinear_interp_instance_q15 * S,q31_t X,q31_t Y)7292   static __INLINE q15_t arm_bilinear_interp_q15(
7293   arm_bilinear_interp_instance_q15 * S,
7294   q31_t X,
7295   q31_t Y)
7296   {
7297     q63_t acc = 0;                               /* output */
7298     q31_t out;                                   /* Temporary output */
7299     q15_t x1, x2, y1, y2;                        /* Nearest output values */
7300     q31_t xfract, yfract;                        /* X, Y fractional parts */
7301     int32_t rI, cI;                              /* Row and column indices */
7302     q15_t *pYData = S->pData;                    /* pointer to output table values */
7303     uint32_t nCols = S->numCols;                 /* num of rows */
7304 
7305     /* Input is in 12.20 format */
7306     /* 12 bits for the table index */
7307     /* Index value calculation */
7308     rI = ((X & 0xFFF00000) >> 20);
7309 
7310     /* Input is in 12.20 format */
7311     /* 12 bits for the table index */
7312     /* Index value calculation */
7313     cI = ((Y & 0xFFF00000) >> 20);
7314 
7315     /* Care taken for table outside boundary */
7316     /* Returns zero output when values are outside table boundary */
7317     if(rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1))
7318     {
7319       return (0);
7320     }
7321 
7322     /* 20 bits for the fractional part */
7323     /* xfract should be in 12.20 format */
7324     xfract = (X & 0x000FFFFF);
7325 
7326     /* Read two nearest output values from the index */
7327     x1 = pYData[(rI) + nCols * (cI)];
7328     x2 = pYData[(rI) + nCols * (cI) + 1u];
7329 
7330 
7331     /* 20 bits for the fractional part */
7332     /* yfract should be in 12.20 format */
7333     yfract = (Y & 0x000FFFFF);
7334 
7335     /* Read two nearest output values from the index */
7336     y1 = pYData[(rI) + nCols * (cI + 1)];
7337     y2 = pYData[(rI) + nCols * (cI + 1) + 1u];
7338 
7339     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 13.51 format */
7340 
7341     /* x1 is in 1.15(q15), xfract in 12.20 format and out is in 13.35 format */
7342     /* convert 13.35 to 13.31 by right shifting  and out is in 1.31 */
7343     out = (q31_t) (((q63_t) x1 * (0xFFFFF - xfract)) >> 4u);
7344     acc = ((q63_t) out * (0xFFFFF - yfract));
7345 
7346     /* x2 * (xfract) * (1-yfract)  in 1.51 and adding to acc */
7347     out = (q31_t) (((q63_t) x2 * (0xFFFFF - yfract)) >> 4u);
7348     acc += ((q63_t) out * (xfract));
7349 
7350     /* y1 * (1 - xfract) * (yfract)  in 1.51 and adding to acc */
7351     out = (q31_t) (((q63_t) y1 * (0xFFFFF - xfract)) >> 4u);
7352     acc += ((q63_t) out * (yfract));
7353 
7354     /* y2 * (xfract) * (yfract)  in 1.51 and adding to acc */
7355     out = (q31_t) (((q63_t) y2 * (xfract)) >> 4u);
7356     acc += ((q63_t) out * (yfract));
7357 
7358     /* acc is in 13.51 format and down shift acc by 36 times */
7359     /* Convert out to 1.15 format */
7360     return (acc >> 36);
7361 
7362   }
7363 
7364   /**
7365   * @brief  Q7 bilinear interpolation.
7366   * @param[in,out] *S points to an instance of the interpolation structure.
7367   * @param[in] X interpolation coordinate in 12.20 format.
7368   * @param[in] Y interpolation coordinate in 12.20 format.
7369   * @return out interpolated value.
7370   */
7371 
arm_bilinear_interp_q7(arm_bilinear_interp_instance_q7 * S,q31_t X,q31_t Y)7372   static __INLINE q7_t arm_bilinear_interp_q7(
7373   arm_bilinear_interp_instance_q7 * S,
7374   q31_t X,
7375   q31_t Y)
7376   {
7377     q63_t acc = 0;                               /* output */
7378     q31_t out;                                   /* Temporary output */
7379     q31_t xfract, yfract;                        /* X, Y fractional parts */
7380     q7_t x1, x2, y1, y2;                         /* Nearest output values */
7381     int32_t rI, cI;                              /* Row and column indices */
7382     q7_t *pYData = S->pData;                     /* pointer to output table values */
7383     uint32_t nCols = S->numCols;                 /* num of rows */
7384 
7385     /* Input is in 12.20 format */
7386     /* 12 bits for the table index */
7387     /* Index value calculation */
7388     rI = ((X & 0xFFF00000) >> 20);
7389 
7390     /* Input is in 12.20 format */
7391     /* 12 bits for the table index */
7392     /* Index value calculation */
7393     cI = ((Y & 0xFFF00000) >> 20);
7394 
7395     /* Care taken for table outside boundary */
7396     /* Returns zero output when values are outside table boundary */
7397     if(rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1))
7398     {
7399       return (0);
7400     }
7401 
7402     /* 20 bits for the fractional part */
7403     /* xfract should be in 12.20 format */
7404     xfract = (X & 0x000FFFFF);
7405 
7406     /* Read two nearest output values from the index */
7407     x1 = pYData[(rI) + nCols * (cI)];
7408     x2 = pYData[(rI) + nCols * (cI) + 1u];
7409 
7410 
7411     /* 20 bits for the fractional part */
7412     /* yfract should be in 12.20 format */
7413     yfract = (Y & 0x000FFFFF);
7414 
7415     /* Read two nearest output values from the index */
7416     y1 = pYData[(rI) + nCols * (cI + 1)];
7417     y2 = pYData[(rI) + nCols * (cI + 1) + 1u];
7418 
7419     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 16.47 format */
7420     out = ((x1 * (0xFFFFF - xfract)));
7421     acc = (((q63_t) out * (0xFFFFF - yfract)));
7422 
7423     /* x2 * (xfract) * (1-yfract)  in 2.22 and adding to acc */
7424     out = ((x2 * (0xFFFFF - yfract)));
7425     acc += (((q63_t) out * (xfract)));
7426 
7427     /* y1 * (1 - xfract) * (yfract)  in 2.22 and adding to acc */
7428     out = ((y1 * (0xFFFFF - xfract)));
7429     acc += (((q63_t) out * (yfract)));
7430 
7431     /* y2 * (xfract) * (yfract)  in 2.22 and adding to acc */
7432     out = ((y2 * (yfract)));
7433     acc += (((q63_t) out * (xfract)));
7434 
7435     /* acc in 16.47 format and down shift by 40 to convert to 1.7 format */
7436     return (acc >> 40);
7437 
7438   }
7439 
7440   /**
7441    * @} end of BilinearInterpolate group
7442    */
7443 
7444 
// The six macros below are the C forms of the operations named in their
// comments.  Each forms the 64-bit product x * y and keeps its upper 32 bits;
// the _R variants add 0x80000000 first so the result is rounded instead of
// truncated.  Every argument is parenthesised so that expression arguments
// (e.g. "p + 1") are cast and multiplied as a whole.  Note that `a` is
// evaluated more than once in the accumulate/subtract _R forms.

//SMMLAR: a = upper 32 bits of ((a << 32) + x * y + rounding constant)
#define multAcc_32x32_keep32_R(a, x, y) \
    ((a) = (q31_t) (((((q63_t) (a)) << 32) + ((q63_t) (x) * (y)) + 0x80000000LL ) >> 32))

//SMMLSR: a = upper 32 bits of ((a << 32) - x * y + rounding constant)
#define multSub_32x32_keep32_R(a, x, y) \
    ((a) = (q31_t) (((((q63_t) (a)) << 32) - ((q63_t) (x) * (y)) + 0x80000000LL ) >> 32))

//SMMULR: a = upper 32 bits of (x * y + rounding constant)
#define mult_32x32_keep32_R(a, x, y) \
    ((a) = (q31_t) (((q63_t) (x) * (y) + 0x80000000LL ) >> 32))

//SMMLA: a += upper 32 bits of (x * y)
#define multAcc_32x32_keep32(a, x, y) \
    ((a) += (q31_t) (((q63_t) (x) * (y)) >> 32))

//SMMLS: a -= upper 32 bits of (x * y)
#define multSub_32x32_keep32(a, x, y) \
    ((a) -= (q31_t) (((q63_t) (x) * (y)) >> 32))

//SMMUL: a = upper 32 bits of (x * y)
#define mult_32x32_keep32(a, x, y) \
    ((a) = (q31_t) (((q63_t) (x) * (y) ) >> 32))
7468 
7469 
// Per-toolchain definitions of the four optimization-region markers:
//   LOW_OPTIMIZATION_ENTER / LOW_OPTIMIZATION_EXIT
//   IAR_ONLY_LOW_OPTIMIZATION_ENTER / IAR_ONLY_LOW_OPTIMIZATION_EXIT
// ENTER is placed directly above a function definition and EXIT directly
// after its end.  Every branch defines all four names (possibly as empty) so
// that code using them compiles on any toolchain.

#if defined ( __CC_ARM ) //Keil

//Enter low optimization region - place directly above function definition
    #ifdef ARM_MATH_CM4
      #define LOW_OPTIMIZATION_ENTER \
         _Pragma ("push")         \
         _Pragma ("O1")
    #else
      #define LOW_OPTIMIZATION_ENTER
    #endif

//Exit low optimization region - place directly after end of function definition
    #ifdef ARM_MATH_CM4
      #define LOW_OPTIMIZATION_EXIT \
         _Pragma ("pop")
    #else
      #define LOW_OPTIMIZATION_EXIT
    #endif

//Enter low optimization region - place directly above function definition
  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER

//Exit low optimization region - place directly after end of function definition
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#elif defined(__ICCARM__) //IAR

//Enter low optimization region - place directly above function definition
    #ifdef ARM_MATH_CM4
      #define LOW_OPTIMIZATION_ENTER \
         _Pragma ("optimize=low")
    #else
      #define LOW_OPTIMIZATION_ENTER
    #endif

//Exit low optimization region - place directly after end of function definition
  #define LOW_OPTIMIZATION_EXIT

//Enter low optimization region - place directly above function definition
    #ifdef ARM_MATH_CM4
      #define IAR_ONLY_LOW_OPTIMIZATION_ENTER \
         _Pragma ("optimize=low")
    #else
      #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
    #endif

//Exit low optimization region - place directly after end of function definition
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#elif defined(__GNUC__)

  #define LOW_OPTIMIZATION_ENTER __attribute__(( optimize("-O1") ))

  #define LOW_OPTIMIZATION_EXIT

  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER

  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#elif defined(__CSMC__)		// Cosmic

#define LOW_OPTIMIZATION_ENTER
#define LOW_OPTIMIZATION_EXIT
#define IAR_ONLY_LOW_OPTIMIZATION_ENTER
#define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#elif defined(__TASKING__)		// TASKING

#define LOW_OPTIMIZATION_ENTER
#define LOW_OPTIMIZATION_EXIT
#define IAR_ONLY_LOW_OPTIMIZATION_ENTER
#define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#else		// Any other toolchain: no optimization control, markers expand to nothing

#define LOW_OPTIMIZATION_ENTER
#define LOW_OPTIMIZATION_EXIT
#define IAR_ONLY_LOW_OPTIMIZATION_ENTER
#define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#endif
7544 
7545 
7546 #ifdef	__cplusplus
7547 }
7548 #endif
7549 
7550 
7551 #endif /* _ARM_MATH_H */
7552 
7553 /**
7554  *
7555  * End of file.
7556  */
7557