1 /* ----------------------------------------------------------------------
2  * Copyright (C) 2010-2011 ARM Limited. All rights reserved.
3  *
4  * $Date:        15. July 2011
5  * $Revision: 	V1.0.10
6  *
7  * Project: 	    CMSIS DSP Library
8  * Title:	     arm_math.h
9  *
10  * Description:	 Public header file for CMSIS DSP Library
11  *
12  * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
13  *
14  * Version 1.0.10 2011/7/15
15  *    Big Endian support added and Merged M0 and M3/M4 Source code.
16  *
17  * Version 1.0.3 2010/11/29
18  *    Re-organized the CMSIS folders and updated documentation.
19  *
20  * Version 1.0.2 2010/11/11
21  *    Documentation updated.
22  *
23  * Version 1.0.1 2010/10/05
24  *    Production release and review comments incorporated.
25  *
26  * Version 1.0.0 2010/09/20
27  *    Production release and review comments incorporated.
28  * -------------------------------------------------------------------- */
29 
30 /**
31    \mainpage CMSIS DSP Software Library
32    *
33    * <b>Introduction</b>
34    *
35    * This user manual describes the CMSIS DSP software library,
36    * a suite of common signal processing functions for use on Cortex-M processor based devices.
37    *
38    * The library is divided into a number of modules each covering a specific category:
39    * - Basic math functions
40    * - Fast math functions
41    * - Complex math functions
42    * - Filters
43    * - Matrix functions
44    * - Transforms
45    * - Motor control functions
46    * - Statistical functions
47    * - Support functions
48    * - Interpolation functions
49    *
50    * The library has separate functions for operating on 8-bit integers, 16-bit integers,
   * 32-bit integers and 32-bit floating-point values.
52    *
53    * <b>Processor Support</b>
54    *
55    * The library is completely written in C and is fully CMSIS compliant.
56    * High performance is achieved through maximum use of Cortex-M4 intrinsics.
57    *
58    * The supplied library source code also builds and runs on the Cortex-M3 and Cortex-M0 processor,
59    * with the DSP intrinsics being emulated through software.
60    *
61    *
62    * <b>Toolchain Support</b>
63    *
64    * The library has been developed and tested with MDK-ARM version 4.21.
65    * The library is being tested in GCC and IAR toolchains and updates on this activity will be made available shortly.
66    *
67    * <b>Using the Library</b>
68    *
69    * The library installer contains prebuilt versions of the libraries in the <code>Lib</code> folder.
70    * - arm_cortexM4lf_math.lib (Little endian and Floating Point Unit on Cortex-M4)
71    * - arm_cortexM4bf_math.lib (Big endian and Floating Point Unit on Cortex-M4)
72    * - arm_cortexM4l_math.lib (Little endian on Cortex-M4)
73    * - arm_cortexM4b_math.lib (Big endian on Cortex-M4)
74    * - arm_cortexM3l_math.lib (Little endian on Cortex-M3)
75    * - arm_cortexM3b_math.lib (Big endian on Cortex-M3)
76    * - arm_cortexM0l_math.lib (Little endian on Cortex-M0)
   * - arm_cortexM0b_math.lib (Big endian on Cortex-M0)
78    *
79    * The library functions are declared in the public file <code>arm_math.h</code> which is placed in the <code>Include</code> folder.
80    * Simply include this file and link the appropriate library in the application and begin calling the library functions. The Library supports single
81    * public header file <code>arm_math.h</code> for Cortex-M4/M3/M0 with little endian and big endian. Same header file will be used for floating point unit(FPU) variants.
82    * Define the appropriate pre processor MACRO ARM_MATH_CM4 or  ARM_MATH_CM3 or
83    * ARM_MATH_CM0 depending on the target processor in the application.
84    *
85    * <b>Examples</b>
86    *
87    * The library ships with a number of examples which demonstrate how to use the library functions.
88    *
89    * <b>Building the Library</b>
90    *
91    * The library installer contains project files to re build libraries on MDK Tool chain in the <code>CMSIS\\DSP_Lib\\Source\\ARM</code> folder.
92    * - arm_cortexM0b_math.uvproj
93    * - arm_cortexM0l_math.uvproj
94    * - arm_cortexM3b_math.uvproj
95    * - arm_cortexM3l_math.uvproj
96    * - arm_cortexM4b_math.uvproj
97    * - arm_cortexM4l_math.uvproj
98    * - arm_cortexM4bf_math.uvproj
99    * - arm_cortexM4lf_math.uvproj
100    *
   * Each library project has different pre-processor macros.
102    *
103    * <b>ARM_MATH_CMx:</b>
104    * Define macro ARM_MATH_CM4 for building the library on Cortex-M4 target, ARM_MATH_CM3 for building library on Cortex-M3 target
105    * and ARM_MATH_CM0 for building library on cortex-M0 target.
106    *
107    * <b>ARM_MATH_BIG_ENDIAN:</b>
108    * Define macro ARM_MATH_BIG_ENDIAN to build the library for big endian targets. By default library builds for little endian targets.
109    *
110    * <b>ARM_MATH_MATRIX_CHECK:</b>
111    * Define macro for checking on the input and output sizes of matrices
112    *
113    * <b>ARM_MATH_ROUNDING:</b>
114    * Define macro for rounding on support functions
115    *
116    * <b>__FPU_PRESENT:</b>
117    * Initialize macro __FPU_PRESENT = 1 when building on FPU supported Targets. Enable this macro for M4bf and M4lf libraries
118    *
119    *
120    * The project can be built by opening the appropriate project in MDK-ARM 4.21 chain and defining the optional pre processor MACROs detailed above.
121    *
122    * <b>Copyright Notice</b>
123    *
124    * Copyright (C) 2010 ARM Limited. All rights reserved.
125    */
126 
127 
128 /**
129  * @defgroup groupMath Basic Math Functions
130  */
131 
132 /**
133  * @defgroup groupFastMath Fast Math Functions
134  * This set of functions provides a fast approximation to sine, cosine, and square root.
135  * As compared to most of the other functions in the CMSIS math library, the fast math functions
136  * operate on individual values and not arrays.
137  * There are separate functions for Q15, Q31, and floating-point data.
138  *
139  */
140 
141 /**
142  * @defgroup groupCmplxMath Complex Math Functions
143  * This set of functions operates on complex data vectors.
144  * The data in the complex arrays is stored in an interleaved fashion
145  * (real, imag, real, imag, ...).
146  * In the API functions, the number of samples in a complex array refers
147  * to the number of complex values; the array contains twice this number of
148  * real values.
149  */
150 
151 /**
152  * @defgroup groupFilters Filtering Functions
153  */
154 
155 /**
156  * @defgroup groupMatrix Matrix Functions
157  *
158  * This set of functions provides basic matrix math operations.
159  * The functions operate on matrix data structures.  For example,
160  * the type
161  * definition for the floating-point matrix structure is shown
162  * below:
163  * <pre>
164  *     typedef struct
165  *     {
166  *       uint16_t numRows;     // number of rows of the matrix.
167  *       uint16_t numCols;     // number of columns of the matrix.
168  *       float32_t *pData;     // points to the data of the matrix.
169  *     } arm_matrix_instance_f32;
170  * </pre>
171  * There are similar definitions for Q15 and Q31 data types.
172  *
173  * The structure specifies the size of the matrix and then points to
174  * an array of data.  The array is of size <code>numRows X numCols</code>
175  * and the values are arranged in row order.  That is, the
176  * matrix element (i, j) is stored at:
177  * <pre>
178  *     pData[i*numCols + j]
179  * </pre>
180  *
181  * \par Init Functions
182  * There is an associated initialization function for each type of matrix
183  * data structure.
184  * The initialization function sets the values of the internal structure fields.
185  * Refer to the function <code>arm_mat_init_f32()</code>, <code>arm_mat_init_q31()</code>
186  * and <code>arm_mat_init_q15()</code> for floating-point, Q31 and Q15 types,  respectively.
187  *
188  * \par
189  * Use of the initialization function is optional. However, if initialization function is used
190  * then the instance structure cannot be placed into a const data section.
191  * To place the instance structure in a const data
192  * section, manually initialize the data structure.  For example:
193  * <pre>
194  * <code>arm_matrix_instance_f32 S = {nRows, nColumns, pData};</code>
195  * <code>arm_matrix_instance_q31 S = {nRows, nColumns, pData};</code>
196  * <code>arm_matrix_instance_q15 S = {nRows, nColumns, pData};</code>
197  * </pre>
198  * where <code>nRows</code> specifies the number of rows, <code>nColumns</code>
199  * specifies the number of columns, and <code>pData</code> points to the
200  * data array.
201  *
202  * \par Size Checking
203  * By default all of the matrix functions perform size checking on the input and
204  * output matrices.  For example, the matrix addition function verifies that the
205  * two input matrices and the output matrix all have the same number of rows and
206  * columns.  If the size check fails the functions return:
207  * <pre>
208  *     ARM_MATH_SIZE_MISMATCH
209  * </pre>
210  * Otherwise the functions return
211  * <pre>
212  *     ARM_MATH_SUCCESS
213  * </pre>
214  * There is some overhead associated with this matrix size checking.
215  * The matrix size checking is enabled via the \#define
216  * <pre>
217  *     ARM_MATH_MATRIX_CHECK
218  * </pre>
219  * within the library project settings.  By default this macro is defined
220  * and size checking is enabled.  By changing the project settings and
221  * undefining this macro size checking is eliminated and the functions
222  * run a bit faster.  With size checking disabled the functions always
223  * return <code>ARM_MATH_SUCCESS</code>.
224  */
225 
226 /**
227  * @defgroup groupTransforms Transform Functions
228  */
229 
230 /**
231  * @defgroup groupController Controller Functions
232  */
233 
234 /**
235  * @defgroup groupStats Statistics Functions
236  */
237 /**
238  * @defgroup groupSupport Support Functions
239  */
240 
241 /**
242  * @defgroup groupInterpolation Interpolation Functions
243  * These functions perform 1- and 2-dimensional interpolation of data.
244  * Linear interpolation is used for 1-dimensional data and
245  * bilinear interpolation is used for 2-dimensional data.
246  */
247 
248 /**
249  * @defgroup groupExamples Examples
250  */
251 #ifndef _ARM_MATH_H
252 #define _ARM_MATH_H
253 
254 #define __CMSIS_GENERIC              /* disable NVIC and Systick functions */
255 
256 #if defined (ARM_MATH_CM4)
257   #include "core_cm4.h"
258 #elif defined (ARM_MATH_CM3)
259   #include "core_cm3.h"
260 #elif defined (ARM_MATH_CM0)
261   #include "core_cm0.h"
262 #else
263 #include "ARMCM4.h"
264 #warning "Define either ARM_MATH_CM4 OR ARM_MATH_CM3...By Default building on ARM_MATH_CM4....."
265 #endif
266 
267 #undef  __CMSIS_GENERIC              /* enable NVIC and Systick functions */
268 #include "string.h"
269     #include "math.h"
270 #ifdef	__cplusplus
271 extern "C"
272 {
273 #endif
274 
275 
276   /**
277    * @brief Macros required for reciprocal calculation in Normalized LMS
278    */
279 
280 #define DELTA_Q31 			(0x100)
281 #define DELTA_Q15 			0x5
282 #define INDEX_MASK 			0x0000003F
283 #define PI					3.14159265358979f
284 
285   /**
286    * @brief Macros required for SINE and COSINE Fast math approximations
287    */
288 
289 #define TABLE_SIZE			256
290 #define TABLE_SPACING_Q31	0x800000
291 #define TABLE_SPACING_Q15	0x80
292 
293   /**
294    * @brief Macros required for SINE and COSINE Controller functions
295    */
296   /* 1.31(q31) Fixed value of 2/360 */
297   /* -1 to +1 is divided into 360 values so total spacing is (2/360) */
298 #define INPUT_SPACING			0xB60B61
299 
300 
301   /**
302    * @brief Error status returned by some functions in the library.
303    */
304 
305   typedef enum
306     {
307       ARM_MATH_SUCCESS = 0,              /**< No error */
308       ARM_MATH_ARGUMENT_ERROR = -1,      /**< One or more arguments are incorrect */
309       ARM_MATH_LENGTH_ERROR = -2,        /**< Length of data buffer is incorrect */
310       ARM_MATH_SIZE_MISMATCH = -3,       /**< Size of matrices is not compatible with the operation. */
311       ARM_MATH_NANINF = -4,              /**< Not-a-number (NaN) or infinity is generated */
312       ARM_MATH_SINGULAR = -5,            /**< Generated by matrix inversion if the input matrix is singular and cannot be inverted. */
313       ARM_MATH_TEST_FAILURE = -6         /**< Test Failed  */
314     } arm_status;
315 
316   /**
317    * @brief 8-bit fractional data type in 1.7 format.
318    */
319   typedef int8_t q7_t;
320 
321   /**
322    * @brief 16-bit fractional data type in 1.15 format.
323    */
324   typedef int16_t q15_t;
325 
326   /**
327    * @brief 32-bit fractional data type in 1.31 format.
328    */
329   typedef int32_t q31_t;
330 
331   /**
332    * @brief 64-bit fractional data type in 1.63 format.
333    */
334   typedef int64_t q63_t;
335 
336   /**
337    * @brief 32-bit floating-point type definition.
338    */
339   typedef float float32_t;
340 
341   /**
342    * @brief 64-bit floating-point type definition.
343    */
344   typedef double float64_t;
345 
346   /**
347    * @brief definition to read/write two 16 bit values.
348    */
349 #define __SIMD32(addr)  (*(int32_t **) & (addr))
350 
351 #if defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0)
352   /**
353    * @brief definition to pack two 16 bit values.
354    */
355 #define __PKHBT(ARG1, ARG2, ARG3)      ( (((int32_t)(ARG1) <<  0) & (int32_t)0x0000FFFF) | \
356                                          (((int32_t)(ARG2) << ARG3) & (int32_t)0xFFFF0000)  )
357 
358 #endif
359 
360 
361    /**
362    * @brief definition to pack four 8 bit values.
363    */
364 #ifndef ARM_MATH_BIG_ENDIAN
365 
366 #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) <<  0) & (int32_t)0x000000FF) |	\
367                                 (((int32_t)(v1) <<  8) & (int32_t)0x0000FF00) |	\
368 							    (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) |	\
369 							    (((int32_t)(v3) << 24) & (int32_t)0xFF000000)  )
370 #else
371 
372 #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v3) <<  0) & (int32_t)0x000000FF) |	\
373                                 (((int32_t)(v2) <<  8) & (int32_t)0x0000FF00) |	\
374 							    (((int32_t)(v1) << 16) & (int32_t)0x00FF0000) |	\
375 							    (((int32_t)(v0) << 24) & (int32_t)0xFF000000)  )
376 
377 #endif
378 
379 
380   /**
381    * @brief Clips Q63 to Q31 values.
382    */
clip_q63_to_q31(q63_t x)383   __STATIC_INLINE q31_t clip_q63_to_q31(
384 					q63_t x)
385   {
386     return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
387       ((0x7FFFFFFF ^ ((q31_t) (x >> 63)))) : (q31_t) x;
388   }
389 
390   /**
391    * @brief Clips Q63 to Q15 values.
392    */
clip_q63_to_q15(q63_t x)393   __STATIC_INLINE q15_t clip_q63_to_q15(
394 					q63_t x)
395   {
396     return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
397       ((0x7FFF ^ ((q15_t) (x >> 63)))) : (q15_t) (x >> 15);
398   }
399 
400   /**
401    * @brief Clips Q31 to Q7 values.
402    */
clip_q31_to_q7(q31_t x)403   __STATIC_INLINE q7_t clip_q31_to_q7(
404 				      q31_t x)
405   {
406     return ((q31_t) (x >> 24) != ((q31_t) x >> 23)) ?
407       ((0x7F ^ ((q7_t) (x >> 31)))) : (q7_t) x;
408   }
409 
410   /**
411    * @brief Clips Q31 to Q15 values.
412    */
clip_q31_to_q15(q31_t x)413   __STATIC_INLINE q15_t clip_q31_to_q15(
414 					q31_t x)
415   {
416     return ((q31_t) (x >> 16) != ((q31_t) x >> 15)) ?
417       ((0x7FFF ^ ((q15_t) (x >> 31)))) : (q15_t) x;
418   }
419 
420   /**
421    * @brief Multiplies 32 X 64 and returns 32 bit result in 2.30 format.
422    */
423 
mult32x64(q63_t x,q31_t y)424   __STATIC_INLINE q63_t mult32x64(
425 				  q63_t x,
426 				  q31_t y)
427   {
428     return ((((q63_t) (x & 0x00000000FFFFFFFF) * y) >> 32) +
429             (((q63_t) (x >> 32) * y)));
430   }
431 
432 
433 #if defined (ARM_MATH_CM0) && defined ( __CC_ARM   )
434 #define __CLZ __clz
435 #endif
436 
437 #if defined (ARM_MATH_CM0) && defined ( __TASKING__ )
438 /* No need to redefine __CLZ */
439 #endif
440 
441 #if defined (ARM_MATH_CM0) && ((defined (__ICCARM__)) ||(defined (__GNUC__)) )
442 
443   __STATIC_INLINE  uint32_t __CLZ(q31_t data);
444 
445 
__CLZ(q31_t data)446   __STATIC_INLINE uint32_t __CLZ(q31_t data)
447   {
448 	  uint32_t count = 0;
449 	  uint32_t mask = 0x80000000;
450 
451 	  while((data & mask) ==  0)
452 	  {
453 		  count += 1u;
454 		  mask = mask >> 1u;
455 	  }
456 
457 	  return(count);
458 
459   }
460 
461 #endif
462 
463   /**
464    * @brief Function to Calculates 1/in(reciprocal) value of Q31 Data type.
465    */
466 
arm_recip_q31(q31_t in,q31_t * dst,q31_t * pRecipTable)467   __STATIC_INLINE uint32_t arm_recip_q31(
468 					 q31_t in,
469 					 q31_t * dst,
470 					 q31_t * pRecipTable)
471   {
472 
473     uint32_t out, tempVal;
474     uint32_t index, i;
475     uint32_t signBits;
476 
477     if(in > 0)
478       {
479 	signBits = __CLZ(in) - 1;
480       }
481     else
482       {
483 	signBits = __CLZ(-in) - 1;
484       }
485 
486     /* Convert input sample to 1.31 format */
487     in = in << signBits;
488 
489     /* calculation of index for initial approximated Val */
490     index = (uint32_t) (in >> 24u);
491     index = (index & INDEX_MASK);
492 
493     /* 1.31 with exp 1 */
494     out = pRecipTable[index];
495 
496     /* calculation of reciprocal value */
497     /* running approximation for two iterations */
498     for (i = 0u; i < 2u; i++)
499       {
500 	tempVal = (q31_t) (((q63_t) in * out) >> 31u);
501 	tempVal = 0x7FFFFFFF - tempVal;
502 	/*      1.31 with exp 1 */
503 	//out = (q31_t) (((q63_t) out * tempVal) >> 30u);
504 	out = (q31_t) clip_q63_to_q31(((q63_t) out * tempVal) >> 30u);
505       }
506 
507     /* write output */
508     *dst = out;
509 
510     /* return num of signbits of out = 1/in value */
511     return (signBits + 1u);
512 
513   }
514 
515   /**
516    * @brief Function to Calculates 1/in(reciprocal) value of Q15 Data type.
517    */
arm_recip_q15(q15_t in,q15_t * dst,q15_t * pRecipTable)518   __STATIC_INLINE uint32_t arm_recip_q15(
519 					 q15_t in,
520 					 q15_t * dst,
521 					 q15_t * pRecipTable)
522   {
523 
524     uint32_t out = 0, tempVal = 0;
525     uint32_t index = 0, i = 0;
526     uint32_t signBits = 0;
527 
528     if(in > 0)
529       {
530 	signBits = __CLZ(in) - 17;
531       }
532     else
533       {
534 	signBits = __CLZ(-in) - 17;
535       }
536 
537     /* Convert input sample to 1.15 format */
538     in = in << signBits;
539 
540     /* calculation of index for initial approximated Val */
541     index = in >> 8;
542     index = (index & INDEX_MASK);
543 
544     /*      1.15 with exp 1  */
545     out = pRecipTable[index];
546 
547     /* calculation of reciprocal value */
548     /* running approximation for two iterations */
549     for (i = 0; i < 2; i++)
550       {
551 	tempVal = (q15_t) (((q31_t) in * out) >> 15);
552 	tempVal = 0x7FFF - tempVal;
553 	/*      1.15 with exp 1 */
554 	out = (q15_t) (((q31_t) out * tempVal) >> 14);
555       }
556 
557     /* write output */
558     *dst = out;
559 
560     /* return num of signbits of out = 1/in value */
561     return (signBits + 1);
562 
563   }
564 
565 
566   /*
   * @brief C custom defined intrinsic function for only M0 processors
568    */
569 #if defined(ARM_MATH_CM0)
570 
__SSAT(q31_t x,uint32_t y)571   __STATIC_INLINE q31_t __SSAT(
572 			       q31_t x,
573 			       uint32_t y)
574   {
575     int32_t posMax, negMin;
576     uint32_t i;
577 
578     posMax = 1;
579     for (i = 0; i < (y - 1); i++)
580       {
581 	posMax = posMax * 2;
582       }
583 
584     if(x > 0)
585       {
586 	posMax = (posMax - 1);
587 
588 	if(x > posMax)
589 	  {
590 	    x = posMax;
591 	  }
592       }
593     else
594       {
595 	negMin = -posMax;
596 
597 	if(x < negMin)
598 	  {
599 	    x = negMin;
600 	  }
601       }
602     return (x);
603 
604 
605   }
606 
607 #endif /* end of ARM_MATH_CM0 */
608 
609 
610 
611   /*
612    * @brief C custom defined intrinsic function for M3 and M0 processors
613    */
614 #if defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0)
615 
616   /*
617    * @brief C custom defined QADD8 for M3 and M0 processors
618    */
__QADD8(q31_t x,q31_t y)619   __STATIC_INLINE q31_t __QADD8(
620 				q31_t x,
621 				q31_t y)
622   {
623 
624     q31_t sum;
625     q7_t r, s, t, u;
626 
627     r = (char) x;
628     s = (char) y;
629 
630     r = __SSAT((q31_t) (r + s), 8);
631     s = __SSAT(((q31_t) (((x << 16) >> 24) + ((y << 16) >> 24))), 8);
632     t = __SSAT(((q31_t) (((x << 8) >> 24) + ((y << 8) >> 24))), 8);
633     u = __SSAT(((q31_t) ((x >> 24) + (y >> 24))), 8);
634 
635     sum = (((q31_t) u << 24) & 0xFF000000) | (((q31_t) t << 16) & 0x00FF0000) |
636       (((q31_t) s << 8) & 0x0000FF00) | (r & 0x000000FF);
637 
638     return sum;
639 
640   }
641 
642   /*
643    * @brief C custom defined QSUB8 for M3 and M0 processors
644    */
__QSUB8(q31_t x,q31_t y)645   __STATIC_INLINE q31_t __QSUB8(
646 				q31_t x,
647 				q31_t y)
648   {
649 
650     q31_t sum;
651     q31_t r, s, t, u;
652 
653     r = (char) x;
654     s = (char) y;
655 
656     r = __SSAT((r - s), 8);
657     s = __SSAT(((q31_t) (((x << 16) >> 24) - ((y << 16) >> 24))), 8) << 8;
658     t = __SSAT(((q31_t) (((x << 8) >> 24) - ((y << 8) >> 24))), 8) << 16;
659     u = __SSAT(((q31_t) ((x >> 24) - (y >> 24))), 8) << 24;
660 
661     sum =
662       (u & 0xFF000000) | (t & 0x00FF0000) | (s & 0x0000FF00) | (r & 0x000000FF);
663 
664     return sum;
665   }
666 
667   /*
668    * @brief C custom defined QADD16 for M3 and M0 processors
669    */
670 
671   /*
672    * @brief C custom defined QADD16 for M3 and M0 processors
673    */
__QADD16(q31_t x,q31_t y)674   __STATIC_INLINE q31_t __QADD16(
675 				 q31_t x,
676 				 q31_t y)
677   {
678 
679     q31_t sum;
680     q31_t r, s;
681 
682     r = (short) x;
683     s = (short) y;
684 
685     r = __SSAT(r + s, 16);
686     s = __SSAT(((q31_t) ((x >> 16) + (y >> 16))), 16) << 16;
687 
688     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
689 
690     return sum;
691 
692   }
693 
694   /*
695    * @brief C custom defined SHADD16 for M3 and M0 processors
696    */
__SHADD16(q31_t x,q31_t y)697   __STATIC_INLINE q31_t __SHADD16(
698 				  q31_t x,
699 				  q31_t y)
700   {
701 
702     q31_t sum;
703     q31_t r, s;
704 
705     r = (short) x;
706     s = (short) y;
707 
708     r = ((r >> 1) + (s >> 1));
709     s = ((q31_t) ((x >> 17) + (y >> 17))) << 16;
710 
711     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
712 
713     return sum;
714 
715   }
716 
717   /*
718    * @brief C custom defined QSUB16 for M3 and M0 processors
719    */
__QSUB16(q31_t x,q31_t y)720   __STATIC_INLINE q31_t __QSUB16(
721 				 q31_t x,
722 				 q31_t y)
723   {
724 
725     q31_t sum;
726     q31_t r, s;
727 
728     r = (short) x;
729     s = (short) y;
730 
731     r = __SSAT(r - s, 16);
732     s = __SSAT(((q31_t) ((x >> 16) - (y >> 16))), 16) << 16;
733 
734     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
735 
736     return sum;
737   }
738 
739   /*
740    * @brief C custom defined SHSUB16 for M3 and M0 processors
741    */
__SHSUB16(q31_t x,q31_t y)742   __STATIC_INLINE q31_t __SHSUB16(
743 				  q31_t x,
744 				  q31_t y)
745   {
746 
747     q31_t diff;
748     q31_t r, s;
749 
750     r = (short) x;
751     s = (short) y;
752 
753     r = ((r >> 1) - (s >> 1));
754     s = (((x >> 17) - (y >> 17)) << 16);
755 
756     diff = (s & 0xFFFF0000) | (r & 0x0000FFFF);
757 
758     return diff;
759   }
760 
761   /*
762    * @brief C custom defined QASX for M3 and M0 processors
763    */
__QASX(q31_t x,q31_t y)764   __STATIC_INLINE q31_t __QASX(
765 			       q31_t x,
766 			       q31_t y)
767   {
768 
769     q31_t sum = 0;
770 
771     sum = ((sum + clip_q31_to_q15((q31_t) ((short) (x >> 16) + (short) y))) << 16) +
772       clip_q31_to_q15((q31_t) ((short) x - (short) (y >> 16)));
773 
774     return sum;
775   }
776 
777   /*
778    * @brief C custom defined SHASX for M3 and M0 processors
779    */
__SHASX(q31_t x,q31_t y)780   __STATIC_INLINE q31_t __SHASX(
781 				q31_t x,
782 				q31_t y)
783   {
784 
785     q31_t sum;
786     q31_t r, s;
787 
788     r = (short) x;
789     s = (short) y;
790 
791     r = ((r >> 1) - (y >> 17));
792     s = (((x >> 17) + (s >> 1)) << 16);
793 
794     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
795 
796     return sum;
797   }
798 
799 
800   /*
801    * @brief C custom defined QSAX for M3 and M0 processors
802    */
__QSAX(q31_t x,q31_t y)803   __STATIC_INLINE q31_t __QSAX(
804 			       q31_t x,
805 			       q31_t y)
806   {
807 
808     q31_t sum = 0;
809 
810     sum = ((sum + clip_q31_to_q15((q31_t) ((short) (x >> 16) - (short) y))) << 16) +
811       clip_q31_to_q15((q31_t) ((short) x + (short) (y >> 16)));
812 
813     return sum;
814   }
815 
816   /*
817    * @brief C custom defined SHSAX for M3 and M0 processors
818    */
__SHSAX(q31_t x,q31_t y)819   __STATIC_INLINE q31_t __SHSAX(
820 				q31_t x,
821 				q31_t y)
822   {
823 
824     q31_t sum;
825     q31_t r, s;
826 
827     r = (short) x;
828     s = (short) y;
829 
830     r = ((r >> 1) + (y >> 17));
831     s = (((x >> 17) - (s >> 1)) << 16);
832 
833     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
834 
835     return sum;
836   }
837 
838   /*
839    * @brief C custom defined SMUSDX for M3 and M0 processors
840    */
__SMUSDX(q31_t x,q31_t y)841   __STATIC_INLINE q31_t __SMUSDX(
842 				 q31_t x,
843 				 q31_t y)
844   {
845 
846     return ((q31_t)(((short) x * (short) (y >> 16)) -
847 		    ((short) (x >> 16) * (short) y)));
848   }
849 
850   /*
851    * @brief C custom defined SMUADX for M3 and M0 processors
852    */
__SMUADX(q31_t x,q31_t y)853   __STATIC_INLINE q31_t __SMUADX(
854 				 q31_t x,
855 				 q31_t y)
856   {
857 
858     return ((q31_t)(((short) x * (short) (y >> 16)) +
859 		    ((short) (x >> 16) * (short) y)));
860   }
861 
862   /*
863    * @brief C custom defined QADD for M3 and M0 processors
864    */
__QADD(q31_t x,q31_t y)865   __STATIC_INLINE q31_t __QADD(
866 			       q31_t x,
867 			       q31_t y)
868   {
869     return clip_q63_to_q31((q63_t) x + y);
870   }
871 
872   /*
873    * @brief C custom defined QSUB for M3 and M0 processors
874    */
__QSUB(q31_t x,q31_t y)875   __STATIC_INLINE q31_t __QSUB(
876 			       q31_t x,
877 			       q31_t y)
878   {
879     return clip_q63_to_q31((q63_t) x - y);
880   }
881 
882   /*
883    * @brief C custom defined SMLAD for M3 and M0 processors
884    */
__SMLAD(q31_t x,q31_t y,q31_t sum)885   __STATIC_INLINE q31_t __SMLAD(
886 				q31_t x,
887 				q31_t y,
888 				q31_t sum)
889   {
890 
891     return (sum + ((short) (x >> 16) * (short) (y >> 16)) +
892             ((short) x * (short) y));
893   }
894 
895   /*
896    * @brief C custom defined SMLADX for M3 and M0 processors
897    */
__SMLADX(q31_t x,q31_t y,q31_t sum)898   __STATIC_INLINE q31_t __SMLADX(
899 				 q31_t x,
900 				 q31_t y,
901 				 q31_t sum)
902   {
903 
904     return (sum + ((short) (x >> 16) * (short) (y)) +
905             ((short) x * (short) (y >> 16)));
906   }
907 
908   /*
909    * @brief C custom defined SMLSDX for M3 and M0 processors
910    */
__SMLSDX(q31_t x,q31_t y,q31_t sum)911   __STATIC_INLINE q31_t __SMLSDX(
912 				 q31_t x,
913 				 q31_t y,
914 				 q31_t sum)
915   {
916 
917     return (sum - ((short) (x >> 16) * (short) (y)) +
918             ((short) x * (short) (y >> 16)));
919   }
920 
921   /*
922    * @brief C custom defined SMLALD for M3 and M0 processors
923    */
__SMLALD(q31_t x,q31_t y,q63_t sum)924   __STATIC_INLINE q63_t __SMLALD(
925 				 q31_t x,
926 				 q31_t y,
927 				 q63_t sum)
928   {
929 
930     return (sum + ((short) (x >> 16) * (short) (y >> 16)) +
931             ((short) x * (short) y));
932   }
933 
934   /*
935    * @brief C custom defined SMLALDX for M3 and M0 processors
936    */
__SMLALDX(q31_t x,q31_t y,q63_t sum)937   __STATIC_INLINE q63_t __SMLALDX(
938 				  q31_t x,
939 				  q31_t y,
940 				  q63_t sum)
941   {
942 
943     return (sum + ((short) (x >> 16) * (short) y)) +
944       ((short) x * (short) (y >> 16));
945   }
946 
947   /*
948    * @brief C custom defined SMUAD for M3 and M0 processors
949    */
__SMUAD(q31_t x,q31_t y)950   __STATIC_INLINE q31_t __SMUAD(
951 				q31_t x,
952 				q31_t y)
953   {
954 
955     return (((x >> 16) * (y >> 16)) +
956             (((x << 16) >> 16) * ((y << 16) >> 16)));
957   }
958 
959   /*
960    * @brief C custom defined SMUSD for M3 and M0 processors
961    */
__SMUSD(q31_t x,q31_t y)962   __STATIC_INLINE q31_t __SMUSD(
963 				q31_t x,
964 				q31_t y)
965   {
966 
967     return (-((x >> 16) * (y >> 16)) +
968             (((x << 16) >> 16) * ((y << 16) >> 16)));
969   }
970 
971 
972 
973 
974 #endif /* (ARM_MATH_CM3) || defined (ARM_MATH_CM0) */
975 
976 
977   /**
978    * @brief Instance structure for the Q7 FIR filter.
979    */
980   typedef struct
981   {
982     uint16_t numTaps;        /**< number of filter coefficients in the filter. */
983     q7_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
984     q7_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
985   } arm_fir_instance_q7;
986 
987   /**
988    * @brief Instance structure for the Q15 FIR filter.
989    */
990   typedef struct
991   {
992     uint16_t numTaps;         /**< number of filter coefficients in the filter. */
993     q15_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
994     q15_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
995   } arm_fir_instance_q15;
996 
997   /**
998    * @brief Instance structure for the Q31 FIR filter.
999    */
1000   typedef struct
1001   {
1002     uint16_t numTaps;         /**< number of filter coefficients in the filter. */
1003     q31_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
1004     q31_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps. */
1005   } arm_fir_instance_q31;
1006 
1007   /**
1008    * @brief Instance structure for the floating-point FIR filter.
1009    */
1010   typedef struct
1011   {
1012     uint16_t numTaps;     /**< number of filter coefficients in the filter. */
1013     float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
1014     float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
1015   } arm_fir_instance_f32;
1016 
1017 
1018   /**
1019    * @brief Processing function for the Q7 FIR filter.
1020    * @param[in] *S points to an instance of the Q7 FIR filter structure.
1021    * @param[in] *pSrc points to the block of input data.
1022    * @param[out] *pDst points to the block of output data.
1023    * @param[in] blockSize number of samples to process.
1024    * @return none.
1025    */
1026   void arm_fir_q7(
1027 		  const arm_fir_instance_q7 * S,
1028 		   q7_t * pSrc,
1029 		  q7_t * pDst,
1030 		  uint32_t blockSize);
1031 
1032 
1033   /**
1034    * @brief  Initialization function for the Q7 FIR filter.
1035    * @param[in,out] *S points to an instance of the Q7 FIR structure.
1036    * @param[in] numTaps  Number of filter coefficients in the filter.
1037    * @param[in] *pCoeffs points to the filter coefficients.
1038    * @param[in] *pState points to the state buffer.
1039    * @param[in] blockSize number of samples that are processed.
1040    * @return none
1041    */
1042   void arm_fir_init_q7(
1043 		       arm_fir_instance_q7 * S,
1044 		       uint16_t numTaps,
1045 		       q7_t * pCoeffs,
1046 		       q7_t * pState,
1047 		       uint32_t blockSize);
1048 
1049 
1050   /**
1051    * @brief Processing function for the Q15 FIR filter.
1052    * @param[in] *S points to an instance of the Q15 FIR structure.
1053    * @param[in] *pSrc points to the block of input data.
1054    * @param[out] *pDst points to the block of output data.
1055    * @param[in] blockSize number of samples to process.
1056    * @return none.
1057    */
1058   void arm_fir_q15(
1059 		   const arm_fir_instance_q15 * S,
1060 		    q15_t * pSrc,
1061 		   q15_t * pDst,
1062 		   uint32_t blockSize);
1063 
1064   /**
1065    * @brief Processing function for the fast Q15 FIR filter for Cortex-M3 and Cortex-M4.
1066    * @param[in] *S points to an instance of the Q15 FIR filter structure.
1067    * @param[in] *pSrc points to the block of input data.
1068    * @param[out] *pDst points to the block of output data.
1069    * @param[in] blockSize number of samples to process.
1070    * @return none.
1071    */
1072   void arm_fir_fast_q15(
1073 			const arm_fir_instance_q15 * S,
1074 			 q15_t * pSrc,
1075 			q15_t * pDst,
1076 			uint32_t blockSize);
1077 
1078   /**
1079    * @brief  Initialization function for the Q15 FIR filter.
1080    * @param[in,out] *S points to an instance of the Q15 FIR filter structure.
1081    * @param[in] numTaps  Number of filter coefficients in the filter. Must be even and greater than or equal to 4.
1082    * @param[in] *pCoeffs points to the filter coefficients.
1083    * @param[in] *pState points to the state buffer.
1084    * @param[in] blockSize number of samples that are processed at a time.
1085    * @return The function returns ARM_MATH_SUCCESS if initialization was successful or ARM_MATH_ARGUMENT_ERROR if
1086    * <code>numTaps</code> is not a supported value.
1087    */
1088 
1089        arm_status arm_fir_init_q15(
1090 			      arm_fir_instance_q15 * S,
1091 			      uint16_t numTaps,
1092 			      q15_t * pCoeffs,
1093 			      q15_t * pState,
1094 			      uint32_t blockSize);
1095 
1096   /**
1097    * @brief Processing function for the Q31 FIR filter.
1098    * @param[in] *S points to an instance of the Q31 FIR filter structure.
1099    * @param[in] *pSrc points to the block of input data.
1100    * @param[out] *pDst points to the block of output data.
1101    * @param[in] blockSize number of samples to process.
1102    * @return none.
1103    */
1104   void arm_fir_q31(
1105 		   const arm_fir_instance_q31 * S,
1106 		    q31_t * pSrc,
1107 		   q31_t * pDst,
1108 		   uint32_t blockSize);
1109 
1110   /**
1111    * @brief Processing function for the fast Q31 FIR filter for Cortex-M3 and Cortex-M4.
1112    * @param[in] *S points to an instance of the Q31 FIR structure.
1113    * @param[in] *pSrc points to the block of input data.
1114    * @param[out] *pDst points to the block of output data.
1115    * @param[in] blockSize number of samples to process.
1116    * @return none.
1117    */
1118   void arm_fir_fast_q31(
1119 			const arm_fir_instance_q31 * S,
1120 			 q31_t * pSrc,
1121 			q31_t * pDst,
1122 			uint32_t blockSize);
1123 
1124   /**
1125    * @brief  Initialization function for the Q31 FIR filter.
1126    * @param[in,out] *S points to an instance of the Q31 FIR structure.
1127    * @param[in] 	numTaps  Number of filter coefficients in the filter.
1128    * @param[in] 	*pCoeffs points to the filter coefficients.
1129    * @param[in] 	*pState points to the state buffer.
1130    * @param[in] 	blockSize number of samples that are processed at a time.
1131    * @return 		none.
1132    */
1133   void arm_fir_init_q31(
1134 			arm_fir_instance_q31 * S,
1135 			uint16_t numTaps,
1136 			q31_t * pCoeffs,
1137 			q31_t * pState,
1138 			uint32_t blockSize);
1139 
1140   /**
1141    * @brief Processing function for the floating-point FIR filter.
1142    * @param[in] *S points to an instance of the floating-point FIR structure.
1143    * @param[in] *pSrc points to the block of input data.
1144    * @param[out] *pDst points to the block of output data.
1145    * @param[in] blockSize number of samples to process.
1146    * @return none.
1147    */
1148   void arm_fir_f32(
1149 		   const arm_fir_instance_f32 * S,
1150 		    float32_t * pSrc,
1151 		   float32_t * pDst,
1152 		   uint32_t blockSize);
1153 
1154   /**
1155    * @brief  Initialization function for the floating-point FIR filter.
1156    * @param[in,out] *S points to an instance of the floating-point FIR filter structure.
1157    * @param[in] 	numTaps  Number of filter coefficients in the filter.
1158    * @param[in] 	*pCoeffs points to the filter coefficients.
1159    * @param[in] 	*pState points to the state buffer.
1160    * @param[in] 	blockSize number of samples that are processed at a time.
1161    * @return    	none.
1162    */
1163   void arm_fir_init_f32(
1164 			arm_fir_instance_f32 * S,
1165 			uint16_t numTaps,
1166 			float32_t * pCoeffs,
1167 			float32_t * pState,
1168 			uint32_t blockSize);
1169 
1170 
1171   /**
1172    * @brief Instance structure for the Q15 Biquad cascade filter.
1173    */
1174   typedef struct
1175   {
1176     int8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
1177     q15_t *pState;            /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
1178     q15_t *pCoeffs;           /**< Points to the array of coefficients.  The array is of length 5*numStages. */
1179     int8_t postShift;         /**< Additional shift, in bits, applied to each output sample. */
1180 
1181   } arm_biquad_casd_df1_inst_q15;
1182 
1183 
1184   /**
1185    * @brief Instance structure for the Q31 Biquad cascade filter.
1186    */
1187   typedef struct
1188   {
1189     uint32_t numStages;      /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
1190     q31_t *pState;           /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
1191     q31_t *pCoeffs;          /**< Points to the array of coefficients.  The array is of length 5*numStages. */
1192     uint8_t postShift;       /**< Additional shift, in bits, applied to each output sample. */
1193 
1194   } arm_biquad_casd_df1_inst_q31;
1195 
1196   /**
1197    * @brief Instance structure for the floating-point Biquad cascade filter.
1198    */
1199   typedef struct
1200   {
1201     uint32_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
1202     float32_t *pState;          /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
1203     float32_t *pCoeffs;         /**< Points to the array of coefficients.  The array is of length 5*numStages. */
1204 
1205 
1206   } arm_biquad_casd_df1_inst_f32;
1207 
1208 
1209 
1210   /**
1211    * @brief Processing function for the Q15 Biquad cascade filter.
1212    * @param[in]  *S points to an instance of the Q15 Biquad cascade structure.
1213    * @param[in]  *pSrc points to the block of input data.
1214    * @param[out] *pDst points to the block of output data.
1215    * @param[in]  blockSize number of samples to process.
1216    * @return     none.
1217    */
1218 
1219   void arm_biquad_cascade_df1_q15(
1220 				  const arm_biquad_casd_df1_inst_q15 * S,
1221 				   q15_t * pSrc,
1222 				  q15_t * pDst,
1223 				  uint32_t blockSize);
1224 
1225   /**
1226    * @brief  Initialization function for the Q15 Biquad cascade filter.
1227    * @param[in,out] *S           points to an instance of the Q15 Biquad cascade structure.
1228    * @param[in]     numStages    number of 2nd order stages in the filter.
1229    * @param[in]     *pCoeffs     points to the filter coefficients.
1230    * @param[in]     *pState      points to the state buffer.
1231    * @param[in]     postShift    Shift to be applied to the output. Varies according to the coefficients format
1232    * @return        none
1233    */
1234 
1235   void arm_biquad_cascade_df1_init_q15(
1236 				       arm_biquad_casd_df1_inst_q15 * S,
1237 				       uint8_t numStages,
1238 				       q15_t * pCoeffs,
1239 				       q15_t * pState,
1240 				       int8_t postShift);
1241 
1242 
1243   /**
1244    * @brief Fast but less precise processing function for the Q15 Biquad cascade filter for Cortex-M3 and Cortex-M4.
1245    * @param[in]  *S points to an instance of the Q15 Biquad cascade structure.
1246    * @param[in]  *pSrc points to the block of input data.
1247    * @param[out] *pDst points to the block of output data.
1248    * @param[in]  blockSize number of samples to process.
1249    * @return     none.
1250    */
1251 
1252   void arm_biquad_cascade_df1_fast_q15(
1253 				       const arm_biquad_casd_df1_inst_q15 * S,
1254 				        q15_t * pSrc,
1255 				       q15_t * pDst,
1256 				       uint32_t blockSize);
1257 
1258 
1259   /**
1260    * @brief Processing function for the Q31 Biquad cascade filter
1261    * @param[in]  *S         points to an instance of the Q31 Biquad cascade structure.
1262    * @param[in]  *pSrc      points to the block of input data.
1263    * @param[out] *pDst      points to the block of output data.
1264    * @param[in]  blockSize  number of samples to process.
1265    * @return     none.
1266    */
1267 
1268   void arm_biquad_cascade_df1_q31(
1269 				  const arm_biquad_casd_df1_inst_q31 * S,
1270 				   q31_t * pSrc,
1271 				  q31_t * pDst,
1272 				  uint32_t blockSize);
1273 
1274   /**
1275    * @brief Fast but less precise processing function for the Q31 Biquad cascade filter for Cortex-M3 and Cortex-M4.
1276    * @param[in]  *S         points to an instance of the Q31 Biquad cascade structure.
1277    * @param[in]  *pSrc      points to the block of input data.
1278    * @param[out] *pDst      points to the block of output data.
1279    * @param[in]  blockSize  number of samples to process.
1280    * @return     none.
1281    */
1282 
1283   void arm_biquad_cascade_df1_fast_q31(
1284 				       const arm_biquad_casd_df1_inst_q31 * S,
1285 				        q31_t * pSrc,
1286 				       q31_t * pDst,
1287 				       uint32_t blockSize);
1288 
1289   /**
1290    * @brief  Initialization function for the Q31 Biquad cascade filter.
1291    * @param[in,out] *S           points to an instance of the Q31 Biquad cascade structure.
1292    * @param[in]     numStages      number of 2nd order stages in the filter.
1293    * @param[in]     *pCoeffs     points to the filter coefficients.
1294    * @param[in]     *pState      points to the state buffer.
1295    * @param[in]     postShift    Shift to be applied to the output. Varies according to the coefficients format
1296    * @return        none
1297    */
1298 
1299   void arm_biquad_cascade_df1_init_q31(
1300 				       arm_biquad_casd_df1_inst_q31 * S,
1301 				       uint8_t numStages,
1302 				       q31_t * pCoeffs,
1303 				       q31_t * pState,
1304 				       int8_t postShift);
1305 
1306   /**
1307    * @brief Processing function for the floating-point Biquad cascade filter.
1308    * @param[in]  *S         points to an instance of the floating-point Biquad cascade structure.
1309    * @param[in]  *pSrc      points to the block of input data.
1310    * @param[out] *pDst      points to the block of output data.
1311    * @param[in]  blockSize  number of samples to process.
1312    * @return     none.
1313    */
1314 
1315   void arm_biquad_cascade_df1_f32(
1316 				  const arm_biquad_casd_df1_inst_f32 * S,
1317 				   float32_t * pSrc,
1318 				  float32_t * pDst,
1319 				  uint32_t blockSize);
1320 
1321   /**
1322    * @brief  Initialization function for the floating-point Biquad cascade filter.
1323    * @param[in,out] *S           points to an instance of the floating-point Biquad cascade structure.
1324    * @param[in]     numStages    number of 2nd order stages in the filter.
1325    * @param[in]     *pCoeffs     points to the filter coefficients.
1326    * @param[in]     *pState      points to the state buffer.
1327    * @return        none
1328    */
1329 
1330   void arm_biquad_cascade_df1_init_f32(
1331 				       arm_biquad_casd_df1_inst_f32 * S,
1332 				       uint8_t numStages,
1333 				       float32_t * pCoeffs,
1334 				       float32_t * pState);
1335 
1336 
1337   /**
1338    * @brief Instance structure for the floating-point matrix structure.
1339    */
1340 
1341   typedef struct
1342   {
1343     uint16_t numRows;     /**< number of rows of the matrix.     */
1344     uint16_t numCols;     /**< number of columns of the matrix.  */
1345     float32_t *pData;     /**< points to the data of the matrix. */
1346   } arm_matrix_instance_f32;
1347 
1348   /**
1349    * @brief Instance structure for the Q15 matrix structure.
1350    */
1351 
1352   typedef struct
1353   {
1354     uint16_t numRows;     /**< number of rows of the matrix.     */
1355     uint16_t numCols;     /**< number of columns of the matrix.  */
1356     q15_t *pData;         /**< points to the data of the matrix. */
1357 
1358   } arm_matrix_instance_q15;
1359 
1360   /**
1361    * @brief Instance structure for the Q31 matrix structure.
1362    */
1363 
1364   typedef struct
1365   {
1366     uint16_t numRows;     /**< number of rows of the matrix.     */
1367     uint16_t numCols;     /**< number of columns of the matrix.  */
1368     q31_t *pData;         /**< points to the data of the matrix. */
1369 
1370   } arm_matrix_instance_q31;
1371 
1372 
1373 
1374   /**
1375    * @brief Floating-point matrix addition.
1376    * @param[in]       *pSrcA points to the first input matrix structure
1377    * @param[in]       *pSrcB points to the second input matrix structure
1378    * @param[out]      *pDst points to output matrix structure
1379    * @return     The function returns either
1380    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1381    */
1382 
1383   arm_status arm_mat_add_f32(
1384 			     const arm_matrix_instance_f32 * pSrcA,
1385 			     const arm_matrix_instance_f32 * pSrcB,
1386 			     arm_matrix_instance_f32 * pDst);
1387 
1388   /**
1389    * @brief Q15 matrix addition.
1390    * @param[in]       *pSrcA points to the first input matrix structure
1391    * @param[in]       *pSrcB points to the second input matrix structure
1392    * @param[out]      *pDst points to output matrix structure
1393    * @return     The function returns either
1394    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1395    */
1396 
1397   arm_status arm_mat_add_q15(
1398 			     const arm_matrix_instance_q15 * pSrcA,
1399 			     const arm_matrix_instance_q15 * pSrcB,
1400 			     arm_matrix_instance_q15 * pDst);
1401 
1402   /**
1403    * @brief Q31 matrix addition.
1404    * @param[in]       *pSrcA points to the first input matrix structure
1405    * @param[in]       *pSrcB points to the second input matrix structure
1406    * @param[out]      *pDst points to output matrix structure
1407    * @return     The function returns either
1408    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1409    */
1410 
1411   arm_status arm_mat_add_q31(
1412 			     const arm_matrix_instance_q31 * pSrcA,
1413 			     const arm_matrix_instance_q31 * pSrcB,
1414 			     arm_matrix_instance_q31 * pDst);
1415 
1416 
1417   /**
1418    * @brief Floating-point matrix transpose.
1419    * @param[in]  *pSrc points to the input matrix
1420    * @param[out] *pDst points to the output matrix
1421    * @return 	The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
1422    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1423    */
1424 
1425   arm_status arm_mat_trans_f32(
1426 			       const arm_matrix_instance_f32 * pSrc,
1427 			       arm_matrix_instance_f32 * pDst);
1428 
1429 
1430   /**
1431    * @brief Q15 matrix transpose.
1432    * @param[in]  *pSrc points to the input matrix
1433    * @param[out] *pDst points to the output matrix
1434    * @return 	The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
1435    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1436    */
1437 
1438   arm_status arm_mat_trans_q15(
1439 			       const arm_matrix_instance_q15 * pSrc,
1440 			       arm_matrix_instance_q15 * pDst);
1441 
1442   /**
1443    * @brief Q31 matrix transpose.
1444    * @param[in]  *pSrc points to the input matrix
1445    * @param[out] *pDst points to the output matrix
1446    * @return 	The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
1447    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1448    */
1449 
1450   arm_status arm_mat_trans_q31(
1451 			       const arm_matrix_instance_q31 * pSrc,
1452 			       arm_matrix_instance_q31 * pDst);
1453 
1454 
1455   /**
1456    * @brief Floating-point matrix multiplication
1457    * @param[in]       *pSrcA points to the first input matrix structure
1458    * @param[in]       *pSrcB points to the second input matrix structure
1459    * @param[out]      *pDst points to output matrix structure
1460    * @return     The function returns either
1461    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1462    */
1463 
1464   arm_status arm_mat_mult_f32(
1465 			      const arm_matrix_instance_f32 * pSrcA,
1466 			      const arm_matrix_instance_f32 * pSrcB,
1467 			      arm_matrix_instance_f32 * pDst);
1468 
1469   /**
1470    * @brief Q15 matrix multiplication
1471    * @param[in]       *pSrcA points to the first input matrix structure
1472    * @param[in]       *pSrcB points to the second input matrix structure
1473    * @param[out]      *pDst points to output matrix structure
1474    * @return     The function returns either
1475    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1476    */
1477 
1478   arm_status arm_mat_mult_q15(
1479 			      const arm_matrix_instance_q15 * pSrcA,
1480 			      const arm_matrix_instance_q15 * pSrcB,
1481 			      arm_matrix_instance_q15 * pDst,
1482 			      q15_t * pState);
1483 
1484   /**
1485    * @brief Q15 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4
1486    * @param[in]       *pSrcA  points to the first input matrix structure
1487    * @param[in]       *pSrcB  points to the second input matrix structure
1488    * @param[out]      *pDst   points to output matrix structure
1489    * @param[in]		  *pState points to the array for storing intermediate results
1490    * @return     The function returns either
1491    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1492    */
1493 
1494   arm_status arm_mat_mult_fast_q15(
1495 				   const arm_matrix_instance_q15 * pSrcA,
1496 				   const arm_matrix_instance_q15 * pSrcB,
1497 				   arm_matrix_instance_q15 * pDst,
1498 				   q15_t * pState);
1499 
1500   /**
1501    * @brief Q31 matrix multiplication
1502    * @param[in]       *pSrcA points to the first input matrix structure
1503    * @param[in]       *pSrcB points to the second input matrix structure
1504    * @param[out]      *pDst points to output matrix structure
1505    * @return     The function returns either
1506    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1507    */
1508 
1509   arm_status arm_mat_mult_q31(
1510 			      const arm_matrix_instance_q31 * pSrcA,
1511 			      const arm_matrix_instance_q31 * pSrcB,
1512 			      arm_matrix_instance_q31 * pDst);
1513 
1514   /**
1515    * @brief Q31 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4
1516    * @param[in]       *pSrcA points to the first input matrix structure
1517    * @param[in]       *pSrcB points to the second input matrix structure
1518    * @param[out]      *pDst points to output matrix structure
1519    * @return     The function returns either
1520    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1521    */
1522 
1523   arm_status arm_mat_mult_fast_q31(
1524 				   const arm_matrix_instance_q31 * pSrcA,
1525 				   const arm_matrix_instance_q31 * pSrcB,
1526 				   arm_matrix_instance_q31 * pDst);
1527 
1528 
1529   /**
1530    * @brief Floating-point matrix subtraction
1531    * @param[in]       *pSrcA points to the first input matrix structure
1532    * @param[in]       *pSrcB points to the second input matrix structure
1533    * @param[out]      *pDst points to output matrix structure
1534    * @return     The function returns either
1535    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1536    */
1537 
1538   arm_status arm_mat_sub_f32(
1539 			     const arm_matrix_instance_f32 * pSrcA,
1540 			     const arm_matrix_instance_f32 * pSrcB,
1541 			     arm_matrix_instance_f32 * pDst);
1542 
1543   /**
1544    * @brief Q15 matrix subtraction
1545    * @param[in]       *pSrcA points to the first input matrix structure
1546    * @param[in]       *pSrcB points to the second input matrix structure
1547    * @param[out]      *pDst points to output matrix structure
1548    * @return     The function returns either
1549    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1550    */
1551 
1552   arm_status arm_mat_sub_q15(
1553 			     const arm_matrix_instance_q15 * pSrcA,
1554 			     const arm_matrix_instance_q15 * pSrcB,
1555 			     arm_matrix_instance_q15 * pDst);
1556 
1557   /**
1558    * @brief Q31 matrix subtraction
1559    * @param[in]       *pSrcA points to the first input matrix structure
1560    * @param[in]       *pSrcB points to the second input matrix structure
1561    * @param[out]      *pDst points to output matrix structure
1562    * @return     The function returns either
1563    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1564    */
1565 
1566   arm_status arm_mat_sub_q31(
1567 			     const arm_matrix_instance_q31 * pSrcA,
1568 			     const arm_matrix_instance_q31 * pSrcB,
1569 			     arm_matrix_instance_q31 * pDst);
1570 
1571   /**
1572    * @brief Floating-point matrix scaling.
1573    * @param[in]  *pSrc points to the input matrix
1574    * @param[in]  scale scale factor
1575    * @param[out] *pDst points to the output matrix
1576    * @return     The function returns either
1577    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1578    */
1579 
1580   arm_status arm_mat_scale_f32(
1581 			       const arm_matrix_instance_f32 * pSrc,
1582 			       float32_t scale,
1583 			       arm_matrix_instance_f32 * pDst);
1584 
1585   /**
1586    * @brief Q15 matrix scaling.
1587    * @param[in]       *pSrc points to input matrix
1588    * @param[in]       scaleFract fractional portion of the scale factor
1589    * @param[in]       shift number of bits to shift the result by
1590    * @param[out]      *pDst points to output matrix
1591    * @return     The function returns either
1592    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1593    */
1594 
1595   arm_status arm_mat_scale_q15(
1596 			       const arm_matrix_instance_q15 * pSrc,
1597 			       q15_t scaleFract,
1598 			       int32_t shift,
1599 			       arm_matrix_instance_q15 * pDst);
1600 
1601   /**
1602    * @brief Q31 matrix scaling.
1603    * @param[in]       *pSrc points to input matrix
1604    * @param[in]       scaleFract fractional portion of the scale factor
1605    * @param[in]       shift number of bits to shift the result by
1606    * @param[out]      *pDst points to output matrix structure
1607    * @return     The function returns either
1608    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1609    */
1610 
1611   arm_status arm_mat_scale_q31(
1612 			       const arm_matrix_instance_q31 * pSrc,
1613 			       q31_t scaleFract,
1614 			       int32_t shift,
1615 			       arm_matrix_instance_q31 * pDst);
1616 
1617 
1618   /**
1619    * @brief  Q31 matrix initialization.
1620    * @param[in,out] *S             points to an instance of the floating-point matrix structure.
1621    * @param[in]     nRows          number of rows in the matrix.
1622    * @param[in]     nColumns       number of columns in the matrix.
1623    * @param[in]     *pData	       points to the matrix data array.
1624    * @return        none
1625    */
1626 
1627   void arm_mat_init_q31(
1628 			arm_matrix_instance_q31 * S,
1629 			uint16_t nRows,
1630 			uint16_t nColumns,
1631 			q31_t   *pData);
1632 
1633   /**
1634    * @brief  Q15 matrix initialization.
1635    * @param[in,out] *S             points to an instance of the floating-point matrix structure.
1636    * @param[in]     nRows          number of rows in the matrix.
1637    * @param[in]     nColumns       number of columns in the matrix.
1638    * @param[in]     *pData	       points to the matrix data array.
1639    * @return        none
1640    */
1641 
1642   void arm_mat_init_q15(
1643 			arm_matrix_instance_q15 * S,
1644 			uint16_t nRows,
1645 			uint16_t nColumns,
1646 			q15_t    *pData);
1647 
1648   /**
1649    * @brief  Floating-point matrix initialization.
1650    * @param[in,out] *S             points to an instance of the floating-point matrix structure.
1651    * @param[in]     nRows          number of rows in the matrix.
1652    * @param[in]     nColumns       number of columns in the matrix.
1653    * @param[in]     *pData	       points to the matrix data array.
1654    * @return        none
1655    */
1656 
1657   void arm_mat_init_f32(
1658 			arm_matrix_instance_f32 * S,
1659 			uint16_t nRows,
1660 			uint16_t nColumns,
1661 			float32_t   *pData);
1662 
1663 
1664 
1665   /**
1666    * @brief Instance structure for the Q15 PID Control.
1667    */
1668   typedef struct
1669   {
1670     q15_t A0; 	 /**< The derived gain, A0 = Kp + Ki + Kd . */
1671 	#ifdef ARM_MATH_CM0
1672 	q15_t A1;
1673 	q15_t A2;
1674 	#else
1675     q31_t A1;           /**< The derived gain A1 = -Kp - 2Kd | Kd.*/
1676 	#endif
1677     q15_t state[3];       /**< The state array of length 3. */
1678     q15_t Kp;           /**< The proportional gain. */
1679     q15_t Ki;           /**< The integral gain. */
1680     q15_t Kd;           /**< The derivative gain. */
1681   } arm_pid_instance_q15;
1682 
1683   /**
1684    * @brief Instance structure for the Q31 PID Control.
1685    */
1686   typedef struct
1687   {
1688     q31_t A0;            /**< The derived gain, A0 = Kp + Ki + Kd . */
1689     q31_t A1;            /**< The derived gain, A1 = -Kp - 2Kd. */
1690     q31_t A2;            /**< The derived gain, A2 = Kd . */
1691     q31_t state[3];      /**< The state array of length 3. */
1692     q31_t Kp;            /**< The proportional gain. */
1693     q31_t Ki;            /**< The integral gain. */
1694     q31_t Kd;            /**< The derivative gain. */
1695 
1696   } arm_pid_instance_q31;
1697 
1698   /**
1699    * @brief Instance structure for the floating-point PID Control.
1700    */
1701   typedef struct
1702   {
1703     float32_t A0;          /**< The derived gain, A0 = Kp + Ki + Kd . */
1704     float32_t A1;          /**< The derived gain, A1 = -Kp - 2Kd. */
1705     float32_t A2;          /**< The derived gain, A2 = Kd . */
1706     float32_t state[3];    /**< The state array of length 3. */
1707     float32_t Kp;               /**< The proportional gain. */
1708     float32_t Ki;               /**< The integral gain. */
1709     float32_t Kd;               /**< The derivative gain. */
1710   } arm_pid_instance_f32;
1711 
1712 
1713 
1714   /**
1715    * @brief  Initialization function for the floating-point PID Control.
1716    * @param[in,out] *S      points to an instance of the PID structure.
1717    * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.
1718    * @return none.
1719    */
1720   void arm_pid_init_f32(
1721 			arm_pid_instance_f32 * S,
1722 			int32_t resetStateFlag);
1723 
1724   /**
1725    * @brief  Reset function for the floating-point PID Control.
1726    * @param[in,out] *S is an instance of the floating-point PID Control structure
1727    * @return none
1728    */
1729   void arm_pid_reset_f32(
1730 			 arm_pid_instance_f32 * S);
1731 
1732 
1733   /**
1734    * @brief  Initialization function for the Q31 PID Control.
1735    * @param[in,out] *S points to an instance of the Q15 PID structure.
1736    * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.
1737    * @return none.
1738    */
1739   void arm_pid_init_q31(
1740 			arm_pid_instance_q31 * S,
1741 			int32_t resetStateFlag);
1742 
1743 
1744   /**
1745    * @brief  Reset function for the Q31 PID Control.
1746    * @param[in,out] *S points to an instance of the Q31 PID Control structure
1747    * @return none
1748    */
1749 
1750   void arm_pid_reset_q31(
1751 			 arm_pid_instance_q31 * S);
1752 
1753   /**
1754    * @brief  Initialization function for the Q15 PID Control.
1755    * @param[in,out] *S points to an instance of the Q15 PID structure.
1756    * @param[in] resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.
1757    * @return none.
1758    */
1759   void arm_pid_init_q15(
1760 			arm_pid_instance_q15 * S,
1761 			int32_t resetStateFlag);
1762 
1763   /**
1764    * @brief  Reset function for the Q15 PID Control.
1765    * @param[in,out] *S points to an instance of the q15 PID Control structure
1766    * @return none
1767    */
1768   void arm_pid_reset_q15(
1769 			 arm_pid_instance_q15 * S);
1770 
1771 
1772   /**
1773    * @brief Instance structure for the floating-point Linear Interpolate function.
1774    */
1775   typedef struct
1776   {
1777     uint32_t nValues;           /**< nValues */
1778     float32_t x1;               /**< x1 */
1779     float32_t xSpacing;         /**< xSpacing */
1780     float32_t *pYData;          /**< pointer to the table of Y values */
1781   } arm_linear_interp_instance_f32;
1782 
1783   /**
1784    * @brief Instance structure for the floating-point bilinear interpolation function.
1785    */
1786 
1787   typedef struct
1788   {
1789     uint16_t numRows;	/**< number of rows in the data table. */
1790     uint16_t numCols;	/**< number of columns in the data table. */
1791     float32_t *pData;	/**< points to the data table. */
1792   } arm_bilinear_interp_instance_f32;
1793 
1794    /**
1795    * @brief Instance structure for the Q31 bilinear interpolation function.
1796    */
1797 
1798   typedef struct
1799   {
1800     uint16_t numRows;	/**< number of rows in the data table. */
1801     uint16_t numCols;	/**< number of columns in the data table. */
1802     q31_t *pData;	/**< points to the data table. */
1803   } arm_bilinear_interp_instance_q31;
1804 
1805    /**
1806    * @brief Instance structure for the Q15 bilinear interpolation function.
1807    */
1808 
1809   typedef struct
1810   {
1811     uint16_t numRows;	/**< number of rows in the data table. */
1812     uint16_t numCols;	/**< number of columns in the data table. */
1813     q15_t *pData;	/**< points to the data table. */
1814   } arm_bilinear_interp_instance_q15;
1815 
1816    /**
1817    * @brief Instance structure for the Q15 bilinear interpolation function.
1818    */
1819 
1820   typedef struct
1821   {
1822     uint16_t numRows; 	/**< number of rows in the data table. */
1823     uint16_t numCols;	/**< number of columns in the data table. */
1824     q7_t *pData;		/**< points to the data table. */
1825   } arm_bilinear_interp_instance_q7;
1826 
1827 
1828   /**
1829    * @brief Q7 vector multiplication.
1830    * @param[in]       *pSrcA points to the first input vector
1831    * @param[in]       *pSrcB points to the second input vector
1832    * @param[out]      *pDst  points to the output vector
1833    * @param[in]       blockSize number of samples in each vector
1834    * @return none.
1835    */
1836 
1837   void arm_mult_q7(
1838 		    q7_t * pSrcA,
1839 		    q7_t * pSrcB,
1840 		   q7_t * pDst,
1841 		   uint32_t blockSize);
1842 
1843   /**
1844    * @brief Q15 vector multiplication.
1845    * @param[in]       *pSrcA points to the first input vector
1846    * @param[in]       *pSrcB points to the second input vector
1847    * @param[out]      *pDst  points to the output vector
1848    * @param[in]       blockSize number of samples in each vector
1849    * @return none.
1850    */
1851 
1852   void arm_mult_q15(
1853 		     q15_t * pSrcA,
1854 		     q15_t * pSrcB,
1855 		    q15_t * pDst,
1856 		    uint32_t blockSize);
1857 
1858   /**
1859    * @brief Q31 vector multiplication.
1860    * @param[in]       *pSrcA points to the first input vector
1861    * @param[in]       *pSrcB points to the second input vector
1862    * @param[out]      *pDst points to the output vector
1863    * @param[in]       blockSize number of samples in each vector
1864    * @return none.
1865    */
1866 
1867   void arm_mult_q31(
1868 		     q31_t * pSrcA,
1869 		     q31_t * pSrcB,
1870 		    q31_t * pDst,
1871 		    uint32_t blockSize);
1872 
1873   /**
1874    * @brief Floating-point vector multiplication.
1875    * @param[in]       *pSrcA points to the first input vector
1876    * @param[in]       *pSrcB points to the second input vector
1877    * @param[out]      *pDst points to the output vector
1878    * @param[in]       blockSize number of samples in each vector
1879    * @return none.
1880    */
1881 
1882   void arm_mult_f32(
1883 		     float32_t * pSrcA,
1884 		     float32_t * pSrcB,
1885 		    float32_t * pDst,
1886 		    uint32_t blockSize);
1887 
1888 
1889   /**
1890    * @brief Instance structure for the Q15 CFFT/CIFFT function.
1891    */
1892 
1893   typedef struct
1894   {
1895     uint16_t  fftLen;                /**< length of the FFT. */
1896     uint8_t   ifftFlag;              /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
1897     uint8_t   bitReverseFlag;        /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
1898     q15_t     *pTwiddle;             /**< points to the twiddle factor table. */
1899     uint16_t  *pBitRevTable;         /**< points to the bit reversal table. */
1900     uint16_t  twidCoefModifier;      /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1901     uint16_t  bitRevFactor;          /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
1902   } arm_cfft_radix4_instance_q15;
1903 
1904   /**
1905    * @brief Instance structure for the Q31 CFFT/CIFFT function.
1906    */
1907 
1908   typedef struct
1909   {
1910     uint16_t    fftLen;              /**< length of the FFT. */
1911     uint8_t     ifftFlag;            /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
1912     uint8_t     bitReverseFlag;      /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
1913     q31_t       *pTwiddle;           /**< points to the twiddle factor table. */
1914     uint16_t    *pBitRevTable;       /**< points to the bit reversal table. */
1915     uint16_t    twidCoefModifier;    /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1916     uint16_t    bitRevFactor;        /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
1917   } arm_cfft_radix4_instance_q31;
1918 
1919   /**
1920    * @brief Instance structure for the floating-point CFFT/CIFFT function.
1921    */
1922 
1923   typedef struct
1924   {
1925     uint16_t     fftLen;               /**< length of the FFT. */
1926     uint8_t      ifftFlag;             /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
1927     uint8_t      bitReverseFlag;       /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
1928     float32_t    *pTwiddle;            /**< points to the twiddle factor table. */
1929     uint16_t     *pBitRevTable;        /**< points to the bit reversal table. */
1930     uint16_t     twidCoefModifier;     /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1931     uint16_t     bitRevFactor;         /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
1932 	float32_t    onebyfftLen;          /**< value of 1/fftLen. */
1933   } arm_cfft_radix4_instance_f32;
1934 
1935   /**
1936    * @brief Processing function for the Q15 CFFT/CIFFT.
1937    * @param[in]      *S    points to an instance of the Q15 CFFT/CIFFT structure.
1938    * @param[in, out] *pSrc points to the complex data buffer. Processing occurs in-place.
1939    * @return none.
1940    */
1941 
1942   void arm_cfft_radix4_q15(
1943 			   const arm_cfft_radix4_instance_q15 * S,
1944 			   q15_t * pSrc);
1945 
1946   /**
1947    * @brief Initialization function for the Q15 CFFT/CIFFT.
1948    * @param[in,out] *S             points to an instance of the Q15 CFFT/CIFFT structure.
1949    * @param[in]     fftLen         length of the FFT.
1950    * @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
1951    * @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
1952    * @return        arm_status     function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
1953    */
1954 
1955   arm_status arm_cfft_radix4_init_q15(
1956 				      arm_cfft_radix4_instance_q15 * S,
1957 				      uint16_t fftLen,
1958 				      uint8_t ifftFlag,
1959 				      uint8_t bitReverseFlag);
1960 
1961   /**
1962    * @brief Processing function for the Q31 CFFT/CIFFT.
1963    * @param[in]      *S    points to an instance of the Q31 CFFT/CIFFT structure.
1964    * @param[in, out] *pSrc points to the complex data buffer. Processing occurs in-place.
1965    * @return none.
1966    */
1967 
1968   void arm_cfft_radix4_q31(
1969 			   const arm_cfft_radix4_instance_q31 * S,
1970 			   q31_t * pSrc);
1971 
1972   /**
1973    * @brief  Initialization function for the Q31 CFFT/CIFFT.
1974    * @param[in,out] *S             points to an instance of the Q31 CFFT/CIFFT structure.
1975    * @param[in]     fftLen         length of the FFT.
1976    * @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
1977    * @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
1978    * @return        arm_status     function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
1979    */
1980 
1981   arm_status arm_cfft_radix4_init_q31(
1982 				      arm_cfft_radix4_instance_q31 * S,
1983 				      uint16_t fftLen,
1984 				      uint8_t ifftFlag,
1985 				      uint8_t bitReverseFlag);
1986 
1987   /**
1988    * @brief Processing function for the floating-point CFFT/CIFFT.
1989    * @param[in]      *S    points to an instance of the floating-point CFFT/CIFFT structure.
1990    * @param[in, out] *pSrc points to the complex data buffer. Processing occurs in-place.
1991    * @return none.
1992    */
1993 
1994   void arm_cfft_radix4_f32(
1995 			   const arm_cfft_radix4_instance_f32 * S,
1996 			   float32_t * pSrc);
1997 
1998   /**
1999    * @brief  Initialization function for the floating-point CFFT/CIFFT.
2000    * @param[in,out] *S             points to an instance of the floating-point CFFT/CIFFT structure.
2001    * @param[in]     fftLen         length of the FFT.
2002    * @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
2003    * @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
2004    * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
2005    */
2006 
2007   arm_status arm_cfft_radix4_init_f32(
2008 				      arm_cfft_radix4_instance_f32 * S,
2009 				      uint16_t fftLen,
2010 				      uint8_t ifftFlag,
2011 				      uint8_t bitReverseFlag);
2012 
2013 
2014 
/*----------------------------------------------------------------------
 *		Internal function prototypes for the FFT functions
 ----------------------------------------------------------------------*/
2018 
2019   /**
2020    * @brief  Core function for the floating-point CFFT butterfly process.
2021    * @param[in, out] *pSrc            points to the in-place buffer of floating-point data type.
2022    * @param[in]      fftLen           length of the FFT.
2023    * @param[in]      *pCoef           points to the twiddle coefficient buffer.
2024    * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
2025    * @return none.
2026    */
2027 
2028   void arm_radix4_butterfly_f32(
2029 				float32_t * pSrc,
2030 				uint16_t fftLen,
2031 				float32_t * pCoef,
2032 				uint16_t twidCoefModifier);
2033 
2034   /**
2035    * @brief  Core function for the floating-point CIFFT butterfly process.
2036    * @param[in, out] *pSrc            points to the in-place buffer of floating-point data type.
2037    * @param[in]      fftLen           length of the FFT.
2038    * @param[in]      *pCoef           points to twiddle coefficient buffer.
2039    * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
2040    * @param[in]      onebyfftLen      value of 1/fftLen.
2041    * @return none.
2042    */
2043 
2044   void arm_radix4_butterfly_inverse_f32(
2045 					float32_t * pSrc,
2046 					uint16_t fftLen,
2047 					float32_t * pCoef,
2048 					uint16_t twidCoefModifier,
2049 					float32_t onebyfftLen);
2050 
2051   /**
2052    * @brief  In-place bit reversal function.
2053    * @param[in, out] *pSrc        points to the in-place buffer of floating-point data type.
2054    * @param[in]      fftSize      length of the FFT.
2055    * @param[in]      bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table.
2056    * @param[in]      *pBitRevTab  points to the bit reversal table.
2057    * @return none.
2058    */
2059 
2060   void arm_bitreversal_f32(
2061 			   float32_t *pSrc,
2062 			   uint16_t fftSize,
2063 			   uint16_t bitRevFactor,
2064 			   uint16_t *pBitRevTab);
2065 
2066   /**
2067    * @brief  Core function for the Q31 CFFT butterfly process.
2068    * @param[in, out] *pSrc            points to the in-place buffer of Q31 data type.
2069    * @param[in]      fftLen           length of the FFT.
2070    * @param[in]      *pCoef           points to twiddle coefficient buffer.
2071    * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
2072    * @return none.
2073    */
2074 
2075   void arm_radix4_butterfly_q31(
2076 				q31_t *pSrc,
2077 				uint32_t fftLen,
2078 				q31_t *pCoef,
2079 				uint32_t twidCoefModifier);
2080 
2081   /**
2082    * @brief  Core function for the Q31 CIFFT butterfly process.
2083    * @param[in, out] *pSrc            points to the in-place buffer of Q31 data type.
2084    * @param[in]      fftLen           length of the FFT.
2085    * @param[in]      *pCoef           points to twiddle coefficient buffer.
2086    * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
2087    * @return none.
2088    */
2089 
2090   void arm_radix4_butterfly_inverse_q31(
2091 					q31_t * pSrc,
2092 					uint32_t fftLen,
2093 					q31_t * pCoef,
2094 					uint32_t twidCoefModifier);
2095 
2096   /**
2097    * @brief  In-place bit reversal function.
2098    * @param[in, out] *pSrc        points to the in-place buffer of Q31 data type.
2099    * @param[in]      fftLen       length of the FFT.
2100    * @param[in]      bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table
2101    * @param[in]      *pBitRevTab  points to bit reversal table.
2102    * @return none.
2103    */
2104 
2105   void arm_bitreversal_q31(
2106 			   q31_t * pSrc,
2107 			   uint32_t fftLen,
2108 			   uint16_t bitRevFactor,
2109 			   uint16_t *pBitRevTab);
2110 
2111   /**
2112    * @brief  Core function for the Q15 CFFT butterfly process.
2113    * @param[in, out] *pSrc16          points to the in-place buffer of Q15 data type.
2114    * @param[in]      fftLen           length of the FFT.
2115    * @param[in]      *pCoef16         points to twiddle coefficient buffer.
2116    * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
2117    * @return none.
2118    */
2119 
2120   void arm_radix4_butterfly_q15(
2121 				q15_t *pSrc16,
2122 				uint32_t fftLen,
2123 				q15_t *pCoef16,
2124 				uint32_t twidCoefModifier);
2125 
2126   /**
2127    * @brief  Core function for the Q15 CIFFT butterfly process.
2128    * @param[in, out] *pSrc16          points to the in-place buffer of Q15 data type.
2129    * @param[in]      fftLen           length of the FFT.
2130    * @param[in]      *pCoef16         points to twiddle coefficient buffer.
2131    * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
2132    * @return none.
2133    */
2134 
2135   void arm_radix4_butterfly_inverse_q15(
2136 					q15_t *pSrc16,
2137 					uint32_t fftLen,
2138 					q15_t *pCoef16,
2139 					uint32_t twidCoefModifier);
2140 
2141   /**
2142    * @brief  In-place bit reversal function.
2143    * @param[in, out] *pSrc        points to the in-place buffer of Q15 data type.
2144    * @param[in]      fftLen       length of the FFT.
2145    * @param[in]      bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table
2146    * @param[in]      *pBitRevTab  points to bit reversal table.
2147    * @return none.
2148    */
2149 
2150   void arm_bitreversal_q15(
2151 			   q15_t * pSrc,
2152 			   uint32_t fftLen,
2153 			   uint16_t bitRevFactor,
2154 			   uint16_t *pBitRevTab);
2155 
2156   /**
2157    * @brief Instance structure for the Q15 RFFT/RIFFT function.
2158    */
2159 
2160   typedef struct
2161   {
2162     uint32_t fftLenReal;                      /**< length of the real FFT. */
2163     uint32_t fftLenBy2;                       /**< length of the complex FFT. */
2164     uint8_t  ifftFlagR;                       /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
2165 	uint8_t  bitReverseFlagR;                 /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
2166     uint32_t twidCoefRModifier;               /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2167     q15_t    *pTwiddleAReal;                  /**< points to the real twiddle factor table. */
2168     q15_t    *pTwiddleBReal;                  /**< points to the imag twiddle factor table. */
2169     arm_cfft_radix4_instance_q15 *pCfft;	  /**< points to the complex FFT instance. */
2170   } arm_rfft_instance_q15;
2171 
2172   /**
2173    * @brief Instance structure for the Q31 RFFT/RIFFT function.
2174    */
2175 
2176   typedef struct
2177   {
2178     uint32_t fftLenReal;                        /**< length of the real FFT. */
2179     uint32_t fftLenBy2;                         /**< length of the complex FFT. */
2180     uint8_t  ifftFlagR;                         /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
2181 	uint8_t  bitReverseFlagR;                   /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
2182     uint32_t twidCoefRModifier;                 /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2183     q31_t    *pTwiddleAReal;                    /**< points to the real twiddle factor table. */
2184     q31_t    *pTwiddleBReal;                    /**< points to the imag twiddle factor table. */
2185     arm_cfft_radix4_instance_q31 *pCfft;        /**< points to the complex FFT instance. */
2186   } arm_rfft_instance_q31;
2187 
2188   /**
2189    * @brief Instance structure for the floating-point RFFT/RIFFT function.
2190    */
2191 
2192   typedef struct
2193   {
2194     uint32_t  fftLenReal;                       /**< length of the real FFT. */
2195     uint16_t  fftLenBy2;                        /**< length of the complex FFT. */
2196     uint8_t   ifftFlagR;                        /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
2197     uint8_t   bitReverseFlagR;                  /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
2198 	uint32_t  twidCoefRModifier;                /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2199     float32_t *pTwiddleAReal;                   /**< points to the real twiddle factor table. */
2200     float32_t *pTwiddleBReal;                   /**< points to the imag twiddle factor table. */
2201     arm_cfft_radix4_instance_f32 *pCfft;        /**< points to the complex FFT instance. */
2202   } arm_rfft_instance_f32;
2203 
2204   /**
2205    * @brief Processing function for the Q15 RFFT/RIFFT.
2206    * @param[in]  *S    points to an instance of the Q15 RFFT/RIFFT structure.
2207    * @param[in]  *pSrc points to the input buffer.
2208    * @param[out] *pDst points to the output buffer.
2209    * @return none.
2210    */
2211 
2212   void arm_rfft_q15(
2213 		    const arm_rfft_instance_q15 * S,
2214 		    q15_t * pSrc,
2215 		    q15_t * pDst);
2216 
2217   /**
2218    * @brief  Initialization function for the Q15 RFFT/RIFFT.
2219    * @param[in, out] *S             points to an instance of the Q15 RFFT/RIFFT structure.
2220    * @param[in]      *S_CFFT        points to an instance of the Q15 CFFT/CIFFT structure.
2221    * @param[in]      fftLenReal     length of the FFT.
2222    * @param[in]      ifftFlagR      flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform.
2223    * @param[in]      bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
2224    * @return		The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLenReal</code> is not a supported value.
2225    */
2226 
2227   arm_status arm_rfft_init_q15(
2228 			       arm_rfft_instance_q15 * S,
2229 			       arm_cfft_radix4_instance_q15 * S_CFFT,
2230 			       uint32_t fftLenReal,
2231 			       uint32_t ifftFlagR,
2232 			       uint32_t bitReverseFlag);
2233 
2234   /**
2235    * @brief Processing function for the Q31 RFFT/RIFFT.
2236    * @param[in]  *S    points to an instance of the Q31 RFFT/RIFFT structure.
2237    * @param[in]  *pSrc points to the input buffer.
2238    * @param[out] *pDst points to the output buffer.
2239    * @return none.
2240    */
2241 
2242   void arm_rfft_q31(
2243 		    const arm_rfft_instance_q31 * S,
2244 		    q31_t * pSrc,
2245 		    q31_t * pDst);
2246 
2247   /**
2248    * @brief  Initialization function for the Q31 RFFT/RIFFT.
2249    * @param[in, out] *S             points to an instance of the Q31 RFFT/RIFFT structure.
2250    * @param[in, out] *S_CFFT        points to an instance of the Q31 CFFT/CIFFT structure.
2251    * @param[in]      fftLenReal     length of the FFT.
2252    * @param[in]      ifftFlagR      flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform.
2253    * @param[in]      bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
2254    * @return		The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLenReal</code> is not a supported value.
2255    */
2256 
2257   arm_status arm_rfft_init_q31(
2258 			       arm_rfft_instance_q31 * S,
2259 			       arm_cfft_radix4_instance_q31 * S_CFFT,
2260 			       uint32_t fftLenReal,
2261 			       uint32_t ifftFlagR,
2262 			       uint32_t bitReverseFlag);
2263 
2264   /**
2265    * @brief  Initialization function for the floating-point RFFT/RIFFT.
2266    * @param[in,out] *S             points to an instance of the floating-point RFFT/RIFFT structure.
2267    * @param[in,out] *S_CFFT        points to an instance of the floating-point CFFT/CIFFT structure.
2268    * @param[in]     fftLenReal     length of the FFT.
2269    * @param[in]     ifftFlagR      flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform.
2270    * @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
2271    * @return		The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLenReal</code> is not a supported value.
2272    */
2273 
2274   arm_status arm_rfft_init_f32(
2275 			       arm_rfft_instance_f32 * S,
2276 			       arm_cfft_radix4_instance_f32 * S_CFFT,
2277 			       uint32_t fftLenReal,
2278 			       uint32_t ifftFlagR,
2279 			       uint32_t bitReverseFlag);
2280 
2281   /**
2282    * @brief Processing function for the floating-point RFFT/RIFFT.
2283    * @param[in]  *S    points to an instance of the floating-point RFFT/RIFFT structure.
2284    * @param[in]  *pSrc points to the input buffer.
2285    * @param[out] *pDst points to the output buffer.
2286    * @return none.
2287    */
2288 
2289   void arm_rfft_f32(
2290 		    const arm_rfft_instance_f32 * S,
2291 		    float32_t * pSrc,
2292 		    float32_t * pDst);
2293 
2294   /**
2295    * @brief Instance structure for the floating-point DCT4/IDCT4 function.
2296    */
2297 
2298   typedef struct
2299   {
2300     uint16_t N;                         /**< length of the DCT4. */
2301     uint16_t Nby2;                      /**< half of the length of the DCT4. */
2302     float32_t normalize;                /**< normalizing factor. */
2303     float32_t *pTwiddle;                /**< points to the twiddle factor table. */
2304     float32_t *pCosFactor;              /**< points to the cosFactor table. */
2305     arm_rfft_instance_f32 *pRfft;        /**< points to the real FFT instance. */
2306     arm_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */
2307   } arm_dct4_instance_f32;
2308 
2309   /**
2310    * @brief  Initialization function for the floating-point DCT4/IDCT4.
2311    * @param[in,out] *S         points to an instance of floating-point DCT4/IDCT4 structure.
2312    * @param[in]     *S_RFFT    points to an instance of floating-point RFFT/RIFFT structure.
2313    * @param[in]     *S_CFFT    points to an instance of floating-point CFFT/CIFFT structure.
2314    * @param[in]     N          length of the DCT4.
2315    * @param[in]     Nby2       half of the length of the DCT4.
2316    * @param[in]     normalize  normalizing factor.
2317    * @return		arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLenReal</code> is not a supported transform length.
2318    */
2319 
2320   arm_status arm_dct4_init_f32(
2321 			       arm_dct4_instance_f32 * S,
2322 			       arm_rfft_instance_f32 * S_RFFT,
2323 			       arm_cfft_radix4_instance_f32 * S_CFFT,
2324 			       uint16_t N,
2325 			       uint16_t Nby2,
2326 			       float32_t normalize);
2327 
2328   /**
2329    * @brief Processing function for the floating-point DCT4/IDCT4.
2330    * @param[in]       *S             points to an instance of the floating-point DCT4/IDCT4 structure.
2331    * @param[in]       *pState        points to state buffer.
2332    * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.
2333    * @return none.
2334    */
2335 
2336   void arm_dct4_f32(
2337 		    const arm_dct4_instance_f32 * S,
2338 		    float32_t * pState,
2339 		    float32_t * pInlineBuffer);
2340 
2341   /**
2342    * @brief Instance structure for the Q31 DCT4/IDCT4 function.
2343    */
2344 
2345   typedef struct
2346   {
2347     uint16_t N;                         /**< length of the DCT4. */
2348     uint16_t Nby2;                      /**< half of the length of the DCT4. */
2349     q31_t normalize;                    /**< normalizing factor. */
2350     q31_t *pTwiddle;                    /**< points to the twiddle factor table. */
2351     q31_t *pCosFactor;                  /**< points to the cosFactor table. */
2352     arm_rfft_instance_q31 *pRfft;        /**< points to the real FFT instance. */
2353     arm_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance. */
2354   } arm_dct4_instance_q31;
2355 
2356   /**
2357    * @brief  Initialization function for the Q31 DCT4/IDCT4.
2358    * @param[in,out] *S         points to an instance of Q31 DCT4/IDCT4 structure.
2359    * @param[in]     *S_RFFT    points to an instance of Q31 RFFT/RIFFT structure
2360    * @param[in]     *S_CFFT    points to an instance of Q31 CFFT/CIFFT structure
2361    * @param[in]     N          length of the DCT4.
2362    * @param[in]     Nby2       half of the length of the DCT4.
2363    * @param[in]     normalize  normalizing factor.
2364    * @return		arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
2365    */
2366 
2367   arm_status arm_dct4_init_q31(
2368 			       arm_dct4_instance_q31 * S,
2369 			       arm_rfft_instance_q31 * S_RFFT,
2370 			       arm_cfft_radix4_instance_q31 * S_CFFT,
2371 			       uint16_t N,
2372 			       uint16_t Nby2,
2373 			       q31_t normalize);
2374 
2375   /**
2376    * @brief Processing function for the Q31 DCT4/IDCT4.
2377    * @param[in]       *S             points to an instance of the Q31 DCT4 structure.
2378    * @param[in]       *pState        points to state buffer.
2379    * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.
2380    * @return none.
2381    */
2382 
2383   void arm_dct4_q31(
2384 		    const arm_dct4_instance_q31 * S,
2385 		    q31_t * pState,
2386 		    q31_t * pInlineBuffer);
2387 
2388   /**
2389    * @brief Instance structure for the Q15 DCT4/IDCT4 function.
2390    */
2391 
2392   typedef struct
2393   {
2394     uint16_t N;                         /**< length of the DCT4. */
2395     uint16_t Nby2;                      /**< half of the length of the DCT4. */
2396     q15_t normalize;                    /**< normalizing factor. */
2397     q15_t *pTwiddle;                    /**< points to the twiddle factor table. */
2398     q15_t *pCosFactor;                  /**< points to the cosFactor table. */
2399     arm_rfft_instance_q15 *pRfft;        /**< points to the real FFT instance. */
2400     arm_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance. */
2401   } arm_dct4_instance_q15;
2402 
2403   /**
2404    * @brief  Initialization function for the Q15 DCT4/IDCT4.
2405    * @param[in,out] *S         points to an instance of Q15 DCT4/IDCT4 structure.
2406    * @param[in]     *S_RFFT    points to an instance of Q15 RFFT/RIFFT structure.
2407    * @param[in]     *S_CFFT    points to an instance of Q15 CFFT/CIFFT structure.
2408    * @param[in]     N          length of the DCT4.
2409    * @param[in]     Nby2       half of the length of the DCT4.
2410    * @param[in]     normalize  normalizing factor.
2411    * @return		arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
2412    */
2413 
2414   arm_status arm_dct4_init_q15(
2415 			       arm_dct4_instance_q15 * S,
2416 			       arm_rfft_instance_q15 * S_RFFT,
2417 			       arm_cfft_radix4_instance_q15 * S_CFFT,
2418 			       uint16_t N,
2419 			       uint16_t Nby2,
2420 			       q15_t normalize);
2421 
2422   /**
2423    * @brief Processing function for the Q15 DCT4/IDCT4.
2424    * @param[in]       *S             points to an instance of the Q15 DCT4 structure.
2425    * @param[in]       *pState        points to state buffer.
2426    * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.
2427    * @return none.
2428    */
2429 
2430   void arm_dct4_q15(
2431 		    const arm_dct4_instance_q15 * S,
2432 		    q15_t * pState,
2433 		    q15_t * pInlineBuffer);
2434 
2435   /**
2436    * @brief Floating-point vector addition.
2437    * @param[in]       *pSrcA points to the first input vector
2438    * @param[in]       *pSrcB points to the second input vector
2439    * @param[out]      *pDst points to the output vector
2440    * @param[in]       blockSize number of samples in each vector
2441    * @return none.
2442    */
2443 
2444   void arm_add_f32(
2445 		   float32_t * pSrcA,
2446 		   float32_t * pSrcB,
2447 		   float32_t * pDst,
2448 		   uint32_t blockSize);
2449 
2450   /**
2451    * @brief Q7 vector addition.
2452    * @param[in]       *pSrcA points to the first input vector
2453    * @param[in]       *pSrcB points to the second input vector
2454    * @param[out]      *pDst points to the output vector
2455    * @param[in]       blockSize number of samples in each vector
2456    * @return none.
2457    */
2458 
2459   void arm_add_q7(
2460 		  q7_t * pSrcA,
2461 		  q7_t * pSrcB,
2462 		  q7_t * pDst,
2463 		  uint32_t blockSize);
2464 
2465   /**
2466    * @brief Q15 vector addition.
2467    * @param[in]       *pSrcA points to the first input vector
2468    * @param[in]       *pSrcB points to the second input vector
2469    * @param[out]      *pDst points to the output vector
2470    * @param[in]       blockSize number of samples in each vector
2471    * @return none.
2472    */
2473 
2474   void arm_add_q15(
2475 		    q15_t * pSrcA,
2476 		    q15_t * pSrcB,
2477 		   q15_t * pDst,
2478 		   uint32_t blockSize);
2479 
2480   /**
2481    * @brief Q31 vector addition.
2482    * @param[in]       *pSrcA points to the first input vector
2483    * @param[in]       *pSrcB points to the second input vector
2484    * @param[out]      *pDst points to the output vector
2485    * @param[in]       blockSize number of samples in each vector
2486    * @return none.
2487    */
2488 
2489   void arm_add_q31(
2490 		    q31_t * pSrcA,
2491 		    q31_t * pSrcB,
2492 		   q31_t * pDst,
2493 		   uint32_t blockSize);
2494 
2495   /**
2496    * @brief Floating-point vector subtraction.
2497    * @param[in]       *pSrcA points to the first input vector
2498    * @param[in]       *pSrcB points to the second input vector
2499    * @param[out]      *pDst points to the output vector
2500    * @param[in]       blockSize number of samples in each vector
2501    * @return none.
2502    */
2503 
2504   void arm_sub_f32(
2505 		    float32_t * pSrcA,
2506 		    float32_t * pSrcB,
2507 		   float32_t * pDst,
2508 		   uint32_t blockSize);
2509 
2510   /**
2511    * @brief Q7 vector subtraction.
2512    * @param[in]       *pSrcA points to the first input vector
2513    * @param[in]       *pSrcB points to the second input vector
2514    * @param[out]      *pDst points to the output vector
2515    * @param[in]       blockSize number of samples in each vector
2516    * @return none.
2517    */
2518 
2519   void arm_sub_q7(
2520 		   q7_t * pSrcA,
2521 		   q7_t * pSrcB,
2522 		  q7_t * pDst,
2523 		  uint32_t blockSize);
2524 
2525   /**
2526    * @brief Q15 vector subtraction.
2527    * @param[in]       *pSrcA points to the first input vector
2528    * @param[in]       *pSrcB points to the second input vector
2529    * @param[out]      *pDst points to the output vector
2530    * @param[in]       blockSize number of samples in each vector
2531    * @return none.
2532    */
2533 
2534   void arm_sub_q15(
2535 		    q15_t * pSrcA,
2536 		    q15_t * pSrcB,
2537 		   q15_t * pDst,
2538 		   uint32_t blockSize);
2539 
2540   /**
2541    * @brief Q31 vector subtraction.
2542    * @param[in]       *pSrcA points to the first input vector
2543    * @param[in]       *pSrcB points to the second input vector
2544    * @param[out]      *pDst points to the output vector
2545    * @param[in]       blockSize number of samples in each vector
2546    * @return none.
2547    */
2548 
2549   void arm_sub_q31(
2550 		    q31_t * pSrcA,
2551 		    q31_t * pSrcB,
2552 		   q31_t * pDst,
2553 		   uint32_t blockSize);
2554 
2555   /**
2556    * @brief Multiplies a floating-point vector by a scalar.
2557    * @param[in]       *pSrc points to the input vector
2558    * @param[in]       scale scale factor to be applied
2559    * @param[out]      *pDst points to the output vector
2560    * @param[in]       blockSize number of samples in the vector
2561    * @return none.
2562    */
2563 
2564   void arm_scale_f32(
2565 		      float32_t * pSrc,
2566 		     float32_t scale,
2567 		     float32_t * pDst,
2568 		     uint32_t blockSize);
2569 
2570   /**
2571    * @brief Multiplies a Q7 vector by a scalar.
2572    * @param[in]       *pSrc points to the input vector
2573    * @param[in]       scaleFract fractional portion of the scale value
2574    * @param[in]       shift number of bits to shift the result by
2575    * @param[out]      *pDst points to the output vector
2576    * @param[in]       blockSize number of samples in the vector
2577    * @return none.
2578    */
2579 
2580   void arm_scale_q7(
2581 		     q7_t * pSrc,
2582 		    q7_t scaleFract,
2583 		    int8_t shift,
2584 		    q7_t * pDst,
2585 		    uint32_t blockSize);
2586 
2587   /**
2588    * @brief Multiplies a Q15 vector by a scalar.
2589    * @param[in]       *pSrc points to the input vector
2590    * @param[in]       scaleFract fractional portion of the scale value
2591    * @param[in]       shift number of bits to shift the result by
2592    * @param[out]      *pDst points to the output vector
2593    * @param[in]       blockSize number of samples in the vector
2594    * @return none.
2595    */
2596 
2597   void arm_scale_q15(
2598 		      q15_t * pSrc,
2599 		     q15_t scaleFract,
2600 		     int8_t shift,
2601 		     q15_t * pDst,
2602 		     uint32_t blockSize);
2603 
2604   /**
2605    * @brief Multiplies a Q31 vector by a scalar.
2606    * @param[in]       *pSrc points to the input vector
2607    * @param[in]       scaleFract fractional portion of the scale value
2608    * @param[in]       shift number of bits to shift the result by
2609    * @param[out]      *pDst points to the output vector
2610    * @param[in]       blockSize number of samples in the vector
2611    * @return none.
2612    */
2613 
2614   void arm_scale_q31(
2615 		      q31_t * pSrc,
2616 		     q31_t scaleFract,
2617 		     int8_t shift,
2618 		     q31_t * pDst,
2619 		     uint32_t blockSize);
2620 
2621   /**
2622    * @brief Q7 vector absolute value.
2623    * @param[in]       *pSrc points to the input buffer
2624    * @param[out]      *pDst points to the output buffer
2625    * @param[in]       blockSize number of samples in each vector
2626    * @return none.
2627    */
2628 
2629   void arm_abs_q7(
2630 		   q7_t * pSrc,
2631 		  q7_t * pDst,
2632 		  uint32_t blockSize);
2633 
2634   /**
2635    * @brief Floating-point vector absolute value.
2636    * @param[in]       *pSrc points to the input buffer
2637    * @param[out]      *pDst points to the output buffer
2638    * @param[in]       blockSize number of samples in each vector
2639    * @return none.
2640    */
2641 
2642   void arm_abs_f32(
2643 		    float32_t * pSrc,
2644 		   float32_t * pDst,
2645 		   uint32_t blockSize);
2646 
2647   /**
2648    * @brief Q15 vector absolute value.
2649    * @param[in]       *pSrc points to the input buffer
2650    * @param[out]      *pDst points to the output buffer
2651    * @param[in]       blockSize number of samples in each vector
2652    * @return none.
2653    */
2654 
2655   void arm_abs_q15(
2656 		    q15_t * pSrc,
2657 		   q15_t * pDst,
2658 		   uint32_t blockSize);
2659 
2660   /**
2661    * @brief Q31 vector absolute value.
2662    * @param[in]       *pSrc points to the input buffer
2663    * @param[out]      *pDst points to the output buffer
2664    * @param[in]       blockSize number of samples in each vector
2665    * @return none.
2666    */
2667 
2668   void arm_abs_q31(
2669 		    q31_t * pSrc,
2670 		   q31_t * pDst,
2671 		   uint32_t blockSize);
2672 
2673   /**
2674    * @brief Dot product of floating-point vectors.
2675    * @param[in]       *pSrcA points to the first input vector
2676    * @param[in]       *pSrcB points to the second input vector
2677    * @param[in]       blockSize number of samples in each vector
2678    * @param[out]      *result output result returned here
2679    * @return none.
2680    */
2681 
2682   void arm_dot_prod_f32(
2683 			 float32_t * pSrcA,
2684 			 float32_t * pSrcB,
2685 			uint32_t blockSize,
2686 			float32_t * result);
2687 
2688   /**
2689    * @brief Dot product of Q7 vectors.
2690    * @param[in]       *pSrcA points to the first input vector
2691    * @param[in]       *pSrcB points to the second input vector
2692    * @param[in]       blockSize number of samples in each vector
2693    * @param[out]      *result output result returned here
2694    * @return none.
2695    */
2696 
2697   void arm_dot_prod_q7(
2698 		        q7_t * pSrcA,
2699 		        q7_t * pSrcB,
2700 		       uint32_t blockSize,
2701 		       q31_t * result);
2702 
2703   /**
2704    * @brief Dot product of Q15 vectors.
2705    * @param[in]       *pSrcA points to the first input vector
2706    * @param[in]       *pSrcB points to the second input vector
2707    * @param[in]       blockSize number of samples in each vector
2708    * @param[out]      *result output result returned here
2709    * @return none.
2710    */
2711 
2712   void arm_dot_prod_q15(
2713 			 q15_t * pSrcA,
2714 			 q15_t * pSrcB,
2715 			uint32_t blockSize,
2716 			q63_t * result);
2717 
2718   /**
2719    * @brief Dot product of Q31 vectors.
2720    * @param[in]       *pSrcA points to the first input vector
2721    * @param[in]       *pSrcB points to the second input vector
2722    * @param[in]       blockSize number of samples in each vector
2723    * @param[out]      *result output result returned here
2724    * @return none.
2725    */
2726 
2727   void arm_dot_prod_q31(
2728 			 q31_t * pSrcA,
2729 			 q31_t * pSrcB,
2730 			uint32_t blockSize,
2731 			q63_t * result);
2732 
2733   /**
2734    * @brief  Shifts the elements of a Q7 vector a specified number of bits.
2735    * @param[in]  *pSrc points to the input vector
2736    * @param[in]  shiftBits number of bits to shift.  A positive value shifts left; a negative value shifts right.
2737    * @param[out]  *pDst points to the output vector
2738    * @param[in]  blockSize number of samples in the vector
2739    * @return none.
2740    */
2741 
2742   void arm_shift_q7(
2743 		     q7_t * pSrc,
2744 		    int8_t shiftBits,
2745 		    q7_t * pDst,
2746 		    uint32_t blockSize);
2747 
2748   /**
2749    * @brief  Shifts the elements of a Q15 vector a specified number of bits.
2750    * @param[in]  *pSrc points to the input vector
2751    * @param[in]  shiftBits number of bits to shift.  A positive value shifts left; a negative value shifts right.
2752    * @param[out]  *pDst points to the output vector
2753    * @param[in]  blockSize number of samples in the vector
2754    * @return none.
2755    */
2756 
2757   void arm_shift_q15(
2758 		      q15_t * pSrc,
2759 		     int8_t shiftBits,
2760 		     q15_t * pDst,
2761 		     uint32_t blockSize);
2762 
2763   /**
2764    * @brief  Shifts the elements of a Q31 vector a specified number of bits.
2765    * @param[in]  *pSrc points to the input vector
2766    * @param[in]  shiftBits number of bits to shift.  A positive value shifts left; a negative value shifts right.
2767    * @param[out]  *pDst points to the output vector
2768    * @param[in]  blockSize number of samples in the vector
2769    * @return none.
2770    */
2771 
2772   void arm_shift_q31(
2773 		      q31_t * pSrc,
2774 		     int8_t shiftBits,
2775 		     q31_t * pDst,
2776 		     uint32_t blockSize);
2777 
2778   /**
2779    * @brief  Adds a constant offset to a floating-point vector.
2780    * @param[in]  *pSrc points to the input vector
2781    * @param[in]  offset is the offset to be added
2782    * @param[out]  *pDst points to the output vector
2783    * @param[in]  blockSize number of samples in the vector
2784    * @return none.
2785    */
2786 
2787   void arm_offset_f32(
2788 		       float32_t * pSrc,
2789 		      float32_t offset,
2790 		      float32_t * pDst,
2791 		      uint32_t blockSize);
2792 
2793   /**
2794    * @brief  Adds a constant offset to a Q7 vector.
2795    * @param[in]  *pSrc points to the input vector
2796    * @param[in]  offset is the offset to be added
2797    * @param[out]  *pDst points to the output vector
2798    * @param[in]  blockSize number of samples in the vector
2799    * @return none.
2800    */
2801 
2802   void arm_offset_q7(
2803 		      q7_t * pSrc,
2804 		     q7_t offset,
2805 		     q7_t * pDst,
2806 		     uint32_t blockSize);
2807 
2808   /**
2809    * @brief  Adds a constant offset to a Q15 vector.
2810    * @param[in]  *pSrc points to the input vector
2811    * @param[in]  offset is the offset to be added
2812    * @param[out]  *pDst points to the output vector
2813    * @param[in]  blockSize number of samples in the vector
2814    * @return none.
2815    */
2816 
2817   void arm_offset_q15(
2818 		       q15_t * pSrc,
2819 		      q15_t offset,
2820 		      q15_t * pDst,
2821 		      uint32_t blockSize);
2822 
2823   /**
2824    * @brief  Adds a constant offset to a Q31 vector.
2825    * @param[in]  *pSrc points to the input vector
2826    * @param[in]  offset is the offset to be added
2827    * @param[out]  *pDst points to the output vector
2828    * @param[in]  blockSize number of samples in the vector
2829    * @return none.
2830    */
2831 
2832   void arm_offset_q31(
2833 		       q31_t * pSrc,
2834 		      q31_t offset,
2835 		      q31_t * pDst,
2836 		      uint32_t blockSize);
2837 
2838   /**
2839    * @brief  Negates the elements of a floating-point vector.
2840    * @param[in]  *pSrc points to the input vector
2841    * @param[out]  *pDst points to the output vector
2842    * @param[in]  blockSize number of samples in the vector
2843    * @return none.
2844    */
2845 
2846   void arm_negate_f32(
2847 		       float32_t * pSrc,
2848 		      float32_t * pDst,
2849 		      uint32_t blockSize);
2850 
2851   /**
2852    * @brief  Negates the elements of a Q7 vector.
2853    * @param[in]  *pSrc points to the input vector
2854    * @param[out]  *pDst points to the output vector
2855    * @param[in]  blockSize number of samples in the vector
2856    * @return none.
2857    */
2858 
2859   void arm_negate_q7(
2860 		      q7_t * pSrc,
2861 		     q7_t * pDst,
2862 		     uint32_t blockSize);
2863 
2864   /**
2865    * @brief  Negates the elements of a Q15 vector.
2866    * @param[in]  *pSrc points to the input vector
2867    * @param[out]  *pDst points to the output vector
2868    * @param[in]  blockSize number of samples in the vector
2869    * @return none.
2870    */
2871 
2872   void arm_negate_q15(
2873 		       q15_t * pSrc,
2874 		      q15_t * pDst,
2875 		      uint32_t blockSize);
2876 
2877   /**
2878    * @brief  Negates the elements of a Q31 vector.
2879    * @param[in]  *pSrc points to the input vector
2880    * @param[out]  *pDst points to the output vector
2881    * @param[in]  blockSize number of samples in the vector
2882    * @return none.
2883    */
2884 
2885   void arm_negate_q31(
2886 		       q31_t * pSrc,
2887 		      q31_t * pDst,
2888 		      uint32_t blockSize);
2889   /**
2890    * @brief  Copies the elements of a floating-point vector.
2891    * @param[in]  *pSrc input pointer
2892    * @param[out]  *pDst output pointer
2893    * @param[in]  blockSize number of samples to process
2894    * @return none.
2895    */
2896   void arm_copy_f32(
2897 		     float32_t * pSrc,
2898 		    float32_t * pDst,
2899 		    uint32_t blockSize);
2900 
2901   /**
2902    * @brief  Copies the elements of a Q7 vector.
2903    * @param[in]  *pSrc input pointer
2904    * @param[out]  *pDst output pointer
2905    * @param[in]  blockSize number of samples to process
2906    * @return none.
2907    */
2908   void arm_copy_q7(
2909 		    q7_t * pSrc,
2910 		   q7_t * pDst,
2911 		   uint32_t blockSize);
2912 
2913   /**
2914    * @brief  Copies the elements of a Q15 vector.
2915    * @param[in]  *pSrc input pointer
2916    * @param[out]  *pDst output pointer
2917    * @param[in]  blockSize number of samples to process
2918    * @return none.
2919    */
2920   void arm_copy_q15(
2921 		     q15_t * pSrc,
2922 		    q15_t * pDst,
2923 		    uint32_t blockSize);
2924 
2925   /**
2926    * @brief  Copies the elements of a Q31 vector.
2927    * @param[in]  *pSrc input pointer
2928    * @param[out]  *pDst output pointer
2929    * @param[in]  blockSize number of samples to process
2930    * @return none.
2931    */
2932   void arm_copy_q31(
2933 		     q31_t * pSrc,
2934 		    q31_t * pDst,
2935 		    uint32_t blockSize);
2936   /**
2937    * @brief  Fills a constant value into a floating-point vector.
2938    * @param[in]  value input value to be filled
2939    * @param[out]  *pDst output pointer
2940    * @param[in]  blockSize number of samples to process
2941    * @return none.
2942    */
2943   void arm_fill_f32(
2944 		     float32_t value,
2945 		    float32_t * pDst,
2946 		    uint32_t blockSize);
2947 
2948   /**
2949    * @brief  Fills a constant value into a Q7 vector.
2950    * @param[in]  value input value to be filled
2951    * @param[out]  *pDst output pointer
2952    * @param[in]  blockSize number of samples to process
2953    * @return none.
2954    */
2955   void arm_fill_q7(
2956 		    q7_t value,
2957 		   q7_t * pDst,
2958 		   uint32_t blockSize);
2959 
2960   /**
2961    * @brief  Fills a constant value into a Q15 vector.
2962    * @param[in]  value input value to be filled
2963    * @param[out]  *pDst output pointer
2964    * @param[in]  blockSize number of samples to process
2965    * @return none.
2966    */
2967   void arm_fill_q15(
2968 		     q15_t value,
2969 		    q15_t * pDst,
2970 		    uint32_t blockSize);
2971 
2972   /**
2973    * @brief  Fills a constant value into a Q31 vector.
2974    * @param[in]  value input value to be filled
2975    * @param[out]  *pDst output pointer
2976    * @param[in]  blockSize number of samples to process
2977    * @return none.
2978    */
2979   void arm_fill_q31(
2980 		     q31_t value,
2981 		    q31_t * pDst,
2982 		    uint32_t blockSize);
2983 
2984 /**
2985  * @brief Convolution of floating-point sequences.
2986  * @param[in] *pSrcA points to the first input sequence.
2987  * @param[in] srcALen length of the first input sequence.
2988  * @param[in] *pSrcB points to the second input sequence.
2989  * @param[in] srcBLen length of the second input sequence.
2990  * @param[out] *pDst points to the location where the output result is written.  Length srcALen+srcBLen-1.
2991  * @return none.
2992  */
2993 
2994   void arm_conv_f32(
2995 		     float32_t * pSrcA,
2996 		    uint32_t srcALen,
2997 		     float32_t * pSrcB,
2998 		    uint32_t srcBLen,
2999 		    float32_t * pDst);
3000 
3001 /**
3002  * @brief Convolution of Q15 sequences.
3003  * @param[in] *pSrcA points to the first input sequence.
3004  * @param[in] srcALen length of the first input sequence.
3005  * @param[in] *pSrcB points to the second input sequence.
3006  * @param[in] srcBLen length of the second input sequence.
3007  * @param[out] *pDst points to the location where the output result is written.  Length srcALen+srcBLen-1.
3008  * @return none.
3009  */
3010 
3011   void arm_conv_q15(
3012 		     q15_t * pSrcA,
3013 		    uint32_t srcALen,
3014 		     q15_t * pSrcB,
3015 		    uint32_t srcBLen,
3016 		    q15_t * pDst);
3017 
3018   /**
3019    * @brief Convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4
3020    * @param[in] *pSrcA points to the first input sequence.
3021    * @param[in] srcALen length of the first input sequence.
3022    * @param[in] *pSrcB points to the second input sequence.
3023    * @param[in] srcBLen length of the second input sequence.
3024    * @param[out] *pDst points to the block of output data  Length srcALen+srcBLen-1.
3025    * @return none.
3026    */
3027 
3028   void arm_conv_fast_q15(
3029 			  q15_t * pSrcA,
3030 			 uint32_t srcALen,
3031 			  q15_t * pSrcB,
3032 			 uint32_t srcBLen,
3033 			 q15_t * pDst);
3034 
3035   /**
3036    * @brief Convolution of Q31 sequences.
3037    * @param[in] *pSrcA points to the first input sequence.
3038    * @param[in] srcALen length of the first input sequence.
3039    * @param[in] *pSrcB points to the second input sequence.
3040    * @param[in] srcBLen length of the second input sequence.
3041    * @param[out] *pDst points to the block of output data  Length srcALen+srcBLen-1.
3042    * @return none.
3043    */
3044 
3045   void arm_conv_q31(
3046 		     q31_t * pSrcA,
3047 		    uint32_t srcALen,
3048 		     q31_t * pSrcB,
3049 		    uint32_t srcBLen,
3050 		    q31_t * pDst);
3051 
3052   /**
3053    * @brief Convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4
3054    * @param[in] *pSrcA points to the first input sequence.
3055    * @param[in] srcALen length of the first input sequence.
3056    * @param[in] *pSrcB points to the second input sequence.
3057    * @param[in] srcBLen length of the second input sequence.
3058    * @param[out] *pDst points to the block of output data  Length srcALen+srcBLen-1.
3059    * @return none.
3060    */
3061 
3062   void arm_conv_fast_q31(
3063 			  q31_t * pSrcA,
3064 			 uint32_t srcALen,
3065 			  q31_t * pSrcB,
3066 			 uint32_t srcBLen,
3067 			 q31_t * pDst);
3068 
3069   /**
3070    * @brief Convolution of Q7 sequences.
3071    * @param[in] *pSrcA points to the first input sequence.
3072    * @param[in] srcALen length of the first input sequence.
3073    * @param[in] *pSrcB points to the second input sequence.
3074    * @param[in] srcBLen length of the second input sequence.
3075    * @param[out] *pDst points to the block of output data  Length srcALen+srcBLen-1.
3076    * @return none.
3077    */
3078 
3079   void arm_conv_q7(
3080 		    q7_t * pSrcA,
3081 		   uint32_t srcALen,
3082 		    q7_t * pSrcB,
3083 		   uint32_t srcBLen,
3084 		   q7_t * pDst);
3085 
3086   /**
3087    * @brief Partial convolution of floating-point sequences.
3088    * @param[in]       *pSrcA points to the first input sequence.
3089    * @param[in]       srcALen length of the first input sequence.
3090    * @param[in]       *pSrcB points to the second input sequence.
3091    * @param[in]       srcBLen length of the second input sequence.
3092    * @param[out]      *pDst points to the block of output data
3093    * @param[in]       firstIndex is the first output sample to start with.
3094    * @param[in]       numPoints is the number of output points to be computed.
3095    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
3096    */
3097 
3098   arm_status arm_conv_partial_f32(
3099 				   float32_t * pSrcA,
3100 				  uint32_t srcALen,
3101 				   float32_t * pSrcB,
3102 				  uint32_t srcBLen,
3103 				  float32_t * pDst,
3104 				  uint32_t firstIndex,
3105 				  uint32_t numPoints);
3106 
3107   /**
3108    * @brief Partial convolution of Q15 sequences.
3109    * @param[in]       *pSrcA points to the first input sequence.
3110    * @param[in]       srcALen length of the first input sequence.
3111    * @param[in]       *pSrcB points to the second input sequence.
3112    * @param[in]       srcBLen length of the second input sequence.
3113    * @param[out]      *pDst points to the block of output data
3114    * @param[in]       firstIndex is the first output sample to start with.
3115    * @param[in]       numPoints is the number of output points to be computed.
3116    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
3117    */
3118 
3119   arm_status arm_conv_partial_q15(
3120 				   q15_t * pSrcA,
3121 				  uint32_t srcALen,
3122 				   q15_t * pSrcB,
3123 				  uint32_t srcBLen,
3124 				  q15_t * pDst,
3125 				  uint32_t firstIndex,
3126 				  uint32_t numPoints);
3127 
3128   /**
3129    * @brief Partial convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4
3130    * @param[in]       *pSrcA points to the first input sequence.
3131    * @param[in]       srcALen length of the first input sequence.
3132    * @param[in]       *pSrcB points to the second input sequence.
3133    * @param[in]       srcBLen length of the second input sequence.
3134    * @param[out]      *pDst points to the block of output data
3135    * @param[in]       firstIndex is the first output sample to start with.
3136    * @param[in]       numPoints is the number of output points to be computed.
3137    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
3138    */
3139 
3140   arm_status arm_conv_partial_fast_q15(
3141 				        q15_t * pSrcA,
3142 				       uint32_t srcALen,
3143 				        q15_t * pSrcB,
3144 				       uint32_t srcBLen,
3145 				       q15_t * pDst,
3146 				       uint32_t firstIndex,
3147 				       uint32_t numPoints);
3148 
3149   /**
3150    * @brief Partial convolution of Q31 sequences.
3151    * @param[in]       *pSrcA points to the first input sequence.
3152    * @param[in]       srcALen length of the first input sequence.
3153    * @param[in]       *pSrcB points to the second input sequence.
3154    * @param[in]       srcBLen length of the second input sequence.
3155    * @param[out]      *pDst points to the block of output data
3156    * @param[in]       firstIndex is the first output sample to start with.
3157    * @param[in]       numPoints is the number of output points to be computed.
3158    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
3159    */
3160 
3161   arm_status arm_conv_partial_q31(
3162 				   q31_t * pSrcA,
3163 				  uint32_t srcALen,
3164 				   q31_t * pSrcB,
3165 				  uint32_t srcBLen,
3166 				  q31_t * pDst,
3167 				  uint32_t firstIndex,
3168 				  uint32_t numPoints);
3169 
3170 
3171   /**
3172    * @brief Partial convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4
3173    * @param[in]       *pSrcA points to the first input sequence.
3174    * @param[in]       srcALen length of the first input sequence.
3175    * @param[in]       *pSrcB points to the second input sequence.
3176    * @param[in]       srcBLen length of the second input sequence.
3177    * @param[out]      *pDst points to the block of output data
3178    * @param[in]       firstIndex is the first output sample to start with.
3179    * @param[in]       numPoints is the number of output points to be computed.
3180    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
3181    */
3182 
3183   arm_status arm_conv_partial_fast_q31(
3184 				        q31_t * pSrcA,
3185 				       uint32_t srcALen,
3186 				        q31_t * pSrcB,
3187 				       uint32_t srcBLen,
3188 				       q31_t * pDst,
3189 				       uint32_t firstIndex,
3190 				       uint32_t numPoints);
3191 
3192   /**
3193    * @brief Partial convolution of Q7 sequences.
3194    * @param[in]       *pSrcA points to the first input sequence.
3195    * @param[in]       srcALen length of the first input sequence.
3196    * @param[in]       *pSrcB points to the second input sequence.
3197    * @param[in]       srcBLen length of the second input sequence.
3198    * @param[out]      *pDst points to the block of output data
3199    * @param[in]       firstIndex is the first output sample to start with.
3200    * @param[in]       numPoints is the number of output points to be computed.
3201    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
3202    */
3203 
3204   arm_status arm_conv_partial_q7(
3205 				  q7_t * pSrcA,
3206 				 uint32_t srcALen,
3207 				  q7_t * pSrcB,
3208 				 uint32_t srcBLen,
3209 				 q7_t * pDst,
3210 				 uint32_t firstIndex,
3211 				 uint32_t numPoints);
3212 
3213 
3214   /**
3215    * @brief Instance structure for the Q15 FIR decimator.
3216    */
3217 
3218   typedef struct
3219   {
3220     uint8_t M;                      /**< decimation factor. */
3221     uint16_t numTaps;               /**< number of coefficients in the filter. */
3222     q15_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numTaps.*/
3223     q15_t *pState;                   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
3224   } arm_fir_decimate_instance_q15;
3225 
3226   /**
3227    * @brief Instance structure for the Q31 FIR decimator.
3228    */
3229 
3230   typedef struct
3231   {
3232     uint8_t M;                  /**< decimation factor. */
3233     uint16_t numTaps;           /**< number of coefficients in the filter. */
3234     q31_t *pCoeffs;              /**< points to the coefficient array. The array is of length numTaps.*/
3235     q31_t *pState;               /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
3236 
3237   } arm_fir_decimate_instance_q31;
3238 
3239   /**
3240    * @brief Instance structure for the floating-point FIR decimator.
3241    */
3242 
3243   typedef struct
3244   {
3245     uint8_t M;                          /**< decimation factor. */
3246     uint16_t numTaps;                   /**< number of coefficients in the filter. */
3247     float32_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numTaps.*/
3248     float32_t *pState;                   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
3249 
3250   } arm_fir_decimate_instance_f32;
3251 
3252 
3253 
3254   /**
3255    * @brief Processing function for the floating-point FIR decimator.
3256    * @param[in] *S points to an instance of the floating-point FIR decimator structure.
3257    * @param[in] *pSrc points to the block of input data.
3258    * @param[out] *pDst points to the block of output data
3259    * @param[in] blockSize number of input samples to process per call.
3260    * @return none
3261    */
3262 
3263   void arm_fir_decimate_f32(
3264 			    const arm_fir_decimate_instance_f32 * S,
3265 			     float32_t * pSrc,
3266 			    float32_t * pDst,
3267 			    uint32_t blockSize);
3268 
3269 
3270   /**
3271    * @brief  Initialization function for the floating-point FIR decimator.
3272    * @param[in,out] *S points to an instance of the floating-point FIR decimator structure.
3273    * @param[in] numTaps  number of coefficients in the filter.
3274    * @param[in] M  decimation factor.
3275    * @param[in] *pCoeffs points to the filter coefficients.
3276    * @param[in] *pState points to the state buffer.
3277    * @param[in] blockSize number of input samples to process per call.
3278    * @return    The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
3279    * <code>blockSize</code> is not a multiple of <code>M</code>.
3280    */
3281 
3282   arm_status arm_fir_decimate_init_f32(
3283 				       arm_fir_decimate_instance_f32 * S,
3284 				       uint16_t numTaps,
3285 				       uint8_t M,
3286 				       float32_t * pCoeffs,
3287 				       float32_t * pState,
3288 				       uint32_t blockSize);
3289 
3290   /**
3291    * @brief Processing function for the Q15 FIR decimator.
3292    * @param[in] *S points to an instance of the Q15 FIR decimator structure.
3293    * @param[in] *pSrc points to the block of input data.
3294    * @param[out] *pDst points to the block of output data
3295    * @param[in] blockSize number of input samples to process per call.
3296    * @return none
3297    */
3298 
3299   void arm_fir_decimate_q15(
3300 			    const arm_fir_decimate_instance_q15 * S,
3301 			     q15_t * pSrc,
3302 			    q15_t * pDst,
3303 			    uint32_t blockSize);
3304 
3305   /**
3306    * @brief Processing function for the Q15 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.
3307    * @param[in] *S points to an instance of the Q15 FIR decimator structure.
3308    * @param[in] *pSrc points to the block of input data.
3309    * @param[out] *pDst points to the block of output data
3310    * @param[in] blockSize number of input samples to process per call.
3311    * @return none
3312    */
3313 
3314   void arm_fir_decimate_fast_q15(
3315 				 const arm_fir_decimate_instance_q15 * S,
3316 				  q15_t * pSrc,
3317 				 q15_t * pDst,
3318 				 uint32_t blockSize);
3319 
3320 
3321 
3322   /**
3323    * @brief  Initialization function for the Q15 FIR decimator.
3324    * @param[in,out] *S points to an instance of the Q15 FIR decimator structure.
3325    * @param[in] numTaps  number of coefficients in the filter.
3326    * @param[in] M  decimation factor.
3327    * @param[in] *pCoeffs points to the filter coefficients.
3328    * @param[in] *pState points to the state buffer.
3329    * @param[in] blockSize number of input samples to process per call.
3330    * @return    The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
3331    * <code>blockSize</code> is not a multiple of <code>M</code>.
3332    */
3333 
3334   arm_status arm_fir_decimate_init_q15(
3335 				       arm_fir_decimate_instance_q15 * S,
3336 				       uint16_t numTaps,
3337 				       uint8_t M,
3338 				       q15_t * pCoeffs,
3339 				       q15_t * pState,
3340 				       uint32_t blockSize);
3341 
3342   /**
3343    * @brief Processing function for the Q31 FIR decimator.
3344    * @param[in] *S points to an instance of the Q31 FIR decimator structure.
3345    * @param[in] *pSrc points to the block of input data.
3346    * @param[out] *pDst points to the block of output data.
3347    * @param[in] blockSize number of input samples to process per call.
3348    * @return none.
3349    */
3350 
3351   void arm_fir_decimate_q31(
3352 			    const arm_fir_decimate_instance_q31 * S,
3353 			     q31_t * pSrc,
3354 			    q31_t * pDst,
3355 			    uint32_t blockSize);
3356 
3357   /**
3358    * @brief Processing function for the Q31 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.
3359    * @param[in] *S points to an instance of the Q31 FIR decimator structure (not const-qualified in this prototype, unlike arm_fir_decimate_q31()).
3360    * @param[in] *pSrc points to the block of input data.
3361    * @param[out] *pDst points to the block of output data.
3362    * @param[in] blockSize number of input samples to process per call.
3363    * @return none.
3364    */
3365 
3366   void arm_fir_decimate_fast_q31(
3367 				 arm_fir_decimate_instance_q31 * S,
3368 				  q31_t * pSrc,
3369 				 q31_t * pDst,
3370 				 uint32_t blockSize);
3371 
3372 
3373   /**
3374    * @brief  Initialization function for the Q31 FIR decimator.
3375    * @param[in,out] *S points to an instance of the Q31 FIR decimator structure.
3376    * @param[in] numTaps  number of coefficients in the filter.
3377    * @param[in] M  decimation factor.
3378    * @param[in] *pCoeffs points to the filter coefficients.
3379    * @param[in] *pState points to the state buffer.
3380    * @param[in] blockSize number of input samples to process per call; must be a multiple of <code>M</code>.
3381    * @return    The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
3382    * <code>blockSize</code> is not a multiple of <code>M</code>.
3383    */
3384 
3385   arm_status arm_fir_decimate_init_q31(
3386 				       arm_fir_decimate_instance_q31 * S,
3387 				       uint16_t numTaps,
3388 				       uint8_t M,
3389 				       q31_t * pCoeffs,
3390 				       q31_t * pState,
3391 				       uint32_t blockSize);
3392 
3393 
3394 
3395   /**
3396    * @brief Instance structure for the Q15 FIR interpolator. Passed to arm_fir_interpolate_init_q15() and arm_fir_interpolate_q15().
3397    */
3398 
3399   typedef struct
3400   {
3401     uint8_t L;                      /**< upsample factor. */
3402     uint16_t phaseLength;           /**< length of each polyphase filter component. */
3403     q15_t *pCoeffs;                 /**< points to the coefficient array. The array is of length L*phaseLength. */
3404     q15_t *pState;                  /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
3405   } arm_fir_interpolate_instance_q15;
3406 
3407   /**
3408    * @brief Instance structure for the Q31 FIR interpolator. Passed to arm_fir_interpolate_init_q31() and arm_fir_interpolate_q31().
3409    */
3410 
3411   typedef struct
3412   {
3413     uint8_t L;                      /**< upsample factor. */
3414     uint16_t phaseLength;           /**< length of each polyphase filter component. */
3415     q31_t *pCoeffs;                  /**< points to the coefficient array. The array is of length L*phaseLength. */
3416     q31_t *pState;                   /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
3417   } arm_fir_interpolate_instance_q31;
3418 
3419   /**
3420    * @brief Instance structure for the floating-point FIR interpolator. Passed to arm_fir_interpolate_init_f32() and arm_fir_interpolate_f32().
3421    */
3422 
3423   typedef struct
3424   {
3425     uint8_t L;                     /**< upsample factor. */
3426     uint16_t phaseLength;          /**< length of each polyphase filter component. */
3427     float32_t *pCoeffs;             /**< points to the coefficient array. The array is of length L*phaseLength. */
3428     float32_t *pState;              /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
3429   } arm_fir_interpolate_instance_f32;
3430 
3431 
3432   /**
3433    * @brief Processing function for the Q15 FIR interpolator.
3434    * @param[in] *S        points to an instance of the Q15 FIR interpolator structure.
3435    * @param[in] *pSrc     points to the block of input data (blockSize samples).
3436    * @param[out] *pDst    points to the block of output data.
3437    * @param[in] blockSize number of input samples to process per call.
3438    * @return none.
3439    */
3440 
3441   void arm_fir_interpolate_q15(
3442 			       const arm_fir_interpolate_instance_q15 * S,
3443 			        q15_t * pSrc,
3444 			       q15_t * pDst,
3445 			       uint32_t blockSize);
3446 
3447 
3448   /**
3449    * @brief  Initialization function for the Q15 FIR interpolator.
3450    * @param[in,out] *S        points to an instance of the Q15 FIR interpolator structure.
3451    * @param[in]     L         upsample factor.
3452    * @param[in]     numTaps   number of coefficients in the filter; must be a multiple of <code>L</code>.
3453    * @param[in]     *pCoeffs  points to the filter coefficient buffer.
3454    * @param[in]     *pState   points to the state buffer.
3455    * @param[in]     blockSize number of input samples to process per call.
3456    * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
3457    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
3458    */
3459 
3460   arm_status arm_fir_interpolate_init_q15(
3461 					  arm_fir_interpolate_instance_q15 * S,
3462 					  uint8_t L,
3463 					  uint16_t numTaps,
3464 					  q15_t * pCoeffs,
3465 					  q15_t * pState,
3466 					  uint32_t blockSize);
3467 
3468   /**
3469    * @brief Processing function for the Q31 FIR interpolator.
3470    * @param[in] *S        points to an instance of the Q31 FIR interpolator structure.
3471    * @param[in] *pSrc     points to the block of input data.
3472    * @param[out] *pDst    points to the block of output data.
3473    * @param[in] blockSize number of input samples to process per call.
3474    * @return none.
3475    */
3476 
3477   void arm_fir_interpolate_q31(
3478 			       const arm_fir_interpolate_instance_q31 * S,
3479 			        q31_t * pSrc,
3480 			       q31_t * pDst,
3481 			       uint32_t blockSize);
3482 
3483   /**
3484    * @brief  Initialization function for the Q31 FIR interpolator.
3485    * @param[in,out] *S        points to an instance of the Q31 FIR interpolator structure.
3486    * @param[in]     L         upsample factor.
3487    * @param[in]     numTaps   number of coefficients in the filter; must be a multiple of <code>L</code>.
3488    * @param[in]     *pCoeffs  points to the filter coefficient buffer.
3489    * @param[in]     *pState   points to the state buffer.
3490    * @param[in]     blockSize number of input samples to process per call.
3491    * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
3492    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
3493    */
3494 
3495   arm_status arm_fir_interpolate_init_q31(
3496 					  arm_fir_interpolate_instance_q31 * S,
3497 					  uint8_t L,
3498 					  uint16_t numTaps,
3499 					  q31_t * pCoeffs,
3500 					  q31_t * pState,
3501 					  uint32_t blockSize);
3502 
3503 
3504   /**
3505    * @brief Processing function for the floating-point FIR interpolator.
3506    * @param[in] *S        points to an instance of the floating-point FIR interpolator structure.
3507    * @param[in] *pSrc     points to the block of input data (blockSize samples).
3508    * @param[out] *pDst    points to the block of output data.
3509    * @param[in] blockSize number of input samples to process per call.
3510    * @return none.
3511    */
3512 
3513   void arm_fir_interpolate_f32(
3514 			       const arm_fir_interpolate_instance_f32 * S,
3515 			        float32_t * pSrc,
3516 			       float32_t * pDst,
3517 			       uint32_t blockSize);
3518 
3519   /**
3520    * @brief  Initialization function for the floating-point FIR interpolator.
3521    * @param[in,out] *S        points to an instance of the floating-point FIR interpolator structure.
3522    * @param[in]     L         upsample factor.
3523    * @param[in]     numTaps   number of coefficients in the filter; must be a multiple of <code>L</code>.
3524    * @param[in]     *pCoeffs  points to the filter coefficient buffer.
3525    * @param[in]     *pState   points to the state buffer.
3526    * @param[in]     blockSize number of input samples to process per call.
3527    * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
3528    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
3529    */
3530 
3531   arm_status arm_fir_interpolate_init_f32(
3532 					  arm_fir_interpolate_instance_f32 * S,
3533 					  uint8_t L,
3534 					  uint16_t numTaps,
3535 					  float32_t * pCoeffs,
3536 					  float32_t * pState,
3537 					  uint32_t blockSize);
3538 
3539   /**
3540    * @brief Instance structure for the high precision Q31 Biquad cascade filter. Passed to arm_biquad_cas_df1_32x64_init_q31() and arm_biquad_cas_df1_32x64_q31().
3541    */
3542 
3543   typedef struct
3544   {
3545     uint8_t numStages;       /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
3546     q63_t *pState;           /**< points to the array of state variables.  The array is of length 4*numStages. */
3547     q31_t *pCoeffs;          /**< points to the array of coefficients.  The array is of length 5*numStages. */
3548     uint8_t postShift;       /**< additional shift, in bits, applied to each output sample. */
3549 
3550   } arm_biquad_cas_df1_32x64_ins_q31;
3551 
3552 
3553   /** @brief Processing function for the high precision Q31 Biquad cascade filter.
3554    * @param[in]  *S        points to an instance of the high precision Q31 Biquad cascade filter structure.
3555    * @param[in]  *pSrc     points to the block of input data.
3556    * @param[out] *pDst     points to the block of output data.
3557    * @param[in]  blockSize number of samples to process.
3558    * @return none.
3559    */
3560 
3561   void arm_biquad_cas_df1_32x64_q31(
3562 				    const arm_biquad_cas_df1_32x64_ins_q31 * S,
3563 				     q31_t * pSrc,
3564 				    q31_t * pDst,
3565 				    uint32_t blockSize);
3566 
3567 
3568   /** @brief Initialization function for the high precision Q31 Biquad cascade filter.
3569    * @param[in,out] *S           points to an instance of the high precision Q31 Biquad cascade filter structure.
3570    * @param[in]     numStages    number of 2nd order stages in the filter.
3571    * @param[in]     *pCoeffs     points to the filter coefficients.  The array is of length 5*numStages.
3572    * @param[in]     *pState      points to the state buffer.  The array is of length 4*numStages.
3573    * @param[in]     postShift    shift to be applied to the output. Varies according to the coefficients format.
3574    * @return        none.
3575    */
3576 
3577   void arm_biquad_cas_df1_32x64_init_q31(
3578 					 arm_biquad_cas_df1_32x64_ins_q31 * S,
3579 					 uint8_t numStages,
3580 					 q31_t * pCoeffs,
3581 					 q63_t * pState,
3582 					 uint8_t postShift);
3583 
3584 
3585 
3586   /**
3587    * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter. Passed to arm_biquad_cascade_df2T_init_f32() and arm_biquad_cascade_df2T_f32().
3588    */
3589 
3590   typedef struct
3591   {
3592     uint8_t   numStages;       /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
3593     float32_t *pState;         /**< points to the array of state variables.  The array is of length 2*numStages. */
3594     float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
3595   } arm_biquad_cascade_df2T_instance_f32;
3596 
3597 
3598   /**
3599    * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter.
3600    * @param[in]  *S        points to an instance of the filter data structure.
3601    * @param[in]  *pSrc     points to the block of input data.
3602    * @param[out] *pDst     points to the block of output data.
3603    * @param[in]  blockSize number of samples to process.
3604    * @return none.
3605    */
3606 
3607   void arm_biquad_cascade_df2T_f32(
3608 				   const arm_biquad_cascade_df2T_instance_f32 * S,
3609 				    float32_t * pSrc,
3610 				   float32_t * pDst,
3611 				   uint32_t blockSize);
3612 
3613 
3614   /**
3615    * @brief  Initialization function for the floating-point transposed direct form II Biquad cascade filter.
3616    * @param[in,out] *S           points to an instance of the filter data structure.
3617    * @param[in]     numStages    number of 2nd order stages in the filter.
3618    * @param[in]     *pCoeffs     points to the filter coefficients.  The array is of length 5*numStages.
3619    * @param[in]     *pState      points to the state buffer.  The array is of length 2*numStages.
3620    * @return        none.
3621    */
3622 
3623   void arm_biquad_cascade_df2T_init_f32(
3624 					arm_biquad_cascade_df2T_instance_f32 * S,
3625 					uint8_t numStages,
3626 					float32_t * pCoeffs,
3627 					float32_t * pState);
3628 
3629 
3630 
3631   /**
3632    * @brief Instance structure for the Q15 FIR lattice filter. Passed to arm_fir_lattice_init_q15() and arm_fir_lattice_q15().
3633    */
3634 
3635   typedef struct
3636   {
3637     uint16_t numStages;                          /**< number of filter stages. */
3638     q15_t *pState;                               /**< points to the state variable array. The array is of length numStages. */
3639     q15_t *pCoeffs;                              /**< points to the coefficient array. The array is of length numStages. */
3640   } arm_fir_lattice_instance_q15;
3641 
3642   /**
3643    * @brief Instance structure for the Q31 FIR lattice filter. Passed to arm_fir_lattice_init_q31() and arm_fir_lattice_q31().
3644    */
3645 
3646   typedef struct
3647   {
3648     uint16_t numStages;                          /**< number of filter stages. */
3649     q31_t *pState;                               /**< points to the state variable array. The array is of length numStages. */
3650     q31_t *pCoeffs;                              /**< points to the coefficient array. The array is of length numStages. */
3651   } arm_fir_lattice_instance_q31;
3652 
3653   /**
3654    * @brief Instance structure for the floating-point FIR lattice filter. Passed to arm_fir_lattice_init_f32() and arm_fir_lattice_f32().
3655    */
3656 
3657   typedef struct
3658   {
3659     uint16_t numStages;                  /**< number of filter stages. */
3660     float32_t *pState;                   /**< points to the state variable array. The array is of length numStages. */
3661     float32_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numStages. */
3662   } arm_fir_lattice_instance_f32;
3663 
3664   /**
3665    * @brief Initialization function for the Q15 FIR lattice filter.
3666    * @param[in,out] *S points to an instance of the Q15 FIR lattice structure.
3667    * @param[in] numStages  number of filter stages.
3668    * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
3669    * @param[in] *pState points to the state buffer.  The array is of length numStages.
3670    * @return none.
3671    */
3672 
3673   void arm_fir_lattice_init_q15(
3674 				arm_fir_lattice_instance_q15 * S,
3675 				uint16_t numStages,
3676 				q15_t * pCoeffs,
3677 				q15_t * pState);
3678 
3679 
3680   /**
3681    * @brief Processing function for the Q15 FIR lattice filter.
3682    * @param[in] *S points to an instance of the Q15 FIR lattice structure.
3683    * @param[in] *pSrc points to the block of input data (blockSize samples).
3684    * @param[out] *pDst points to the block of output data.
3685    * @param[in] blockSize number of samples to process.
3686    * @return none.
3687    */
3688   void arm_fir_lattice_q15(
3689 			   const arm_fir_lattice_instance_q15 * S,
3690 			    q15_t * pSrc,
3691 			   q15_t * pDst,
3692 			   uint32_t blockSize);
3693 
3694   /**
3695    * @brief Initialization function for the Q31 FIR lattice filter.
3696    * @param[in,out] *S points to an instance of the Q31 FIR lattice structure.
3697    * @param[in] numStages  number of filter stages.
3698    * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
3699    * @param[in] *pState points to the state buffer.  The array is of length numStages.
3700    * @return none.
3701    */
3702 
3703   void arm_fir_lattice_init_q31(
3704 				arm_fir_lattice_instance_q31 * S,
3705 				uint16_t numStages,
3706 				q31_t * pCoeffs,
3707 				q31_t * pState);
3708 
3709 
3710   /**
3711    * @brief Processing function for the Q31 FIR lattice filter.
3712    * @param[in]  *S        points to an instance of the Q31 FIR lattice structure.
3713    * @param[in]  *pSrc     points to the block of input data.
3714    * @param[out] *pDst     points to the block of output data.
3715    * @param[in]  blockSize number of samples to process.
3716    * @return none.
3717    */
3718 
3719   void arm_fir_lattice_q31(
3720 			   const arm_fir_lattice_instance_q31 * S,
3721 			    q31_t * pSrc,
3722 			   q31_t * pDst,
3723 			   uint32_t blockSize);
3724 
3725   /**
3726    * @brief Initialization function for the floating-point FIR lattice filter.
3727    * @param[in,out] *S points to an instance of the floating-point FIR lattice structure.
3728    * @param[in] numStages  number of filter stages.
3729    * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
3730    * @param[in] *pState points to the state buffer.  The array is of length numStages.
3731    * @return none.
3732    */
3733 
3734   void arm_fir_lattice_init_f32(
3735 				arm_fir_lattice_instance_f32 * S,
3736 				uint16_t numStages,
3737 				float32_t * pCoeffs,
3738 				float32_t * pState);
3739 
3740   /**
3741    * @brief Processing function for the floating-point FIR lattice filter.
3742    * @param[in]  *S        points to an instance of the floating-point FIR lattice structure.
3743    * @param[in]  *pSrc     points to the block of input data.
3744    * @param[out] *pDst     points to the block of output data.
3745    * @param[in]  blockSize number of samples to process.
3746    * @return none.
3747    */
3748 
3749   void arm_fir_lattice_f32(
3750 			   const arm_fir_lattice_instance_f32 * S,
3751 			    float32_t * pSrc,
3752 			   float32_t * pDst,
3753 			   uint32_t blockSize);
3754 
3755   /**
3756    * @brief Instance structure for the Q15 IIR lattice filter. Passed to arm_iir_lattice_init_q15() and arm_iir_lattice_q15().
3757    */
3758   typedef struct
3759   {
3760     uint16_t numStages;                         /**< number of stages in the filter. */
3761     q15_t *pState;                              /**< points to the state variable array. The array is of length numStages+blockSize. */
3762     q15_t *pkCoeffs;                            /**< points to the reflection coefficient array. The array is of length numStages. */
3763     q15_t *pvCoeffs;                            /**< points to the ladder coefficient array. The array is of length numStages+1. */
3764   } arm_iir_lattice_instance_q15;
3765 
3766   /**
3767    * @brief Instance structure for the Q31 IIR lattice filter. Passed to arm_iir_lattice_init_q31() and arm_iir_lattice_q31().
3768    */
3769   typedef struct
3770   {
3771     uint16_t numStages;                         /**< number of stages in the filter. */
3772     q31_t *pState;                              /**< points to the state variable array. The array is of length numStages+blockSize. */
3773     q31_t *pkCoeffs;                            /**< points to the reflection coefficient array. The array is of length numStages. */
3774     q31_t *pvCoeffs;                            /**< points to the ladder coefficient array. The array is of length numStages+1. */
3775   } arm_iir_lattice_instance_q31;
3776 
3777   /**
3778    * @brief Instance structure for the floating-point IIR lattice filter. Passed to arm_iir_lattice_init_f32() and arm_iir_lattice_f32().
3779    */
3780   typedef struct
3781   {
3782     uint16_t numStages;                         /**< number of stages in the filter. */
3783     float32_t *pState;                          /**< points to the state variable array. The array is of length numStages+blockSize. */
3784     float32_t *pkCoeffs;                        /**< points to the reflection coefficient array. The array is of length numStages. */
3785     float32_t *pvCoeffs;                        /**< points to the ladder coefficient array. The array is of length numStages+1. */
3786   } arm_iir_lattice_instance_f32;
3787 
3788   /**
3789    * @brief Processing function for the floating-point IIR lattice filter.
3790    * @param[in] *S points to an instance of the floating-point IIR lattice structure.
3791    * @param[in] *pSrc points to the block of input data (blockSize samples).
3792    * @param[out] *pDst points to the block of output data.
3793    * @param[in] blockSize number of samples to process.
3794    * @return none.
3795    */
3796 
3797   void arm_iir_lattice_f32(
3798 			   const arm_iir_lattice_instance_f32 * S,
3799 			    float32_t * pSrc,
3800 			   float32_t * pDst,
3801 			   uint32_t blockSize);
3802 
3803   /**
3804    * @brief Initialization function for the floating-point IIR lattice filter.
3805    * @param[in,out] *S points to an instance of the floating-point IIR lattice structure.
3806    * @param[in] numStages number of stages in the filter.
3807    * @param[in] *pkCoeffs points to the reflection coefficient buffer.  The array is of length numStages.
3808    * @param[in] *pvCoeffs points to the ladder coefficient buffer.  The array is of length numStages+1.
3809    * @param[in] *pState points to the state buffer.  The array is of length numStages+blockSize.
3810    * @param[in] blockSize number of samples to process.
3811    * @return none.
3812    */
3813 
3814   void arm_iir_lattice_init_f32(
3815 				arm_iir_lattice_instance_f32 * S,
3816 				uint16_t numStages,
3817 				float32_t *pkCoeffs,
3818 				float32_t *pvCoeffs,
3819 				float32_t *pState,
3820 				uint32_t blockSize);
3821 
3822 
3823   /**
3824    * @brief Processing function for the Q31 IIR lattice filter.
3825    * @param[in] *S points to an instance of the Q31 IIR lattice structure.
3826    * @param[in] *pSrc points to the block of input data (blockSize samples).
3827    * @param[out] *pDst points to the block of output data.
3828    * @param[in] blockSize number of samples to process.
3829    * @return none.
3830    */
3831 
3832   void arm_iir_lattice_q31(
3833 			   const arm_iir_lattice_instance_q31 * S,
3834 			    q31_t * pSrc,
3835 			   q31_t * pDst,
3836 			   uint32_t blockSize);
3837 
3838 
3839   /**
3840    * @brief Initialization function for the Q31 IIR lattice filter.
3841    * @param[in,out] *S points to an instance of the Q31 IIR lattice structure.
3842    * @param[in] numStages number of stages in the filter.
3843    * @param[in] *pkCoeffs points to the reflection coefficient buffer.  The array is of length numStages.
3844    * @param[in] *pvCoeffs points to the ladder coefficient buffer.  The array is of length numStages+1.
3845    * @param[in] *pState points to the state buffer.  The array is of length numStages+blockSize.
3846    * @param[in] blockSize number of samples to process.
3847    * @return none.
3848    */
3849 
3850   void arm_iir_lattice_init_q31(
3851 				arm_iir_lattice_instance_q31 * S,
3852 				uint16_t numStages,
3853 				q31_t *pkCoeffs,
3854 				q31_t *pvCoeffs,
3855 				q31_t *pState,
3856 				uint32_t blockSize);
3857 
3858 
3859   /**
3860    * @brief Processing function for the Q15 IIR lattice filter.
3861    * @param[in] *S points to an instance of the Q15 IIR lattice structure.
3862    * @param[in] *pSrc points to the block of input data (blockSize samples).
3863    * @param[out] *pDst points to the block of output data.
3864    * @param[in] blockSize number of samples to process.
3865    * @return none.
3866    */
3867 
3868   void arm_iir_lattice_q15(
3869 			   const arm_iir_lattice_instance_q15 * S,
3870 			    q15_t * pSrc,
3871 			   q15_t * pDst,
3872 			   uint32_t blockSize);
3873 
3874 
3875   /**
3876    * @brief Initialization function for the Q15 IIR lattice filter.
3877    * @param[in,out] *S points to an instance of the fixed-point Q15 IIR lattice structure.
3878    * @param[in] numStages  number of stages in the filter.
3879    * @param[in] *pkCoeffs points to the reflection coefficient buffer.  The array is of length numStages.
3880    * @param[in] *pvCoeffs points to the ladder coefficient buffer.  The array is of length numStages+1.
3881    * @param[in] *pState points to the state buffer.  The array is of length numStages+blockSize.
3882    * @param[in] blockSize number of samples to process per call.
3883    * @return none.
3884    */
3885 
3886   void arm_iir_lattice_init_q15(
3887 				arm_iir_lattice_instance_q15 * S,
3888 				uint16_t numStages,
3889 				q15_t *pkCoeffs,
3890 				q15_t *pvCoeffs,
3891 				q15_t *pState,
3892 				uint32_t blockSize);
3893 
3894   /**
3895    * @brief Instance structure for the floating-point LMS filter. Passed to arm_lms_init_f32() and arm_lms_f32().
3896    */
3897 
3898   typedef struct
3899   {
3900     uint16_t numTaps;    /**< number of coefficients in the filter. */
3901     float32_t *pState;   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
3902     float32_t *pCoeffs;  /**< points to the coefficient array. The array is of length numTaps. */
3903     float32_t mu;        /**< step size that controls filter coefficient updates. */
3904   } arm_lms_instance_f32;
3905 
3906   /**
3907    * @brief Processing function for the floating-point LMS filter.
3908    * @param[in]  *S points to an instance of the floating-point LMS filter structure.
3909    * @param[in]  *pSrc points to the block of input data (blockSize samples).
3910    * @param[in]  *pRef points to the block of reference data.
3911    * @param[out] *pOut points to the block of output data.
3912    * @param[out] *pErr points to the block of error data.
3913    * @param[in]  blockSize number of samples to process.
3914    * @return     none.
3915    */
3916 
3917   void arm_lms_f32(
3918 		   const arm_lms_instance_f32 * S,
3919 		    float32_t * pSrc,
3920 		    float32_t * pRef,
3921 		   float32_t * pOut,
3922 		   float32_t * pErr,
3923 		   uint32_t blockSize);
3924 
3925   /**
3926    * @brief Initialization function for the floating-point LMS filter.
3927    * @param[in,out] *S points to an instance of the floating-point LMS filter structure.
3928    * @param[in] numTaps  number of filter coefficients.
3929    * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numTaps.
3930    * @param[in] *pState points to the state buffer.  The array is of length numTaps+blockSize-1.
3931    * @param[in] mu step size that controls filter coefficient updates.
3932    * @param[in] blockSize number of samples to process.
3933    * @return none.
3934    */
3935 
3936   void arm_lms_init_f32(
3937 			arm_lms_instance_f32 * S,
3938 			uint16_t numTaps,
3939 			float32_t * pCoeffs,
3940 			float32_t * pState,
3941 			float32_t mu,
3942 			uint32_t blockSize);
3943 
3944   /**
3945    * @brief Instance structure for the Q15 LMS filter. Passed to arm_lms_init_q15() and arm_lms_q15().
3946    */
3947 
3948   typedef struct
3949   {
3950     uint16_t numTaps;    /**< number of coefficients in the filter. */
3951     q15_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
3952     q15_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
3953     q15_t mu;            /**< step size that controls filter coefficient updates. */
3954     uint32_t postShift;  /**< bit shift applied to coefficients. */
3955   } arm_lms_instance_q15;
3956 
3957 
3958   /**
3959    * @brief Initialization function for the Q15 LMS filter.
3960    * @param[in,out] *S points to an instance of the Q15 LMS filter structure.
3961    * @param[in] numTaps  number of filter coefficients.
3962    * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numTaps.
3963    * @param[in] *pState points to the state buffer.  The array is of length numTaps+blockSize-1.
3964    * @param[in] mu step size that controls filter coefficient updates.
3965    * @param[in] blockSize number of samples to process.
3966    * @param[in] postShift bit shift applied to coefficients.
3967    * @return    none.
3968    */
3969 
3970   void arm_lms_init_q15(
3971 			arm_lms_instance_q15 * S,
3972 			uint16_t numTaps,
3973 			q15_t * pCoeffs,
3974 			q15_t * pState,
3975 			q15_t mu,
3976 			uint32_t blockSize,
3977 			uint32_t postShift);
3978 
3979   /**
3980    * @brief Processing function for the Q15 LMS filter.
3981    * @param[in] *S points to an instance of the Q15 LMS filter structure.
3982    * @param[in] *pSrc points to the block of input data (blockSize samples).
3983    * @param[in] *pRef points to the block of reference data.
3984    * @param[out] *pOut points to the block of output data.
3985    * @param[out] *pErr points to the block of error data.
3986    * @param[in] blockSize number of samples to process.
3987    * @return none.
3988    */
3989 
3990   void arm_lms_q15(
3991 		   const arm_lms_instance_q15 * S,
3992 		    q15_t * pSrc,
3993 		    q15_t * pRef,
3994 		   q15_t * pOut,
3995 		   q15_t * pErr,
3996 		   uint32_t blockSize);
3997 
3998 
3999   /**
4000    * @brief Instance structure for the Q31 LMS filter. Passed to arm_lms_init_q31() and arm_lms_q31().
4001    */
4002 
4003   typedef struct
4004   {
4005     uint16_t numTaps;    /**< number of coefficients in the filter. */
4006     q31_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4007     q31_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
4008     q31_t mu;            /**< step size that controls filter coefficient updates. */
4009     uint32_t postShift;  /**< bit shift applied to coefficients. */
4010 
4011   } arm_lms_instance_q31;
4012 
4013   /**
4014    * @brief Processing function for the Q31 LMS filter.
4015    * @param[in]  *S points to an instance of the Q31 LMS filter structure.
4016    * @param[in]  *pSrc points to the block of input data.
4017    * @param[in]  *pRef points to the block of reference data.
4018    * @param[out] *pOut points to the block of output data.
4019    * @param[out] *pErr points to the block of error data.
4020    * @param[in]  blockSize number of samples to process.
4021    * @return     none.
4022    */
4023 
4024   void arm_lms_q31(
4025 		   const arm_lms_instance_q31 * S,
4026 		    q31_t * pSrc,
4027 		    q31_t * pRef,
4028 		   q31_t * pOut,
4029 		   q31_t * pErr,
4030 		   uint32_t blockSize);
4031 
4032   /**
4033    * @brief Initialization function for the Q31 LMS filter.
4034    * @param[in,out] *S points to an instance of the Q31 LMS filter structure.
4035    * @param[in] numTaps  number of filter coefficients.
4036    * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numTaps.
4037    * @param[in] *pState points to the state buffer.  The array is of length numTaps+blockSize-1.
4038    * @param[in] mu step size that controls filter coefficient updates.
4039    * @param[in] blockSize number of samples to process.
4040    * @param[in] postShift bit shift applied to coefficients.
4041    * @return none.
4042    */
4043 
4044   void arm_lms_init_q31(
4045 			arm_lms_instance_q31 * S,
4046 			uint16_t numTaps,
4047 			q31_t *pCoeffs,
4048 			q31_t *pState,
4049 			q31_t mu,
4050 			uint32_t blockSize,
4051 			uint32_t postShift);
4052 
4053   /**
4054    * @brief Instance structure for the floating-point normalized LMS filter. Passed to arm_lms_norm_init_f32() and arm_lms_norm_f32().
4055    */
4056 
4057   typedef struct
4058   {
4059     uint16_t  numTaps;    /**< number of coefficients in the filter. */
4060     float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4061     float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
4062     float32_t mu;        /**< step size that controls filter coefficient updates. */
4063     float32_t energy;    /**< saves previous frame energy. */
4064     float32_t x0;        /**< saves previous input sample. */
4065   } arm_lms_norm_instance_f32;
4066 
4067   /**
4068    * @brief Processing function for the floating-point normalized LMS filter.
4069    * @param[in,out] *S points to an instance of the floating-point normalized LMS filter structure (non-const; the structure saves the previous frame energy and input sample).
4070    * @param[in] *pSrc points to the block of input data.
4071    * @param[in] *pRef points to the block of reference data.
4072    * @param[out] *pOut points to the block of output data.
4073    * @param[out] *pErr points to the block of error data.
4074    * @param[in] blockSize number of samples to process.
4075    * @return none.
4076    */
4077 
4078   void arm_lms_norm_f32(
4079 			arm_lms_norm_instance_f32 * S,
4080 			 float32_t * pSrc,
4081 			 float32_t * pRef,
4082 			float32_t * pOut,
4083 			float32_t * pErr,
4084 			uint32_t blockSize);
4085 
4086   /**
4087    * @brief Initialization function for the floating-point normalized LMS filter.
4088    * @param[in,out] *S points to an instance of the floating-point normalized LMS filter structure.
4089    * @param[in] numTaps  number of filter coefficients.
4090    * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numTaps.
4091    * @param[in] *pState points to the state buffer.  The array is of length numTaps+blockSize-1.
4092    * @param[in] mu step size that controls filter coefficient updates.
4093    * @param[in] blockSize number of samples to process.
4094    * @return none.
4095    */
4096 
4097   void arm_lms_norm_init_f32(
4098 			     arm_lms_norm_instance_f32 * S,
4099 			     uint16_t numTaps,
4100 			     float32_t * pCoeffs,
4101 			     float32_t * pState,
4102 			     float32_t mu,
4103 			     uint32_t blockSize);
4104 
4105 
4106   /**
4107    * @brief Instance structure for the Q31 normalized LMS filter.
4108    */
4109   typedef struct
4110   {
4111     uint16_t numTaps;     /**< number of coefficients in the filter. */
4112     q31_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4113     q31_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
4114     q31_t mu;             /**< step size that controls filter coefficient updates. */
4115     uint8_t postShift;    /**< bit shift applied to coefficients. */
4116     q31_t *recipTable;    /**< points to the reciprocal initial value table. */
4117     q31_t energy;         /**< saves previous frame energy. */
4118     q31_t x0;             /**< saves previous input sample. */
4119   } arm_lms_norm_instance_q31;
4120 
4121   /**
4122    * @brief Processing function for Q31 normalized LMS filter.
4123    * @param[in] *S points to an instance of the Q31 normalized LMS filter structure.
4124    * @param[in] *pSrc points to the block of input data.
4125    * @param[in] *pRef points to the block of reference data.
4126    * @param[out] *pOut points to the block of output data.
4127    * @param[out] *pErr points to the block of error data.
4128    * @param[in] blockSize number of samples to process.
4129    * @return none.
4130    */
4131 
4132   void arm_lms_norm_q31(
4133 			arm_lms_norm_instance_q31 * S,
4134 			 q31_t * pSrc,
4135 			 q31_t * pRef,
4136 			q31_t * pOut,
4137 			q31_t * pErr,
4138 			uint32_t blockSize);
4139 
4140   /**
4141    * @brief Initialization function for Q31 normalized LMS filter.
4142    * @param[in] *S points to an instance of the Q31 normalized LMS filter structure.
4143    * @param[in] numTaps  number of filter coefficients.
4144    * @param[in] *pCoeffs points to coefficient buffer.
4145    * @param[in] *pState points to state buffer.
4146    * @param[in] mu step size that controls filter coefficient updates.
4147    * @param[in] blockSize number of samples to process.
4148    * @param[in] postShift bit shift applied to coefficients.
4149    * @return none.
4150    */
4151 
4152   void arm_lms_norm_init_q31(
4153 			     arm_lms_norm_instance_q31 * S,
4154 			     uint16_t numTaps,
4155 			     q31_t * pCoeffs,
4156 			     q31_t * pState,
4157 			     q31_t mu,
4158 			     uint32_t blockSize,
4159 			     uint8_t postShift);
4160 
4161   /**
4162    * @brief Instance structure for the Q15 normalized LMS filter.
4163    */
4164 
4165   typedef struct
4166   {
4167     uint16_t numTaps;    /**< Number of coefficients in the filter. */
4168     q15_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4169     q15_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
4170     q15_t mu;            /**< step size that controls filter coefficient updates. */
4171     uint8_t postShift;   /**< bit shift applied to coefficients. */
4172     q15_t *recipTable;   /**< Points to the reciprocal initial value table. */
4173     q15_t energy;        /**< saves previous frame energy. */
4174     q15_t x0;            /**< saves previous input sample. */
4175   } arm_lms_norm_instance_q15;
4176 
4177   /**
4178    * @brief Processing function for Q15 normalized LMS filter.
4179    * @param[in] *S points to an instance of the Q15 normalized LMS filter structure.
4180    * @param[in] *pSrc points to the block of input data.
4181    * @param[in] *pRef points to the block of reference data.
4182    * @param[out] *pOut points to the block of output data.
4183    * @param[out] *pErr points to the block of error data.
4184    * @param[in] blockSize number of samples to process.
4185    * @return none.
4186    */
4187 
4188   void arm_lms_norm_q15(
4189 			arm_lms_norm_instance_q15 * S,
4190 			 q15_t * pSrc,
4191 			 q15_t * pRef,
4192 			q15_t * pOut,
4193 			q15_t * pErr,
4194 			uint32_t blockSize);
4195 
4196 
4197   /**
4198    * @brief Initialization function for Q15 normalized LMS filter.
4199    * @param[in] *S points to an instance of the Q15 normalized LMS filter structure.
4200    * @param[in] numTaps  number of filter coefficients.
4201    * @param[in] *pCoeffs points to coefficient buffer.
4202    * @param[in] *pState points to state buffer.
4203    * @param[in] mu step size that controls filter coefficient updates.
4204    * @param[in] blockSize number of samples to process.
4205    * @param[in] postShift bit shift applied to coefficients.
4206    * @return none.
4207    */
4208 
4209   void arm_lms_norm_init_q15(
4210 			     arm_lms_norm_instance_q15 * S,
4211 			     uint16_t numTaps,
4212 			     q15_t * pCoeffs,
4213 			     q15_t * pState,
4214 			     q15_t mu,
4215 			     uint32_t blockSize,
4216 			     uint8_t postShift);
4217 
4218   /**
4219    * @brief Correlation of floating-point sequences.
4220    * @param[in] *pSrcA points to the first input sequence.
4221    * @param[in] srcALen length of the first input sequence.
4222    * @param[in] *pSrcB points to the second input sequence.
4223    * @param[in] srcBLen length of the second input sequence.
4224    * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
4225    * @return none.
4226    */
4227 
4228   void arm_correlate_f32(
4229 			  float32_t * pSrcA,
4230 			 uint32_t srcALen,
4231 			  float32_t * pSrcB,
4232 			 uint32_t srcBLen,
4233 			 float32_t * pDst);
4234 
4235   /**
4236    * @brief Correlation of Q15 sequences.
4237    * @param[in] *pSrcA points to the first input sequence.
4238    * @param[in] srcALen length of the first input sequence.
4239    * @param[in] *pSrcB points to the second input sequence.
4240    * @param[in] srcBLen length of the second input sequence.
4241    * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
4242    * @return none.
4243    */
4244 
4245   void arm_correlate_q15(
4246 			  q15_t * pSrcA,
4247 			 uint32_t srcALen,
4248 			  q15_t * pSrcB,
4249 			 uint32_t srcBLen,
4250 			 q15_t * pDst);
4251 
4252   /**
4253    * @brief Correlation of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
4254    * @param[in] *pSrcA points to the first input sequence.
4255    * @param[in] srcALen length of the first input sequence.
4256    * @param[in] *pSrcB points to the second input sequence.
4257    * @param[in] srcBLen length of the second input sequence.
4258    * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
4259    * @return none.
4260    */
4261 
4262   void arm_correlate_fast_q15(
4263 			       q15_t * pSrcA,
4264 			      uint32_t srcALen,
4265 			       q15_t * pSrcB,
4266 			      uint32_t srcBLen,
4267 			      q15_t * pDst);
4268 
4269   /**
4270    * @brief Correlation of Q31 sequences.
4271    * @param[in] *pSrcA points to the first input sequence.
4272    * @param[in] srcALen length of the first input sequence.
4273    * @param[in] *pSrcB points to the second input sequence.
4274    * @param[in] srcBLen length of the second input sequence.
4275    * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
4276    * @return none.
4277    */
4278 
4279   void arm_correlate_q31(
4280 			  q31_t * pSrcA,
4281 			 uint32_t srcALen,
4282 			  q31_t * pSrcB,
4283 			 uint32_t srcBLen,
4284 			 q31_t * pDst);
4285 
4286   /**
4287    * @brief Correlation of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4
4288    * @param[in] *pSrcA points to the first input sequence.
4289    * @param[in] srcALen length of the first input sequence.
4290    * @param[in] *pSrcB points to the second input sequence.
4291    * @param[in] srcBLen length of the second input sequence.
4292    * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
4293    * @return none.
4294    */
4295 
4296   void arm_correlate_fast_q31(
4297 			       q31_t * pSrcA,
4298 			      uint32_t srcALen,
4299 			       q31_t * pSrcB,
4300 			      uint32_t srcBLen,
4301 			      q31_t * pDst);
4302 
4303   /**
4304    * @brief Correlation of Q7 sequences.
4305    * @param[in] *pSrcA points to the first input sequence.
4306    * @param[in] srcALen length of the first input sequence.
4307    * @param[in] *pSrcB points to the second input sequence.
4308    * @param[in] srcBLen length of the second input sequence.
4309    * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
4310    * @return none.
4311    */
4312 
4313   void arm_correlate_q7(
4314 			 q7_t * pSrcA,
4315 			uint32_t srcALen,
4316 			 q7_t * pSrcB,
4317 			uint32_t srcBLen,
4318 			q7_t * pDst);
4319 
4320   /**
4321    * @brief Instance structure for the floating-point sparse FIR filter.
4322    */
4323   typedef struct
4324   {
4325     uint16_t numTaps;             /**< number of coefficients in the filter. */
4326     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4327     float32_t *pState;            /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4328     float32_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
4329     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4330     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4331   } arm_fir_sparse_instance_f32;
4332 
4333   /**
4334    * @brief Instance structure for the Q31 sparse FIR filter.
4335    */
4336 
4337   typedef struct
4338   {
4339     uint16_t numTaps;             /**< number of coefficients in the filter. */
4340     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4341     q31_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4342     q31_t *pCoeffs;               /**< points to the coefficient array. The array is of length numTaps.*/
4343     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4344     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4345   } arm_fir_sparse_instance_q31;
4346 
4347   /**
4348    * @brief Instance structure for the Q15 sparse FIR filter.
4349    */
4350 
4351   typedef struct
4352   {
4353     uint16_t numTaps;             /**< number of coefficients in the filter. */
4354     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4355     q15_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4356     q15_t *pCoeffs;               /**< points to the coefficient array. The array is of length numTaps.*/
4357     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4358     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4359   } arm_fir_sparse_instance_q15;
4360 
4361   /**
4362    * @brief Instance structure for the Q7 sparse FIR filter.
4363    */
4364 
4365   typedef struct
4366   {
4367     uint16_t numTaps;             /**< number of coefficients in the filter. */
4368     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4369     q7_t *pState;                 /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4370     q7_t *pCoeffs;                /**< points to the coefficient array. The array is of length numTaps.*/
4371     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4372     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4373   } arm_fir_sparse_instance_q7;
4374 
4375   /**
4376    * @brief Processing function for the floating-point sparse FIR filter.
4377    * @param[in]  *S          points to an instance of the floating-point sparse FIR structure.
4378    * @param[in]  *pSrc       points to the block of input data.
4379    * @param[out] *pDst       points to the block of output data
4380    * @param[in]  *pScratchIn points to a temporary buffer of size blockSize.
4381    * @param[in]  blockSize   number of input samples to process per call.
4382    * @return none.
4383    */
4384 
4385   void arm_fir_sparse_f32(
4386 			  arm_fir_sparse_instance_f32 * S,
4387 			   float32_t * pSrc,
4388 			  float32_t * pDst,
4389 			  float32_t * pScratchIn,
4390 			  uint32_t blockSize);
4391 
4392   /**
4393    * @brief  Initialization function for the floating-point sparse FIR filter.
4394    * @param[in,out] *S         points to an instance of the floating-point sparse FIR structure.
4395    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4396    * @param[in]     *pCoeffs   points to the array of filter coefficients.
4397    * @param[in]     *pState    points to the state buffer.
4398    * @param[in]     *pTapDelay points to the array of offset times.
4399    * @param[in]     maxDelay   maximum offset time supported.
4400    * @param[in]     blockSize  number of samples that will be processed per block.
4401    * @return none
4402    */
4403 
4404   void arm_fir_sparse_init_f32(
4405 			       arm_fir_sparse_instance_f32 * S,
4406 			       uint16_t numTaps,
4407 			       float32_t * pCoeffs,
4408 			       float32_t * pState,
4409 			       int32_t * pTapDelay,
4410 			       uint16_t maxDelay,
4411 			       uint32_t blockSize);
4412 
4413   /**
4414    * @brief Processing function for the Q31 sparse FIR filter.
4415    * @param[in]  *S          points to an instance of the Q31 sparse FIR structure.
4416    * @param[in]  *pSrc       points to the block of input data.
4417    * @param[out] *pDst       points to the block of output data
4418    * @param[in]  *pScratchIn points to a temporary buffer of size blockSize.
4419    * @param[in]  blockSize   number of input samples to process per call.
4420    * @return none.
4421    */
4422 
4423   void arm_fir_sparse_q31(
4424 			  arm_fir_sparse_instance_q31 * S,
4425 			   q31_t * pSrc,
4426 			  q31_t * pDst,
4427 			  q31_t * pScratchIn,
4428 			  uint32_t blockSize);
4429 
4430   /**
4431    * @brief  Initialization function for the Q31 sparse FIR filter.
4432    * @param[in,out] *S         points to an instance of the Q31 sparse FIR structure.
4433    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4434    * @param[in]     *pCoeffs   points to the array of filter coefficients.
4435    * @param[in]     *pState    points to the state buffer.
4436    * @param[in]     *pTapDelay points to the array of offset times.
4437    * @param[in]     maxDelay   maximum offset time supported.
4438    * @param[in]     blockSize  number of samples that will be processed per block.
4439    * @return none
4440    */
4441 
4442   void arm_fir_sparse_init_q31(
4443 			       arm_fir_sparse_instance_q31 * S,
4444 			       uint16_t numTaps,
4445 			       q31_t * pCoeffs,
4446 			       q31_t * pState,
4447 			       int32_t * pTapDelay,
4448 			       uint16_t maxDelay,
4449 			       uint32_t blockSize);
4450 
4451   /**
4452    * @brief Processing function for the Q15 sparse FIR filter.
4453    * @param[in]  *S           points to an instance of the Q15 sparse FIR structure.
4454    * @param[in]  *pSrc        points to the block of input data.
4455    * @param[out] *pDst        points to the block of output data
4456    * @param[in]  *pScratchIn  points to a temporary buffer of size blockSize.
4457    * @param[in]  *pScratchOut points to a temporary buffer of size blockSize.
4458    * @param[in]  blockSize    number of input samples to process per call.
4459    * @return none.
4460    */
4461 
4462   void arm_fir_sparse_q15(
4463 			  arm_fir_sparse_instance_q15 * S,
4464 			   q15_t * pSrc,
4465 			  q15_t * pDst,
4466 			  q15_t * pScratchIn,
4467 			  q31_t * pScratchOut,
4468 			  uint32_t blockSize);
4469 
4470 
4471   /**
4472    * @brief  Initialization function for the Q15 sparse FIR filter.
4473    * @param[in,out] *S         points to an instance of the Q15 sparse FIR structure.
4474    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4475    * @param[in]     *pCoeffs   points to the array of filter coefficients.
4476    * @param[in]     *pState    points to the state buffer.
4477    * @param[in]     *pTapDelay points to the array of offset times.
4478    * @param[in]     maxDelay   maximum offset time supported.
4479    * @param[in]     blockSize  number of samples that will be processed per block.
4480    * @return none
4481    */
4482 
4483   void arm_fir_sparse_init_q15(
4484 			       arm_fir_sparse_instance_q15 * S,
4485 			       uint16_t numTaps,
4486 			       q15_t * pCoeffs,
4487 			       q15_t * pState,
4488 			       int32_t * pTapDelay,
4489 			       uint16_t maxDelay,
4490 			       uint32_t blockSize);
4491 
4492   /**
4493    * @brief Processing function for the Q7 sparse FIR filter.
4494    * @param[in]  *S           points to an instance of the Q7 sparse FIR structure.
4495    * @param[in]  *pSrc        points to the block of input data.
4496    * @param[out] *pDst        points to the block of output data
4497    * @param[in]  *pScratchIn  points to a temporary buffer of size blockSize.
4498    * @param[in]  *pScratchOut points to a temporary buffer of size blockSize.
4499    * @param[in]  blockSize    number of input samples to process per call.
4500    * @return none.
4501    */
4502 
4503   void arm_fir_sparse_q7(
4504 			 arm_fir_sparse_instance_q7 * S,
4505 			  q7_t * pSrc,
4506 			 q7_t * pDst,
4507 			 q7_t * pScratchIn,
4508 			 q31_t * pScratchOut,
4509 			 uint32_t blockSize);
4510 
4511   /**
4512    * @brief  Initialization function for the Q7 sparse FIR filter.
4513    * @param[in,out] *S         points to an instance of the Q7 sparse FIR structure.
4514    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4515    * @param[in]     *pCoeffs   points to the array of filter coefficients.
4516    * @param[in]     *pState    points to the state buffer.
4517    * @param[in]     *pTapDelay points to the array of offset times.
4518    * @param[in]     maxDelay   maximum offset time supported.
4519    * @param[in]     blockSize  number of samples that will be processed per block.
4520    * @return none
4521    */
4522 
4523   void arm_fir_sparse_init_q7(
4524 			      arm_fir_sparse_instance_q7 * S,
4525 			      uint16_t numTaps,
4526 			      q7_t * pCoeffs,
4527 			      q7_t * pState,
4528 			      int32_t *pTapDelay,
4529 			      uint16_t maxDelay,
4530 			      uint32_t blockSize);
4531 
4532 
4533   /*
4534    * @brief  Floating-point sin_cos function.
4535    * @param[in]  theta    input value in degrees
4536    * @param[out] *pSinVal points to the processed sine output.
4537    * @param[out] *pCosVal points to the processed cos output.
4538    * @return none.
4539    */
4540 
4541   void arm_sin_cos_f32(
4542 		       float32_t theta,
4543 		       float32_t *pSinVal,
4544 		       float32_t *pCcosVal);
4545 
4546   /*
4547    * @brief  Q31 sin_cos function.
4548    * @param[in]  theta    scaled input value in degrees
4549    * @param[out] *pSinVal points to the processed sine output.
4550    * @param[out] *pCosVal points to the processed cosine output.
4551    * @return none.
4552    */
4553 
4554   void arm_sin_cos_q31(
4555 		       q31_t theta,
4556 		       q31_t *pSinVal,
4557 		       q31_t *pCosVal);
4558 
4559 
4560   /**
4561    * @brief  Floating-point complex conjugate.
4562    * @param[in]  *pSrc points to the input vector
4563    * @param[out]  *pDst points to the output vector
4564    * @param[in]  numSamples number of complex samples in each vector
4565    * @return none.
4566    */
4567 
4568   void arm_cmplx_conj_f32(
4569 			   float32_t * pSrc,
4570 			  float32_t * pDst,
4571 			  uint32_t numSamples);
4572 
4573   /**
4574    * @brief  Q31 complex conjugate.
4575    * @param[in]  *pSrc points to the input vector
4576    * @param[out]  *pDst points to the output vector
4577    * @param[in]  numSamples number of complex samples in each vector
4578    * @return none.
4579    */
4580 
4581   void arm_cmplx_conj_q31(
4582 			   q31_t * pSrc,
4583 			  q31_t * pDst,
4584 			  uint32_t numSamples);
4585 
4586   /**
4587    * @brief  Q15 complex conjugate.
4588    * @param[in]  *pSrc points to the input vector
4589    * @param[out]  *pDst points to the output vector
4590    * @param[in]  numSamples number of complex samples in each vector
4591    * @return none.
4592    */
4593 
4594   void arm_cmplx_conj_q15(
4595 			   q15_t * pSrc,
4596 			  q15_t * pDst,
4597 			  uint32_t numSamples);
4598 
4599 
4600 
4601   /**
4602    * @brief  Floating-point complex magnitude squared
4603    * @param[in]  *pSrc points to the complex input vector
4604    * @param[out]  *pDst points to the real output vector
4605    * @param[in]  numSamples number of complex samples in the input vector
4606    * @return none.
4607    */
4608 
4609   void arm_cmplx_mag_squared_f32(
4610 				  float32_t * pSrc,
4611 				 float32_t * pDst,
4612 				 uint32_t numSamples);
4613 
4614   /**
4615    * @brief  Q31 complex magnitude squared
4616    * @param[in]  *pSrc points to the complex input vector
4617    * @param[out]  *pDst points to the real output vector
4618    * @param[in]  numSamples number of complex samples in the input vector
4619    * @return none.
4620    */
4621 
4622   void arm_cmplx_mag_squared_q31(
4623 				  q31_t * pSrc,
4624 				 q31_t * pDst,
4625 				 uint32_t numSamples);
4626 
4627   /**
4628    * @brief  Q15 complex magnitude squared
4629    * @param[in]  *pSrc points to the complex input vector
4630    * @param[out]  *pDst points to the real output vector
4631    * @param[in]  numSamples number of complex samples in the input vector
4632    * @return none.
4633    */
4634 
4635   void arm_cmplx_mag_squared_q15(
4636 				  q15_t * pSrc,
4637 				 q15_t * pDst,
4638 				 uint32_t numSamples);
4639 
4640 
4641  /**
4642    * @ingroup groupController
4643    */
4644 
4645   /**
4646    * @defgroup PID PID Motor Control
4647    *
4648    * A Proportional Integral Derivative (PID) controller is a generic feedback control
4649    * loop mechanism widely used in industrial control systems.
4650    * A PID controller is the most commonly used type of feedback controller.
4651    *
   * This set of functions implements PID controllers
4653    * for Q15, Q31, and floating-point data types.  The functions operate on a single sample
4654    * of data and each call to the function returns a single processed value.
4655    * <code>S</code> points to an instance of the PID control data structure.  <code>in</code>
4656    * is the input sample value. The functions return the output value.
4657    *
4658    * \par Algorithm:
4659    * <pre>
4660    *    y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]
4661    *    A0 = Kp + Ki + Kd
4662    *    A1 = (-Kp ) - (2 * Kd )
4663    *    A2 = Kd  </pre>
4664    *
4665    * \par
   * where \c Kp is the proportional constant, \c Ki is the integral constant and \c Kd is the derivative constant.
4667    *
4668    * \par
4669    * \image html PID.gif "Proportional Integral Derivative Controller"
4670    *
4671    * \par
4672    * The PID controller calculates an "error" value as the difference between
4673    * the measured output and the reference input.
4674    * The controller attempts to minimize the error by adjusting the process control inputs.
4675    * The proportional value determines the reaction to the current error,
4676    * the integral value determines the reaction based on the sum of recent errors,
4677    * and the derivative value determines the reaction based on the rate at which the error has been changing.
4678    *
4679    * \par Instance Structure
4680    * The Gains A0, A1, A2 and state variables for a PID controller are stored together in an instance data structure.
4681    * A separate instance structure must be defined for each PID Controller.
4682    * There are separate instance structure declarations for each of the 3 supported data types.
4683    *
4684    * \par Reset Functions
4685    * There is also an associated reset function for each data type which clears the state array.
4686    *
4687    * \par Initialization Functions
4688    * There is also an associated initialization function for each data type.
4689    * The initialization function performs the following operations:
4690    * - Initializes the Gains A0, A1, A2 from Kp,Ki, Kd gains.
4691    * - Zeros out the values in the state buffer.
4692    *
4693    * \par
4694    * Instance structure cannot be placed into a const data section and it is recommended to use the initialization function.
4695    *
4696    * \par Fixed-Point Behavior
4697    * Care must be taken when using the fixed-point versions of the PID Controller functions.
4698    * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
4699    * Refer to the function specific documentation below for usage guidelines.
4700    */
4701 
4702   /**
4703    * @addtogroup PID
4704    * @{
4705    */
4706 
4707   /**
4708    * @brief  Process function for the floating-point PID Control.
4709    * @param[in,out] *S is an instance of the floating-point PID Control structure
4710    * @param[in] in input sample to process
4711    * @return out processed output sample.
4712    */
4713 
4714 
arm_pid_f32(arm_pid_instance_f32 * S,float32_t in)4715   __STATIC_INLINE float32_t arm_pid_f32(
4716 					arm_pid_instance_f32 * S,
4717 					float32_t in)
4718   {
4719     float32_t out;
4720 
4721     /* y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]  */
4722     out = (S->A0 * in) +
4723       (S->A1 * S->state[0]) + (S->A2 * S->state[1]) + (S->state[2]);
4724 
4725     /* Update state */
4726     S->state[1] = S->state[0];
4727     S->state[0] = in;
4728     S->state[2] = out;
4729 
4730     /* return to application */
4731     return (out);
4732 
4733   }
4734 
4735   /**
4736    * @brief  Process function for the Q31 PID Control.
4737    * @param[in,out] *S points to an instance of the Q31 PID Control structure
4738    * @param[in] in input sample to process
4739    * @return out processed output sample.
4740    *
4741    * <b>Scaling and Overflow Behavior:</b>
4742    * \par
4743    * The function is implemented using an internal 64-bit accumulator.
4744    * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
4745    * Thus, if the accumulator result overflows it wraps around rather than clip.
4746    * In order to avoid overflows completely the input signal must be scaled down by 2 bits as there are four additions.
4747    * After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format.
4748    */
4749 
arm_pid_q31(arm_pid_instance_q31 * S,q31_t in)4750   __STATIC_INLINE q31_t arm_pid_q31(
4751 				    arm_pid_instance_q31 * S,
4752 				    q31_t in)
4753   {
4754     q63_t acc;
4755 	q31_t out;
4756 
4757     /* acc = A0 * x[n]  */
4758     acc = (q63_t) S->A0 * in;
4759 
4760     /* acc += A1 * x[n-1] */
4761     acc += (q63_t) S->A1 * S->state[0];
4762 
4763     /* acc += A2 * x[n-2]  */
4764     acc += (q63_t) S->A2 * S->state[1];
4765 
4766     /* convert output to 1.31 format to add y[n-1] */
4767     out = (q31_t) (acc >> 31u);
4768 
4769     /* out += y[n-1] */
4770     out += S->state[2];
4771 
4772     /* Update state */
4773     S->state[1] = S->state[0];
4774     S->state[0] = in;
4775     S->state[2] = out;
4776 
4777     /* return to application */
4778     return (out);
4779 
4780   }
4781 
4782   /**
4783    * @brief  Process function for the Q15 PID Control.
4784    * @param[in,out] *S points to an instance of the Q15 PID Control structure
4785    * @param[in] in input sample to process
4786    * @return out processed output sample.
4787    *
4788    * <b>Scaling and Overflow Behavior:</b>
4789    * \par
4790    * The function is implemented using a 64-bit internal accumulator.
4791    * Both Gains and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
4792    * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
4793    * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
4794    * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
4795    * Lastly, the accumulator is saturated to yield a result in 1.15 format.
4796    */
4797 
arm_pid_q15(arm_pid_instance_q15 * S,q15_t in)4798   __STATIC_INLINE q15_t arm_pid_q15(
4799 				    arm_pid_instance_q15 * S,
4800 				    q15_t in)
4801   {
4802     q63_t acc;
4803     q15_t out;
4804 
4805     /* Implementation of PID controller */
4806 
4807 	#ifdef ARM_MATH_CM0
4808 
4809  	/* acc = A0 * x[n]  */
4810 	acc = ((q31_t) S->A0 )* in ;
4811 
4812     #else
4813 
4814     /* acc = A0 * x[n]  */
4815     acc = (q31_t) __SMUAD(S->A0, in);
4816 
4817 	#endif
4818 
4819 	#ifdef ARM_MATH_CM0
4820 
4821 	/* acc += A1 * x[n-1] + A2 * x[n-2]  */
4822 	acc += (q31_t) S->A1  *  S->state[0] ;
4823 	acc += (q31_t) S->A2  *  S->state[1] ;
4824 
4825 	#else
4826 
4827     /* acc += A1 * x[n-1] + A2 * x[n-2]  */
4828     acc = __SMLALD(S->A1, (q31_t)__SIMD32(S->state), acc);
4829 
4830 	#endif
4831 
4832     /* acc += y[n-1] */
4833     acc += (q31_t) S->state[2] << 15;
4834 
4835     /* saturate the output */
4836     out = (q15_t) (__SSAT((acc >> 15), 16));
4837 
4838     /* Update state */
4839     S->state[1] = S->state[0];
4840     S->state[0] = in;
4841     S->state[2] = out;
4842 
4843     /* return to application */
4844     return (out);
4845 
4846   }
4847 
4848   /**
4849    * @} end of PID group
4850    */
4851 
4852 
4853   /**
4854    * @brief Floating-point matrix inverse.
4855    * @param[in]  *src points to the instance of the input floating-point matrix structure.
4856    * @param[out] *dst points to the instance of the output floating-point matrix structure.
4857    * @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match.
4858    * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status ARM_MATH_SINGULAR.
4859    */
4860 
4861   arm_status arm_mat_inverse_f32(
4862 				 const arm_matrix_instance_f32 * src,
4863 				 arm_matrix_instance_f32 * dst);
4864 
4865 
4866 
4867   /**
4868    * @ingroup groupController
4869    */
4870 
4871 
4872   /**
4873    * @defgroup clarke Vector Clarke Transform
4874    * Forward Clarke transform converts the instantaneous stator phases into a two-coordinate time invariant vector.
4875    * Generally the Clarke transform uses three-phase currents <code>Ia, Ib and Ic</code> to calculate currents
4876    * in the two-phase orthogonal stator axis <code>Ialpha</code> and <code>Ibeta</code>.
4877    * When <code>Ialpha</code> is superposed with <code>Ia</code> as shown in the figure below
4878    * \image html clarke.gif Stator current space vector and its components in (a,b).
4879    * and <code>Ia + Ib + Ic = 0</code>, in this condition <code>Ialpha</code> and <code>Ibeta</code>
4880    * can be calculated using only <code>Ia</code> and <code>Ib</code>.
4881    *
4882    * The function operates on a single sample of data and each call to the function returns the processed output.
4883    * The library provides separate functions for Q31 and floating-point data types.
4884    * \par Algorithm
4885    * \image html clarkeFormula.gif
4886    * where <code>Ia</code> and <code>Ib</code> are the instantaneous stator phases and
4887    * <code>pIalpha</code> and <code>pIbeta</code> are the two coordinates of time invariant vector.
4888    * \par Fixed-Point Behavior
4889    * Care must be taken when using the Q31 version of the Clarke transform.
4890    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
4891    * Refer to the function specific documentation below for usage guidelines.
4892    */
4893 
4894   /**
4895    * @addtogroup clarke
4896    * @{
4897    */
4898 
4899   /**
4900    *
4901    * @brief  Floating-point Clarke transform
4902    * @param[in]       Ia       input three-phase coordinate <code>a</code>
4903    * @param[in]       Ib       input three-phase coordinate <code>b</code>
4904    * @param[out]      *pIalpha points to output two-phase orthogonal vector axis alpha
4905    * @param[out]      *pIbeta  points to output two-phase orthogonal vector axis beta
4906    * @return none.
4907    */
4908 
arm_clarke_f32(float32_t Ia,float32_t Ib,float32_t * pIalpha,float32_t * pIbeta)4909   __STATIC_INLINE void arm_clarke_f32(
4910 				      float32_t Ia,
4911 				      float32_t Ib,
4912 				      float32_t * pIalpha,
4913 				      float32_t * pIbeta)
4914   {
4915     /* Calculate pIalpha using the equation, pIalpha = Ia */
4916     *pIalpha = Ia;
4917 
4918     /* Calculate pIbeta using the equation, pIbeta = (1/sqrt(3)) * Ia + (2/sqrt(3)) * Ib */
4919     *pIbeta = ((float32_t) 0.57735026919 * Ia + (float32_t) 1.15470053838 * Ib);
4920 
4921   }
4922 
4923   /**
4924    * @brief  Clarke transform for Q31 version
4925    * @param[in]       Ia       input three-phase coordinate <code>a</code>
4926    * @param[in]       Ib       input three-phase coordinate <code>b</code>
4927    * @param[out]      *pIalpha points to output two-phase orthogonal vector axis alpha
4928    * @param[out]      *pIbeta  points to output two-phase orthogonal vector axis beta
4929    * @return none.
4930    *
4931    * <b>Scaling and Overflow Behavior:</b>
4932    * \par
4933    * The function is implemented using an internal 32-bit accumulator.
4934    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
4935    * There is saturation on the addition, hence there is no risk of overflow.
4936    */
4937 
arm_clarke_q31(q31_t Ia,q31_t Ib,q31_t * pIalpha,q31_t * pIbeta)4938   __STATIC_INLINE void arm_clarke_q31(
4939 				      q31_t Ia,
4940 				      q31_t Ib,
4941 				      q31_t * pIalpha,
4942 				      q31_t * pIbeta)
4943   {
4944     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
4945 
4946     /* Calculating pIalpha from Ia by equation pIalpha = Ia */
4947     *pIalpha = Ia;
4948 
4949     /* Intermediate product is calculated by (1/(sqrt(3)) * Ia) */
4950     product1 = (q31_t) (((q63_t) Ia * 0x24F34E8B) >> 30);
4951 
4952     /* Intermediate product is calculated by (2/sqrt(3) * Ib) */
4953     product2 = (q31_t) (((q63_t) Ib * 0x49E69D16) >> 30);
4954 
4955     /* pIbeta is calculated by adding the intermediate products */
4956     *pIbeta = __QADD(product1, product2);
4957   }
4958 
4959   /**
4960    * @} end of clarke group
4961    */
4962 
4963   /**
4964    * @brief  Converts the elements of the Q7 vector to Q31 vector.
4965    * @param[in]  *pSrc     input pointer
4966    * @param[out]  *pDst    output pointer
4967    * @param[in]  blockSize number of samples to process
4968    * @return none.
4969    */
4970   void arm_q7_to_q31(
4971 		     q7_t * pSrc,
4972 		     q31_t * pDst,
4973 		     uint32_t blockSize);
4974 
4975 
4976 
4977 
4978   /**
4979    * @ingroup groupController
4980    */
4981 
4982   /**
4983    * @defgroup inv_clarke Vector Inverse Clarke Transform
4984    * Inverse Clarke transform converts the two-coordinate time invariant vector into instantaneous stator phases.
4985    *
4986    * The function operates on a single sample of data and each call to the function returns the processed output.
4987    * The library provides separate functions for Q31 and floating-point data types.
4988    * \par Algorithm
4989    * \image html clarkeInvFormula.gif
4990    * where <code>pIa</code> and <code>pIb</code> are the instantaneous stator phases and
4991    * <code>Ialpha</code> and <code>Ibeta</code> are the two coordinates of time invariant vector.
4992    * \par Fixed-Point Behavior
4993    * Care must be taken when using the Q31 version of the inverse Clarke transform.
4994    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
4995    * Refer to the function specific documentation below for usage guidelines.
4996    */
4997 
4998   /**
4999    * @addtogroup inv_clarke
5000    * @{
5001    */
5002 
5003    /**
5004    * @brief  Floating-point Inverse Clarke transform
5005    * @param[in]       Ialpha  input two-phase orthogonal vector axis alpha
5006    * @param[in]       Ibeta   input two-phase orthogonal vector axis beta
5007    * @param[out]      *pIa    points to output three-phase coordinate <code>a</code>
5008    * @param[out]      *pIb    points to output three-phase coordinate <code>b</code>
5009    * @return none.
5010    */
5011 
5012 
arm_inv_clarke_f32(float32_t Ialpha,float32_t Ibeta,float32_t * pIa,float32_t * pIb)5013   __STATIC_INLINE void arm_inv_clarke_f32(
5014 					  float32_t Ialpha,
5015 					  float32_t Ibeta,
5016 					  float32_t * pIa,
5017 					  float32_t * pIb)
5018   {
5019     /* Calculating pIa from Ialpha by equation pIa = Ialpha */
5020     *pIa = Ialpha;
5021 
5022     /* Calculating pIb from Ialpha and Ibeta by equation pIb = -(1/2) * Ialpha + (sqrt(3)/2) * Ibeta */
5023     *pIb = -0.5 * Ialpha + (float32_t) 0.8660254039 *Ibeta;
5024 
5025   }
5026 
5027   /**
5028    * @brief  Inverse Clarke transform for Q31 version
5029    * @param[in]       Ialpha  input two-phase orthogonal vector axis alpha
5030    * @param[in]       Ibeta   input two-phase orthogonal vector axis beta
5031    * @param[out]      *pIa    points to output three-phase coordinate <code>a</code>
5032    * @param[out]      *pIb    points to output three-phase coordinate <code>b</code>
5033    * @return none.
5034    *
5035    * <b>Scaling and Overflow Behavior:</b>
5036    * \par
5037    * The function is implemented using an internal 32-bit accumulator.
5038    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5039    * There is saturation on the subtraction, hence there is no risk of overflow.
5040    */
5041 
arm_inv_clarke_q31(q31_t Ialpha,q31_t Ibeta,q31_t * pIa,q31_t * pIb)5042   __STATIC_INLINE void arm_inv_clarke_q31(
5043 					  q31_t Ialpha,
5044 					  q31_t Ibeta,
5045 					  q31_t * pIa,
5046 					  q31_t * pIb)
5047   {
5048     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5049 
5050     /* Calculating pIa from Ialpha by equation pIa = Ialpha */
5051     *pIa = Ialpha;
5052 
5053     /* Intermediate product is calculated by (1/(2*sqrt(3)) * Ia) */
5054     product1 = (q31_t) (((q63_t) (Ialpha) * (0x40000000)) >> 31);
5055 
5056     /* Intermediate product is calculated by (1/sqrt(3) * pIb) */
5057     product2 = (q31_t) (((q63_t) (Ibeta) * (0x6ED9EBA1)) >> 31);
5058 
5059     /* pIb is calculated by subtracting the products */
5060     *pIb = __QSUB(product2, product1);
5061 
5062   }
5063 
5064   /**
5065    * @} end of inv_clarke group
5066    */
5067 
5068   /**
5069    * @brief  Converts the elements of the Q7 vector to Q15 vector.
5070    * @param[in]  *pSrc     input pointer
5071    * @param[out] *pDst     output pointer
5072    * @param[in]  blockSize number of samples to process
5073    * @return none.
5074    */
5075   void arm_q7_to_q15(
5076 		      q7_t * pSrc,
5077 		     q15_t * pDst,
5078 		     uint32_t blockSize);
5079 
5080 
5081 
5082   /**
5083    * @ingroup groupController
5084    */
5085 
5086   /**
5087    * @defgroup park Vector Park Transform
5088    *
5089    * Forward Park transform converts the input two-coordinate vector to flux and torque components.
5090    * The Park transform can be used to realize the transformation of the <code>Ialpha</code> and the <code>Ibeta</code> currents
5091    * from the stationary to the moving reference frame and control the spatial relationship between
5092    * the stator vector current and rotor flux vector.
5093    * If we consider the d axis aligned with the rotor flux, the diagram below shows the
5094    * current vector and the relationship from the two reference frames:
5095    * \image html park.gif "Stator current space vector and its component in (a,b) and in the d,q rotating reference frame"
5096    *
5097    * The function operates on a single sample of data and each call to the function returns the processed output.
5098    * The library provides separate functions for Q31 and floating-point data types.
5099    * \par Algorithm
5100    * \image html parkFormula.gif
5101    * where <code>Ialpha</code> and <code>Ibeta</code> are the stator vector components,
5102    * <code>pId</code> and <code>pIq</code> are rotor vector components and <code>cosVal</code> and <code>sinVal</code> are the
5103    * cosine and sine values of theta (rotor flux position).
5104    * \par Fixed-Point Behavior
5105    * Care must be taken when using the Q31 version of the Park transform.
5106    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5107    * Refer to the function specific documentation below for usage guidelines.
5108    */
5109 
5110   /**
5111    * @addtogroup park
5112    * @{
5113    */
5114 
5115   /**
5116    * @brief Floating-point Park transform
5117    * @param[in]       Ialpha input two-phase vector coordinate alpha
5118    * @param[in]       Ibeta  input two-phase vector coordinate beta
5119    * @param[out]      *pId   points to output	rotor reference frame d
5120    * @param[out]      *pIq   points to output	rotor reference frame q
5121    * @param[in]       sinVal sine value of rotation angle theta
5122    * @param[in]       cosVal cosine value of rotation angle theta
5123    * @return none.
5124    *
5125    * The function implements the forward Park transform.
5126    *
5127    */
5128 
arm_park_f32(float32_t Ialpha,float32_t Ibeta,float32_t * pId,float32_t * pIq,float32_t sinVal,float32_t cosVal)5129   __STATIC_INLINE void arm_park_f32(
5130 				    float32_t Ialpha,
5131 				    float32_t Ibeta,
5132 				    float32_t * pId,
5133 				    float32_t * pIq,
5134 				    float32_t sinVal,
5135 				    float32_t cosVal)
5136   {
5137     /* Calculate pId using the equation, pId = Ialpha * cosVal + Ibeta * sinVal */
5138     *pId = Ialpha * cosVal + Ibeta * sinVal;
5139 
5140     /* Calculate pIq using the equation, pIq = - Ialpha * sinVal + Ibeta * cosVal */
5141     *pIq = -Ialpha * sinVal + Ibeta * cosVal;
5142 
5143   }
5144 
5145   /**
5146    * @brief  Park transform for Q31 version
5147    * @param[in]       Ialpha input two-phase vector coordinate alpha
5148    * @param[in]       Ibeta  input two-phase vector coordinate beta
5149    * @param[out]      *pId   points to output rotor reference frame d
5150    * @param[out]      *pIq   points to output rotor reference frame q
5151    * @param[in]       sinVal sine value of rotation angle theta
5152    * @param[in]       cosVal cosine value of rotation angle theta
5153    * @return none.
5154    *
5155    * <b>Scaling and Overflow Behavior:</b>
5156    * \par
5157    * The function is implemented using an internal 32-bit accumulator.
5158    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5159    * There is saturation on the addition and subtraction, hence there is no risk of overflow.
5160    */
5161 
5162 
arm_park_q31(q31_t Ialpha,q31_t Ibeta,q31_t * pId,q31_t * pIq,q31_t sinVal,q31_t cosVal)5163   __STATIC_INLINE void arm_park_q31(
5164 				    q31_t Ialpha,
5165 				    q31_t Ibeta,
5166 				    q31_t * pId,
5167 				    q31_t * pIq,
5168 				    q31_t sinVal,
5169 				    q31_t cosVal)
5170   {
5171     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5172     q31_t product3, product4;                    /* Temporary variables used to store intermediate results */
5173 
5174     /* Intermediate product is calculated by (Ialpha * cosVal) */
5175     product1 = (q31_t) (((q63_t) (Ialpha) * (cosVal)) >> 31);
5176 
5177     /* Intermediate product is calculated by (Ibeta * sinVal) */
5178     product2 = (q31_t) (((q63_t) (Ibeta) * (sinVal)) >> 31);
5179 
5180 
5181     /* Intermediate product is calculated by (Ialpha * sinVal) */
5182     product3 = (q31_t) (((q63_t) (Ialpha) * (sinVal)) >> 31);
5183 
5184     /* Intermediate product is calculated by (Ibeta * cosVal) */
5185     product4 = (q31_t) (((q63_t) (Ibeta) * (cosVal)) >> 31);
5186 
5187     /* Calculate pId by adding the two intermediate products 1 and 2 */
5188     *pId = __QADD(product1, product2);
5189 
5190     /* Calculate pIq by subtracting the two intermediate products 3 from 4 */
5191     *pIq = __QSUB(product4, product3);
5192   }
5193 
5194   /**
5195    * @} end of park group
5196    */
5197 
5198   /**
5199    * @brief  Converts the elements of the Q7 vector to floating-point vector.
5200    * @param[in]  *pSrc is input pointer
5201    * @param[out]  *pDst is output pointer
5202    * @param[in]  blockSize is the number of samples to process
5203    * @return none.
5204    */
5205   void arm_q7_to_float(
5206 		        q7_t * pSrc,
5207 		       float32_t * pDst,
5208 		       uint32_t blockSize);
5209 
5210 
5211   /**
5212    * @ingroup groupController
5213    */
5214 
5215   /**
5216    * @defgroup inv_park Vector Inverse Park transform
5217    * Inverse Park transform converts the input flux and torque components to two-coordinate vector.
5218    *
5219    * The function operates on a single sample of data and each call to the function returns the processed output.
5220    * The library provides separate functions for Q31 and floating-point data types.
5221    * \par Algorithm
5222    * \image html parkInvFormula.gif
5223    * where <code>pIalpha</code> and <code>pIbeta</code> are the stator vector components,
5224    * <code>Id</code> and <code>Iq</code> are rotor vector components and <code>cosVal</code> and <code>sinVal</code> are the
5225    * cosine and sine values of theta (rotor flux position).
5226    * \par Fixed-Point Behavior
5227    * Care must be taken when using the Q31 version of the inverse Park transform.
5228    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5229    * Refer to the function specific documentation below for usage guidelines.
5230    */
5231 
5232   /**
5233    * @addtogroup inv_park
5234    * @{
5235    */
5236 
5237    /**
5238    * @brief  Floating-point Inverse Park transform
5239    * @param[in]       Id        input coordinate of rotor reference frame d
5240    * @param[in]       Iq        input coordinate of rotor reference frame q
5241    * @param[out]      *pIalpha  points to output two-phase orthogonal vector axis alpha
5242    * @param[out]      *pIbeta   points to output two-phase orthogonal vector axis beta
5243    * @param[in]       sinVal    sine value of rotation angle theta
5244    * @param[in]       cosVal    cosine value of rotation angle theta
5245    * @return none.
5246    */
5247 
arm_inv_park_f32(float32_t Id,float32_t Iq,float32_t * pIalpha,float32_t * pIbeta,float32_t sinVal,float32_t cosVal)5248   __STATIC_INLINE void arm_inv_park_f32(
5249 					float32_t Id,
5250 					float32_t Iq,
5251 					float32_t * pIalpha,
5252 					float32_t * pIbeta,
5253 					float32_t sinVal,
5254 					float32_t cosVal)
5255   {
5256     /* Calculate pIalpha using the equation, pIalpha = Id * cosVal - Iq * sinVal */
5257     *pIalpha = Id * cosVal - Iq * sinVal;
5258 
5259     /* Calculate pIbeta using the equation, pIbeta = Id * sinVal + Iq * cosVal */
5260     *pIbeta = Id * sinVal + Iq * cosVal;
5261 
5262   }
5263 
5264 
5265   /**
5266    * @brief  Inverse Park transform for	Q31 version
5267    * @param[in]       Id        input coordinate of rotor reference frame d
5268    * @param[in]       Iq        input coordinate of rotor reference frame q
5269    * @param[out]      *pIalpha  points to output two-phase orthogonal vector axis alpha
5270    * @param[out]      *pIbeta   points to output two-phase orthogonal vector axis beta
5271    * @param[in]       sinVal    sine value of rotation angle theta
5272    * @param[in]       cosVal    cosine value of rotation angle theta
5273    * @return none.
5274    *
5275    * <b>Scaling and Overflow Behavior:</b>
5276    * \par
5277    * The function is implemented using an internal 32-bit accumulator.
5278    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5279    * There is saturation on the addition, hence there is no risk of overflow.
5280    */
5281 
5282 
arm_inv_park_q31(q31_t Id,q31_t Iq,q31_t * pIalpha,q31_t * pIbeta,q31_t sinVal,q31_t cosVal)5283   __STATIC_INLINE void arm_inv_park_q31(
5284 					q31_t Id,
5285 					q31_t Iq,
5286 					q31_t * pIalpha,
5287 					q31_t * pIbeta,
5288 					q31_t sinVal,
5289 					q31_t cosVal)
5290   {
5291     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5292     q31_t product3, product4;                    /* Temporary variables used to store intermediate results */
5293 
5294     /* Intermediate product is calculated by (Id * cosVal) */
5295     product1 = (q31_t) (((q63_t) (Id) * (cosVal)) >> 31);
5296 
5297     /* Intermediate product is calculated by (Iq * sinVal) */
5298     product2 = (q31_t) (((q63_t) (Iq) * (sinVal)) >> 31);
5299 
5300 
5301     /* Intermediate product is calculated by (Id * sinVal) */
5302     product3 = (q31_t) (((q63_t) (Id) * (sinVal)) >> 31);
5303 
5304     /* Intermediate product is calculated by (Iq * cosVal) */
5305     product4 = (q31_t) (((q63_t) (Iq) * (cosVal)) >> 31);
5306 
5307     /* Calculate pIalpha by using the two intermediate products 1 and 2 */
5308     *pIalpha = __QSUB(product1, product2);
5309 
5310     /* Calculate pIbeta by using the two intermediate products 3 and 4 */
5311     *pIbeta = __QADD(product4, product3);
5312 
5313   }
5314 
5315   /**
5316    * @} end of Inverse park group
5317    */
5318 
5319 
5320   /**
5321    * @brief  Converts the elements of the Q31 vector to floating-point vector.
5322    * @param[in]  *pSrc is input pointer
5323    * @param[out]  *pDst is output pointer
5324    * @param[in]  blockSize is the number of samples to process
5325    * @return none.
5326    */
5327   void arm_q31_to_float(
5328 			 q31_t * pSrc,
5329 			float32_t * pDst,
5330 			uint32_t blockSize);
5331 
5332   /**
5333    * @ingroup groupInterpolation
5334    */
5335 
5336   /**
5337    * @defgroup LinearInterpolate Linear Interpolation
5338    *
5339    * Linear interpolation is a method of curve fitting using linear polynomials.
5340    * Linear interpolation works by effectively drawing a straight line between two neighboring samples and returning the appropriate point along that line.
5341    *
5342    * \par
5343    * \image html LinearInterp.gif "Linear interpolation"
5344    *
5345    * \par
5346    * A linear interpolation function calculates an output value (y) for the input (x)
5347    * using linear interpolation of the input values x0, x1 (nearest input values) and the output values y0 and y1 (nearest output values).
5348    *
5349    * \par Algorithm:
5350    * <pre>
5351    *       y = y0 + (x - x0) * ((y1 - y0)/(x1-x0))
5352    *       where x0, x1 are nearest values of input x
5353    *             y0, y1 are nearest values to output y
5354    * </pre>
5355    *
5356    * \par
5357    * This set of functions implements Linear interpolation process
5358    * for Q7, Q15, Q31, and floating-point data types.  The functions operate on a single
5359    * sample of data and each call to the function returns a single processed value.
5360    * <code>S</code> points to an instance of the Linear Interpolate function data structure.
5361    * <code>x</code> is the input sample value. The functions returns the output value.
5362    *
5363    * \par
5364    * If x is outside of the table boundary, linear interpolation returns the first value of the table
5365    * if x is below the input range, and the last value of the table if x is above the range.
5366    */
5367 
5368   /**
5369    * @addtogroup LinearInterpolate
5370    * @{
5371    */
5372 
5373   /**
5374    * @brief  Process function for the floating-point Linear Interpolation Function.
5375    * @param[in,out] *S is an instance of the floating-point Linear Interpolation structure
5376    * @param[in] x input sample to process
5377    * @return y processed output sample.
5378    *
5379    */
5380 
arm_linear_interp_f32(arm_linear_interp_instance_f32 * S,float32_t x)5381   __STATIC_INLINE float32_t arm_linear_interp_f32(
5382 						  arm_linear_interp_instance_f32 * S,
5383 						  float32_t x)
5384   {
5385 
5386 	  float32_t y;
5387 	  float32_t x0, x1;						/* Nearest input values */
5388 	  float32_t y0, y1;	  					/* Nearest output values */
5389 	  float32_t xSpacing = S->xSpacing;		/* spacing between input values */
5390 	  int32_t i;  							/* Index variable */
5391 	  float32_t *pYData = S->pYData;	    /* pointer to output table */
5392 
5393 	  /* Calculation of index */
5394 	  i =   (x - S->x1) / xSpacing;
5395 
5396 	  if(i < 0)
5397 	  {
5398 	     /* Iniatilize output for below specified range as least output value of table */
5399 		 y = pYData[0];
5400 	  }
5401 	  else if(i >= S->nValues)
5402 	  {
5403 	  	  /* Iniatilize output for above specified range as last output value of table */
5404 	  	  y = pYData[S->nValues-1];
5405 	  }
5406 	  else
5407 	  {
5408 	  	  /* Calculation of nearest input values */
5409 		  x0 = S->x1 + i * xSpacing;
5410 		  x1 = S->x1 + (i +1) * xSpacing;
5411 
5412 		 /* Read of nearest output values */
5413 		  y0 = pYData[i];
5414 		  y1 = pYData[i + 1];
5415 
5416 		  /* Calculation of output */
5417 		  y = y0 + (x - x0) * ((y1 - y0)/(x1-x0));
5418 
5419 	  }
5420 
5421       /* returns output value */
5422 	  return (y);
5423   }
5424 
5425    /**
5426    *
5427    * @brief  Process function for the Q31 Linear Interpolation Function.
5428    * @param[in] *pYData  pointer to Q31 Linear Interpolation table
5429    * @param[in] x input sample to process
5430    * @param[in] nValues number of table values
5431    * @return y processed output sample.
5432    *
5433    * \par
5434    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
5435    * This function can support maximum of table size 2^12.
5436    *
5437    */
5438 
5439 
arm_linear_interp_q31(q31_t * pYData,q31_t x,uint32_t nValues)5440   __STATIC_INLINE q31_t arm_linear_interp_q31(q31_t *pYData,
5441 					      q31_t x, uint32_t nValues)
5442   {
5443     q31_t y;                                   /* output */
5444     q31_t y0, y1;                                /* Nearest output values */
5445     q31_t fract;                                 /* fractional part */
5446     int32_t index;                              /* Index to read nearest output values */
5447 
5448     /* Input is in 12.20 format */
5449     /* 12 bits for the table index */
5450     /* Index value calculation */
5451     index = ((x & 0xFFF00000) >> 20);
5452 
5453 	if(index >= (nValues - 1))
5454 	{
5455 		return(pYData[nValues - 1]);
5456 	}
5457 	else if(index < 0)
5458 	{
5459 		return(pYData[0]);
5460 	}
5461 	else
5462 	{
5463 
5464 	    /* 20 bits for the fractional part */
5465 	    /* shift left by 11 to keep fract in 1.31 format */
5466 	    fract = (x & 0x000FFFFF) << 11;
5467 
5468 	    /* Read two nearest output values from the index in 1.31(q31) format */
5469 	    y0 = pYData[index];
5470 	    y1 = pYData[index + 1u];
5471 
5472 	    /* Calculation of y0 * (1-fract) and y is in 2.30 format */
5473 	    y = ((q31_t) ((q63_t) y0 * (0x7FFFFFFF - fract) >> 32));
5474 
5475 	    /* Calculation of y0 * (1-fract) + y1 *fract and y is in 2.30 format */
5476 	    y += ((q31_t) (((q63_t) y1 * fract) >> 32));
5477 
5478 	    /* Convert y to 1.31 format */
5479 	    return (y << 1u);
5480 
5481 	}
5482 
5483   }
5484 
5485   /**
5486    *
5487    * @brief  Process function for the Q15 Linear Interpolation Function.
5488    * @param[in] *pYData  pointer to Q15 Linear Interpolation table
5489    * @param[in] x input sample to process
5490    * @param[in] nValues number of table values
5491    * @return y processed output sample.
5492    *
5493    * \par
5494    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
5495    * This function can support maximum of table size 2^12.
5496    *
5497    */
5498 
5499 
arm_linear_interp_q15(q15_t * pYData,q31_t x,uint32_t nValues)5500   __STATIC_INLINE q15_t arm_linear_interp_q15(q15_t *pYData, q31_t x, uint32_t nValues)
5501   {
5502     q63_t y;                                   /* output */
5503     q15_t y0, y1;                              /* Nearest output values */
5504     q31_t fract;                               /* fractional part */
5505     int32_t index;                            /* Index to read nearest output values */
5506 
5507     /* Input is in 12.20 format */
5508     /* 12 bits for the table index */
5509     /* Index value calculation */
5510     index = ((x & 0xFFF00000) >> 20u);
5511 
5512 	if(index >= (nValues - 1))
5513 	{
5514 		return(pYData[nValues - 1]);
5515 	}
5516 	else if(index < 0)
5517 	{
5518 		return(pYData[0]);
5519 	}
5520 	else
5521 	{
5522 	    /* 20 bits for the fractional part */
5523 	    /* fract is in 12.20 format */
5524 	    fract = (x & 0x000FFFFF);
5525 
5526 	    /* Read two nearest output values from the index */
5527 	    y0 = pYData[index];
5528 	    y1 = pYData[index + 1u];
5529 
5530 	    /* Calculation of y0 * (1-fract) and y is in 13.35 format */
5531 	    y = ((q63_t) y0 * (0xFFFFF - fract));
5532 
5533 	    /* Calculation of (y0 * (1-fract) + y1 * fract) and y is in 13.35 format */
5534 	    y += ((q63_t) y1 * (fract));
5535 
5536 	    /* convert y to 1.15 format */
5537 	    return (y >> 20);
5538 	}
5539 
5540 
5541   }
5542 
5543   /**
5544    *
5545    * @brief  Process function for the Q7 Linear Interpolation Function.
5546    * @param[in] *pYData  pointer to Q7 Linear Interpolation table
5547    * @param[in] x input sample to process
5548    * @param[in] nValues number of table values
5549    * @return y processed output sample.
5550    *
5551    * \par
5552    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
5553    * This function can support maximum of table size 2^12.
5554    */
5555 
5556 
arm_linear_interp_q7(q7_t * pYData,q31_t x,uint32_t nValues)5557   __STATIC_INLINE q7_t arm_linear_interp_q7(q7_t *pYData, q31_t x,  uint32_t nValues)
5558   {
5559     q31_t y;                                   /* output */
5560     q7_t y0, y1;                                 /* Nearest output values */
5561     q31_t fract;                                 /* fractional part */
5562     int32_t index;                              /* Index to read nearest output values */
5563 
5564     /* Input is in 12.20 format */
5565     /* 12 bits for the table index */
5566     /* Index value calculation */
5567     index = ((x & 0xFFF00000) >> 20u);
5568 
5569 
5570     if(index >= (nValues - 1))
5571 	{
5572 		return(pYData[nValues - 1]);
5573 	}
5574 	else if(index < 0)
5575 	{
5576 		return(pYData[0]);
5577 	}
5578 	else
5579 	{
5580 
5581 	    /* 20 bits for the fractional part */
5582 	    /* fract is in 12.20 format */
5583 	    fract = (x & 0x000FFFFF);
5584 
5585 	    /* Read two nearest output values from the index and are in 1.7(q7) format */
5586 	    y0 = pYData[index];
5587 	    y1 = pYData[index + 1u];
5588 
5589 	    /* Calculation of y0 * (1-fract ) and y is in 13.27(q27) format */
5590 	    y = ((y0 * (0xFFFFF - fract)));
5591 
5592 	    /* Calculation of y1 * fract + y0 * (1-fract) and y is in 13.27(q27) format */
5593 	    y += (y1 * fract);
5594 
5595 	    /* convert y to 1.7(q7) format */
5596 	    return (y >> 20u);
5597 
5598 	}
5599 
5600   }
5601   /**
5602    * @} end of LinearInterpolate group
5603    */
5604 
5605   /**
5606    * @brief  Fast approximation to the trigonometric sine function for floating-point data.
5607    * @param[in] x input value in radians.
5608    * @return  sin(x).
5609    */
5610 
5611   float32_t arm_sin_f32(
5612 			 float32_t x);
5613 
5614   /**
5615    * @brief  Fast approximation to the trigonometric sine function for Q31 data.
5616    * @param[in] x Scaled input value in radians.
5617    * @return  sin(x).
5618    */
5619 
5620   q31_t arm_sin_q31(
5621 		     q31_t x);
5622 
5623   /**
5624    * @brief  Fast approximation to the trigonometric sine function for Q15 data.
5625    * @param[in] x Scaled input value in radians.
5626    * @return  sin(x).
5627    */
5628 
5629   q15_t arm_sin_q15(
5630 		     q15_t x);
5631 
5632   /**
5633    * @brief  Fast approximation to the trigonometric cosine function for floating-point data.
5634    * @param[in] x input value in radians.
5635    * @return  cos(x).
5636    */
5637 
5638   float32_t arm_cos_f32(
5639 			 float32_t x);
5640 
5641   /**
5642    * @brief Fast approximation to the trigonometric cosine function for Q31 data.
5643    * @param[in] x Scaled input value in radians.
5644    * @return  cos(x).
5645    */
5646 
5647   q31_t arm_cos_q31(
5648 		     q31_t x);
5649 
5650   /**
5651    * @brief  Fast approximation to the trigonometric cosine function for Q15 data.
5652    * @param[in] x Scaled input value in radians.
5653    * @return  cos(x).
5654    */
5655 
5656   q15_t arm_cos_q15(
5657 		     q15_t x);
5658 
5659 
5660   /**
5661    * @ingroup groupFastMath
5662    */
5663 
5664 
5665   /**
5666    * @defgroup SQRT Square Root
5667    *
5668    * Computes the square root of a number.
5669    * There are separate functions for Q15, Q31, and floating-point data types.
5670    * The square root function is computed using the Newton-Raphson algorithm.
5671    * This is an iterative algorithm of the form:
5672    * <pre>
5673    *      x1 = x0 - f(x0)/f'(x0)
5674    * </pre>
5675    * where <code>x1</code> is the current estimate,
5676    * <code>x0</code> is the previous estimate and
5677    * <code>f'(x0)</code> is the derivative of <code>f()</code> evaluated at <code>x0</code>.
5678    * For the square root function, the algorithm reduces to:
5679    * <pre>
5680    *     x0 = in/2                         [initial guess]
5681    *     x1 = 1/2 * ( x0 + in / x0)        [each iteration]
5682    * </pre>
5683    */
5684 
5685 
5686   /**
5687    * @addtogroup SQRT
5688    * @{
5689    */
5690 
5691   /**
5692    * @brief  Floating-point square root function.
5693    * @param[in]  in     input value.
5694    * @param[out] *pOut  square root of input value.
5695    * @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
5696    * <code>in</code> is negative value and returns zero output for negative values.
5697    */
5698 
arm_sqrt_f32(float32_t in,float32_t * pOut)5699   __STATIC_INLINE arm_status  arm_sqrt_f32(
5700                       float32_t in, float32_t *pOut)
5701   {
5702     if(in > 0)
5703     {
5704 
5705 //    #if __FPU_USED
5706     #if (__FPU_USED == 1) && defined ( __CC_ARM   )
5707         *pOut = __sqrtf(in);
5708     #elif (__FPU_USED == 1) && defined ( __TMS_740 )
5709         *pOut = __builtin_sqrtf(in);
5710     #else
5711         *pOut = sqrtf(in);
5712     #endif
5713 
5714         return (ARM_MATH_SUCCESS);
5715     }
5716     else
5717     {
5718         *pOut = 0.0f;
5719         return (ARM_MATH_ARGUMENT_ERROR);
5720     }
5721 
5722   }
5723 
5724 
5725   /**
5726    * @brief Q31 square root function.
5727    * @param[in]   in    input value.  The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF.
5728    * @param[out]  *pOut square root of input value.
   * @return The function returns ARM_MATH_SUCCESS if the input value is positive, or ARM_MATH_ARGUMENT_ERROR if
   * <code>in</code> is negative, in which case the output is set to zero.
5731    */
5732   arm_status arm_sqrt_q31(
5733 		      q31_t in, q31_t *pOut);
5734 
5735   /**
5736    * @brief  Q15 square root function.
5737    * @param[in]   in     input value.  The range of the input value is [0 +1) or 0x0000 to 0x7FFF.
5738    * @param[out]  *pOut  square root of input value.
   * @return The function returns ARM_MATH_SUCCESS if the input value is positive, or ARM_MATH_ARGUMENT_ERROR if
   * <code>in</code> is negative, in which case the output is set to zero.
5741    */
5742   arm_status arm_sqrt_q15(
5743 		      q15_t in, q15_t *pOut);
5744 
5745   /**
5746    * @} end of SQRT group
5747    */
5748 
5749 
5750 
5751 
5752 
5753 
/**
 * @brief floating-point Circular write function.
 *
 * Copies <code>blockSize</code> samples from <code>src</code> into
 * <code>circBuffer</code>, starting at index <code>*writeOffset</code>.
 * Samples are moved as 32-bit words (<code>int32_t</code>).
 * After each sample the index is advanced by <code>bufferInc</code> and,
 * if it has reached or passed <code>L</code>, reduced by <code>L</code> once.
 * An index that becomes negative is not wrapped.
 *
 * @param[in,out] *circBuffer   buffer that receives the samples.
 * @param[in]     L             index value at which the write position wraps.
 * @param[in,out] *writeOffset  write index on entry; index following the last write on exit.
 * @param[in]     bufferInc     index step applied after every sample.
 * @param[in]     *src          first source sample.
 * @param[in]     srcInc        source pointer step, in elements.
 * @param[in]     blockSize     number of samples to copy.
 * @return none.
 */

__STATIC_INLINE void arm_circularWrite_f32(
  int32_t * circBuffer,
  int32_t L,
  uint16_t * writeOffset,
  int32_t bufferInc,
  const int32_t * src,
  int32_t srcInc,
  uint32_t blockSize)
{
  const int32_t *pIn = src;       /* next sample to copy           */
  int32_t pos = *writeOffset;     /* working copy of the index     */
  uint32_t remaining;             /* samples still to be written   */

  for(remaining = blockSize; remaining != 0u; remaining--)
  {
    /* Store one sample and step the source */
    circBuffer[pos] = *pIn;
    pIn += srcInc;

    /* Step the index; only running off the end of the buffer is wrapped */
    pos += bufferInc;
    if(pos >= L)
    {
      pos -= L;
    }
  }

  /* Hand the updated index back to the caller */
  *writeOffset = (uint16_t) pos;
}
5797 
5798 
5799 
/**
 * @brief floating-point Circular Read function.
 *
 * Copies <code>blockSize</code> samples out of <code>circBuffer</code>,
 * starting at index <code>*readOffset</code>, into the destination.
 * Samples are moved as 32-bit words (<code>int32_t</code>).
 * The destination pointer is advanced by <code>dstInc</code> after each
 * sample and reset to <code>dst_base</code> when it lands exactly on
 * <code>dst_base + dst_length</code>.  The read index is advanced by
 * <code>bufferInc</code> and, if it has reached or passed <code>L</code>,
 * reduced by <code>L</code> once.  A negative index is not wrapped.
 *
 * @param[in]     *circBuffer  buffer the samples are read from.
 * @param[in]     L            index value at which the read position wraps.
 * @param[in,out] *readOffset  read index on entry; index following the last read on exit.
 * @param[in]     bufferInc    index step applied after every sample.
 * @param[out]    *dst         first destination location.
 * @param[in]     *dst_base    start of the destination buffer.
 * @param[in]     dst_length   destination buffer length, in elements.
 * @param[in]     dstInc       destination pointer step, in elements.
 * @param[in]     blockSize    number of samples to copy.
 * @return none.
 */
__STATIC_INLINE void arm_circularRead_f32(
  int32_t * circBuffer,
  int32_t L,
  int32_t * readOffset,
  int32_t bufferInc,
  int32_t * dst,
  int32_t * dst_base,
  int32_t dst_length,
  int32_t dstInc,
  uint32_t blockSize)
{
  uint32_t i;
  int32_t rOffset;
  int32_t *dst_end;

  /* Copy the value of Index pointer that points
   * to the current location from where the input samples to be read */
  rOffset = *readOffset;

  /* Keep the end marker as a pointer.  Storing it in a 32-bit integer
   * truncates the address wherever pointers are wider than 32 bits, and
   * the wrap-around test below would then never match. */
  dst_end = dst_base + dst_length;

  /* Loop over the blockSize */
  i = blockSize;

  while(i > 0u)
  {
    /* copy the sample from the circular buffer to the destination buffer */
    *dst = circBuffer[rOffset];

    /* Update the output pointer, wrapping at the end of the destination */
    dst += dstInc;

    if(dst == dst_end)
    {
      dst = dst_base;
    }

    /* Circularly update rOffset; only overflow past L is wrapped */
    rOffset += bufferInc;

    if(rOffset >= L)
    {
      rOffset -= L;
    }

    /* Decrement the loop counter */
    i--;
  }

  /* Update the index pointer */
  *readOffset = rOffset;
}
5853 
/**
 * @brief Q15 Circular write function.
 *
 * Copies <code>blockSize</code> samples from <code>src</code> into
 * <code>circBuffer</code>, starting at index <code>*writeOffset</code>.
 * After each sample the index is advanced by <code>bufferInc</code> and,
 * if it has reached or passed <code>L</code>, reduced by <code>L</code> once.
 * An index that becomes negative is not wrapped.
 *
 * @param[in,out] *circBuffer   buffer that receives the samples.
 * @param[in]     L             index value at which the write position wraps.
 * @param[in,out] *writeOffset  write index on entry; index following the last write on exit.
 * @param[in]     bufferInc     index step applied after every sample.
 * @param[in]     *src          first source sample.
 * @param[in]     srcInc        source pointer step, in elements.
 * @param[in]     blockSize     number of samples to copy.
 * @return none.
 */

__STATIC_INLINE void arm_circularWrite_q15(
  q15_t * circBuffer,
  int32_t L,
  uint16_t * writeOffset,
  int32_t bufferInc,
  const q15_t * src,
  int32_t srcInc,
  uint32_t blockSize)
{
  const q15_t *pIn = src;         /* next sample to copy           */
  int32_t pos = *writeOffset;     /* working copy of the index     */
  uint32_t remaining;             /* samples still to be written   */

  for(remaining = blockSize; remaining != 0u; remaining--)
  {
    /* Store one sample and step the source */
    circBuffer[pos] = *pIn;
    pIn += srcInc;

    /* Step the index; only running off the end of the buffer is wrapped */
    pos += bufferInc;
    if(pos >= L)
    {
      pos -= L;
    }
  }

  /* Hand the updated index back to the caller */
  *writeOffset = (uint16_t) pos;
}
5897 
5898 
5899 
/**
 * @brief Q15 Circular Read function.
 *
 * Copies <code>blockSize</code> samples out of <code>circBuffer</code>,
 * starting at index <code>*readOffset</code>, into the destination.
 * The destination pointer is advanced by <code>dstInc</code> after each
 * sample and reset to <code>dst_base</code> when it lands exactly on
 * <code>dst_base + dst_length</code>.  The read index is advanced by
 * <code>bufferInc</code> and, if it has reached or passed <code>L</code>,
 * reduced by <code>L</code> once.  A negative index is not wrapped.
 *
 * @param[in]     *circBuffer  buffer the samples are read from.
 * @param[in]     L            index value at which the read position wraps.
 * @param[in,out] *readOffset  read index on entry; index following the last read on exit.
 * @param[in]     bufferInc    index step applied after every sample.
 * @param[out]    *dst         first destination location.
 * @param[in]     *dst_base    start of the destination buffer.
 * @param[in]     dst_length   destination buffer length, in elements.
 * @param[in]     dstInc       destination pointer step, in elements.
 * @param[in]     blockSize    number of samples to copy.
 * @return none.
 */
__STATIC_INLINE void arm_circularRead_q15(
  q15_t * circBuffer,
  int32_t L,
  int32_t * readOffset,
  int32_t bufferInc,
  q15_t * dst,
  q15_t * dst_base,
  int32_t dst_length,
  int32_t dstInc,
  uint32_t blockSize)
{
  uint32_t i;
  int32_t rOffset;
  q15_t *dst_end;

  /* Copy the value of Index pointer that points
   * to the current location from where the input samples to be read */
  rOffset = *readOffset;

  /* Keep the end marker as a pointer.  Storing it in a 32-bit integer
   * truncates the address wherever pointers are wider than 32 bits, and
   * the wrap-around test below would then never match. */
  dst_end = dst_base + dst_length;

  /* Loop over the blockSize */
  i = blockSize;

  while(i > 0u)
  {
    /* copy the sample from the circular buffer to the destination buffer */
    *dst = circBuffer[rOffset];

    /* Update the output pointer, wrapping at the end of the destination */
    dst += dstInc;

    if(dst == dst_end)
    {
      dst = dst_base;
    }

    /* Circularly update rOffset; only overflow past L is wrapped */
    rOffset += bufferInc;

    if(rOffset >= L)
    {
      rOffset -= L;
    }

    /* Decrement the loop counter */
    i--;
  }

  /* Update the index pointer */
  *readOffset = rOffset;
}
5954 
5955 
/**
 * @brief Q7 Circular write function.
 *
 * Copies <code>blockSize</code> samples from <code>src</code> into
 * <code>circBuffer</code>, starting at index <code>*writeOffset</code>.
 * After each sample the index is advanced by <code>bufferInc</code> and,
 * if it has reached or passed <code>L</code>, reduced by <code>L</code> once.
 * An index that becomes negative is not wrapped.
 *
 * @param[in,out] *circBuffer   buffer that receives the samples.
 * @param[in]     L             index value at which the write position wraps.
 * @param[in,out] *writeOffset  write index on entry; index following the last write on exit.
 * @param[in]     bufferInc     index step applied after every sample.
 * @param[in]     *src          first source sample.
 * @param[in]     srcInc        source pointer step, in elements.
 * @param[in]     blockSize     number of samples to copy.
 * @return none.
 */

__STATIC_INLINE void arm_circularWrite_q7(
  q7_t * circBuffer,
  int32_t L,
  uint16_t * writeOffset,
  int32_t bufferInc,
  const q7_t * src,
  int32_t srcInc,
  uint32_t blockSize)
{
  const q7_t *pIn = src;          /* next sample to copy           */
  int32_t pos = *writeOffset;     /* working copy of the index     */
  uint32_t remaining;             /* samples still to be written   */

  for(remaining = blockSize; remaining != 0u; remaining--)
  {
    /* Store one sample and step the source */
    circBuffer[pos] = *pIn;
    pIn += srcInc;

    /* Step the index; only running off the end of the buffer is wrapped */
    pos += bufferInc;
    if(pos >= L)
    {
      pos -= L;
    }
  }

  /* Hand the updated index back to the caller */
  *writeOffset = (uint16_t) pos;
}
5999 
6000 
6001 
/**
 * @brief Q7 Circular Read function.
 *
 * Copies <code>blockSize</code> samples out of <code>circBuffer</code>,
 * starting at index <code>*readOffset</code>, into the destination.
 * The destination pointer is advanced by <code>dstInc</code> after each
 * sample and reset to <code>dst_base</code> when it lands exactly on
 * <code>dst_base + dst_length</code>.  The read index is advanced by
 * <code>bufferInc</code> and, if it has reached or passed <code>L</code>,
 * reduced by <code>L</code> once.  A negative index is not wrapped.
 *
 * @param[in]     *circBuffer  buffer the samples are read from.
 * @param[in]     L            index value at which the read position wraps.
 * @param[in,out] *readOffset  read index on entry; index following the last read on exit.
 * @param[in]     bufferInc    index step applied after every sample.
 * @param[out]    *dst         first destination location.
 * @param[in]     *dst_base    start of the destination buffer.
 * @param[in]     dst_length   destination buffer length, in elements.
 * @param[in]     dstInc       destination pointer step, in elements.
 * @param[in]     blockSize    number of samples to copy.
 * @return none.
 */
__STATIC_INLINE void arm_circularRead_q7(
  q7_t * circBuffer,
  int32_t L,
  int32_t * readOffset,
  int32_t bufferInc,
  q7_t * dst,
  q7_t * dst_base,
  int32_t dst_length,
  int32_t dstInc,
  uint32_t blockSize)
{
  uint32_t i;
  int32_t rOffset;
  q7_t *dst_end;

  /* Copy the value of Index pointer that points
   * to the current location from where the input samples to be read */
  rOffset = *readOffset;

  /* Keep the end marker as a pointer.  Storing it in a 32-bit integer
   * truncates the address wherever pointers are wider than 32 bits, and
   * the wrap-around test below would then never match. */
  dst_end = dst_base + dst_length;

  /* Loop over the blockSize */
  i = blockSize;

  while(i > 0u)
  {
    /* copy the sample from the circular buffer to the destination buffer */
    *dst = circBuffer[rOffset];

    /* Update the output pointer, wrapping at the end of the destination */
    dst += dstInc;

    if(dst == dst_end)
    {
      dst = dst_base;
    }

    /* Circularly update rOffset; only overflow past L is wrapped */
    rOffset += bufferInc;

    if(rOffset >= L)
    {
      rOffset -= L;
    }

    /* Decrement the loop counter */
    i--;
  }

  /* Update the index pointer */
  *readOffset = rOffset;
}
6056 
6057 
6058   /**
6059    * @brief  Sum of the squares of the elements of a Q31 vector.
6060    * @param[in]  *pSrc is input pointer
6061    * @param[in]  blockSize is the number of samples to process
6062    * @param[out]  *pResult is output value.
6063    * @return none.
6064    */
6065 
6066   void arm_power_q31(
6067 		      q31_t * pSrc,
6068 		     uint32_t blockSize,
6069 		     q63_t * pResult);
6070 
6071   /**
6072    * @brief  Sum of the squares of the elements of a floating-point vector.
6073    * @param[in]  *pSrc is input pointer
6074    * @param[in]  blockSize is the number of samples to process
6075    * @param[out]  *pResult is output value.
6076    * @return none.
6077    */
6078 
6079   void arm_power_f32(
6080 		      float32_t * pSrc,
6081 		     uint32_t blockSize,
6082 		     float32_t * pResult);
6083 
6084   /**
6085    * @brief  Sum of the squares of the elements of a Q15 vector.
6086    * @param[in]  *pSrc is input pointer
6087    * @param[in]  blockSize is the number of samples to process
6088    * @param[out]  *pResult is output value.
6089    * @return none.
6090    */
6091 
6092   void arm_power_q15(
6093 		      q15_t * pSrc,
6094 		     uint32_t blockSize,
6095 		     q63_t * pResult);
6096 
6097   /**
6098    * @brief  Sum of the squares of the elements of a Q7 vector.
6099    * @param[in]  *pSrc is input pointer
6100    * @param[in]  blockSize is the number of samples to process
6101    * @param[out]  *pResult is output value.
6102    * @return none.
6103    */
6104 
6105   void arm_power_q7(
6106 		     q7_t * pSrc,
6107 		    uint32_t blockSize,
6108 		    q31_t * pResult);
6109 
6110   /**
6111    * @brief  Mean value of a Q7 vector.
6112    * @param[in]  *pSrc is input pointer
6113    * @param[in]  blockSize is the number of samples to process
6114    * @param[out]  *pResult is output value.
6115    * @return none.
6116    */
6117 
6118   void arm_mean_q7(
6119 		    q7_t * pSrc,
6120 		   uint32_t blockSize,
6121 		   q7_t * pResult);
6122 
6123   /**
6124    * @brief  Mean value of a Q15 vector.
6125    * @param[in]  *pSrc is input pointer
6126    * @param[in]  blockSize is the number of samples to process
6127    * @param[out]  *pResult is output value.
6128    * @return none.
6129    */
6130   void arm_mean_q15(
6131 		     q15_t * pSrc,
6132 		    uint32_t blockSize,
6133 		    q15_t * pResult);
6134 
6135   /**
6136    * @brief  Mean value of a Q31 vector.
6137    * @param[in]  *pSrc is input pointer
6138    * @param[in]  blockSize is the number of samples to process
6139    * @param[out]  *pResult is output value.
6140    * @return none.
6141    */
6142   void arm_mean_q31(
6143 		     q31_t * pSrc,
6144 		    uint32_t blockSize,
6145 		    q31_t * pResult);
6146 
6147   /**
6148    * @brief  Mean value of a floating-point vector.
6149    * @param[in]  *pSrc is input pointer
6150    * @param[in]  blockSize is the number of samples to process
6151    * @param[out]  *pResult is output value.
6152    * @return none.
6153    */
6154   void arm_mean_f32(
6155 		     float32_t * pSrc,
6156 		    uint32_t blockSize,
6157 		    float32_t * pResult);
6158 
6159   /**
6160    * @brief  Variance of the elements of a floating-point vector.
6161    * @param[in]  *pSrc is input pointer
6162    * @param[in]  blockSize is the number of samples to process
6163    * @param[out]  *pResult is output value.
6164    * @return none.
6165    */
6166 
6167   void arm_var_f32(
6168 		    float32_t * pSrc,
6169 		   uint32_t blockSize,
6170 		   float32_t * pResult);
6171 
6172   /**
6173    * @brief  Variance of the elements of a Q31 vector.
6174    * @param[in]  *pSrc is input pointer
6175    * @param[in]  blockSize is the number of samples to process
6176    * @param[out]  *pResult is output value.
6177    * @return none.
6178    */
6179 
6180   void arm_var_q31(
6181 		    q31_t * pSrc,
6182 		   uint32_t blockSize,
6183 		   q63_t * pResult);
6184 
6185   /**
6186    * @brief  Variance of the elements of a Q15 vector.
6187    * @param[in]  *pSrc is input pointer
6188    * @param[in]  blockSize is the number of samples to process
6189    * @param[out]  *pResult is output value.
6190    * @return none.
6191    */
6192 
6193   void arm_var_q15(
6194 		    q15_t * pSrc,
6195 		   uint32_t blockSize,
6196 		   q31_t * pResult);
6197 
6198   /**
6199    * @brief  Root Mean Square of the elements of a floating-point vector.
6200    * @param[in]  *pSrc is input pointer
6201    * @param[in]  blockSize is the number of samples to process
6202    * @param[out]  *pResult is output value.
6203    * @return none.
6204    */
6205 
6206   void arm_rms_f32(
6207 		    float32_t * pSrc,
6208 		   uint32_t blockSize,
6209 		   float32_t * pResult);
6210 
6211   /**
6212    * @brief  Root Mean Square of the elements of a Q31 vector.
6213    * @param[in]  *pSrc is input pointer
6214    * @param[in]  blockSize is the number of samples to process
6215    * @param[out]  *pResult is output value.
6216    * @return none.
6217    */
6218 
6219   void arm_rms_q31(
6220 		    q31_t * pSrc,
6221 		   uint32_t blockSize,
6222 		   q31_t * pResult);
6223 
6224   /**
6225    * @brief  Root Mean Square of the elements of a Q15 vector.
6226    * @param[in]  *pSrc is input pointer
6227    * @param[in]  blockSize is the number of samples to process
6228    * @param[out]  *pResult is output value.
6229    * @return none.
6230    */
6231 
6232   void arm_rms_q15(
6233 		    q15_t * pSrc,
6234 		   uint32_t blockSize,
6235 		   q15_t * pResult);
6236 
6237   /**
6238    * @brief  Standard deviation of the elements of a floating-point vector.
6239    * @param[in]  *pSrc is input pointer
6240    * @param[in]  blockSize is the number of samples to process
6241    * @param[out]  *pResult is output value.
6242    * @return none.
6243    */
6244 
6245   void arm_std_f32(
6246 		    float32_t * pSrc,
6247 		   uint32_t blockSize,
6248 		   float32_t * pResult);
6249 
6250   /**
6251    * @brief  Standard deviation of the elements of a Q31 vector.
6252    * @param[in]  *pSrc is input pointer
6253    * @param[in]  blockSize is the number of samples to process
6254    * @param[out]  *pResult is output value.
6255    * @return none.
6256    */
6257 
6258   void arm_std_q31(
6259 		    q31_t * pSrc,
6260 		   uint32_t blockSize,
6261 		   q31_t * pResult);
6262 
6263   /**
6264    * @brief  Standard deviation of the elements of a Q15 vector.
6265    * @param[in]  *pSrc is input pointer
6266    * @param[in]  blockSize is the number of samples to process
6267    * @param[out]  *pResult is output value.
6268    * @return none.
6269    */
6270 
6271   void arm_std_q15(
6272 		    q15_t * pSrc,
6273 		   uint32_t blockSize,
6274 		   q15_t * pResult);
6275 
6276   /**
6277    * @brief  Floating-point complex magnitude
6278    * @param[in]  *pSrc points to the complex input vector
6279    * @param[out]  *pDst points to the real output vector
6280    * @param[in]  numSamples number of complex samples in the input vector
6281    * @return none.
6282    */
6283 
6284   void arm_cmplx_mag_f32(
6285 			  float32_t * pSrc,
6286 			 float32_t * pDst,
6287 			 uint32_t numSamples);
6288 
6289   /**
6290    * @brief  Q31 complex magnitude
6291    * @param[in]  *pSrc points to the complex input vector
6292    * @param[out]  *pDst points to the real output vector
6293    * @param[in]  numSamples number of complex samples in the input vector
6294    * @return none.
6295    */
6296 
6297   void arm_cmplx_mag_q31(
6298 			  q31_t * pSrc,
6299 			 q31_t * pDst,
6300 			 uint32_t numSamples);
6301 
6302   /**
6303    * @brief  Q15 complex magnitude
6304    * @param[in]  *pSrc points to the complex input vector
6305    * @param[out]  *pDst points to the real output vector
6306    * @param[in]  numSamples number of complex samples in the input vector
6307    * @return none.
6308    */
6309 
6310   void arm_cmplx_mag_q15(
6311 			  q15_t * pSrc,
6312 			 q15_t * pDst,
6313 			 uint32_t numSamples);
6314 
6315   /**
6316    * @brief  Q15 complex dot product
6317    * @param[in]  *pSrcA points to the first input vector
6318    * @param[in]  *pSrcB points to the second input vector
6319    * @param[in]  numSamples number of complex samples in each vector
6320    * @param[out]  *realResult real part of the result returned here
6321    * @param[out]  *imagResult imaginary part of the result returned here
6322    * @return none.
6323    */
6324 
6325   void arm_cmplx_dot_prod_q15(
6326 			       q15_t * pSrcA,
6327 			       q15_t * pSrcB,
6328 			      uint32_t numSamples,
6329 			      q31_t * realResult,
6330 			      q31_t * imagResult);
6331 
6332   /**
6333    * @brief  Q31 complex dot product
6334    * @param[in]  *pSrcA points to the first input vector
6335    * @param[in]  *pSrcB points to the second input vector
6336    * @param[in]  numSamples number of complex samples in each vector
6337    * @param[out]  *realResult real part of the result returned here
6338    * @param[out]  *imagResult imaginary part of the result returned here
6339    * @return none.
6340    */
6341 
6342   void arm_cmplx_dot_prod_q31(
6343 			       q31_t * pSrcA,
6344 			       q31_t * pSrcB,
6345 			      uint32_t numSamples,
6346 			      q63_t * realResult,
6347 			      q63_t * imagResult);
6348 
6349   /**
6350    * @brief  Floating-point complex dot product
6351    * @param[in]  *pSrcA points to the first input vector
6352    * @param[in]  *pSrcB points to the second input vector
6353    * @param[in]  numSamples number of complex samples in each vector
6354    * @param[out]  *realResult real part of the result returned here
6355    * @param[out]  *imagResult imaginary part of the result returned here
6356    * @return none.
6357    */
6358 
6359   void arm_cmplx_dot_prod_f32(
6360 			       float32_t * pSrcA,
6361 			       float32_t * pSrcB,
6362 			      uint32_t numSamples,
6363 			      float32_t * realResult,
6364 			      float32_t * imagResult);
6365 
6366   /**
6367    * @brief  Q15 complex-by-real multiplication
6368    * @param[in]  *pSrcCmplx points to the complex input vector
6369    * @param[in]  *pSrcReal points to the real input vector
6370    * @param[out]  *pCmplxDst points to the complex output vector
6371    * @param[in]  numSamples number of samples in each vector
6372    * @return none.
6373    */
6374 
6375   void arm_cmplx_mult_real_q15(
6376 			        q15_t * pSrcCmplx,
6377 			        q15_t * pSrcReal,
6378 			       q15_t * pCmplxDst,
6379 			       uint32_t numSamples);
6380 
6381   /**
6382    * @brief  Q31 complex-by-real multiplication
6383    * @param[in]  *pSrcCmplx points to the complex input vector
6384    * @param[in]  *pSrcReal points to the real input vector
6385    * @param[out]  *pCmplxDst points to the complex output vector
6386    * @param[in]  numSamples number of samples in each vector
6387    * @return none.
6388    */
6389 
6390   void arm_cmplx_mult_real_q31(
6391 			        q31_t * pSrcCmplx,
6392 			        q31_t * pSrcReal,
6393 			       q31_t * pCmplxDst,
6394 			       uint32_t numSamples);
6395 
6396   /**
6397    * @brief  Floating-point complex-by-real multiplication
6398    * @param[in]  *pSrcCmplx points to the complex input vector
6399    * @param[in]  *pSrcReal points to the real input vector
6400    * @param[out]  *pCmplxDst points to the complex output vector
6401    * @param[in]  numSamples number of samples in each vector
6402    * @return none.
6403    */
6404 
6405   void arm_cmplx_mult_real_f32(
6406 			        float32_t * pSrcCmplx,
6407 			        float32_t * pSrcReal,
6408 			       float32_t * pCmplxDst,
6409 			       uint32_t numSamples);
6410 
6411   /**
6412    * @brief  Minimum value of a Q7 vector.
6413    * @param[in]  *pSrc is input pointer
6414    * @param[in]  blockSize is the number of samples to process
6415    * @param[out]  *result is output pointer
   * @param[out]  *index is the array index of the minimum value in the input buffer.
6417    * @return none.
6418    */
6419 
6420   void arm_min_q7(
6421 		   q7_t * pSrc,
6422 		  uint32_t blockSize,
6423 		  q7_t * result,
6424 		  uint32_t * index);
6425 
6426   /**
6427    * @brief  Minimum value of a Q15 vector.
6428    * @param[in]  *pSrc is input pointer
6429    * @param[in]  blockSize is the number of samples to process
6430    * @param[out]  *pResult is output pointer
   * @param[out]  *pIndex is the array index of the minimum value in the input buffer.
6432    * @return none.
6433    */
6434 
6435   void arm_min_q15(
6436 		    q15_t * pSrc,
6437 		   uint32_t blockSize,
6438 		   q15_t * pResult,
6439 		   uint32_t * pIndex);
6440 
6441   /**
6442    * @brief  Minimum value of a Q31 vector.
6443    * @param[in]  *pSrc is input pointer
6444    * @param[in]  blockSize is the number of samples to process
6445    * @param[out]  *pResult is output pointer
6446    * @param[out]  *pIndex is the array index of the minimum value in the input buffer.
6447    * @return none.
6448    */
6449   void arm_min_q31(
6450 		    q31_t * pSrc,
6451 		   uint32_t blockSize,
6452 		   q31_t * pResult,
6453 		   uint32_t * pIndex);
6454 
6455   /**
6456    * @brief  Minimum value of a floating-point vector.
6457    * @param[in]  *pSrc is input pointer
6458    * @param[in]  blockSize is the number of samples to process
6459    * @param[out]  *pResult is output pointer
6460    * @param[out]  *pIndex is the array index of the minimum value in the input buffer.
6461    * @return none.
6462    */
6463 
6464   void arm_min_f32(
6465 		    float32_t * pSrc,
6466 		   uint32_t blockSize,
6467 		   float32_t * pResult,
6468 		   uint32_t * pIndex);
6469 
6470 /**
6471  * @brief Maximum value of a Q7 vector.
6472  * @param[in]       *pSrc points to the input buffer
6473  * @param[in]       blockSize length of the input vector
6474  * @param[out]      *pResult maximum value returned here
6475  * @param[out]      *pIndex index of maximum value returned here
6476  * @return none.
6477  */
6478 
6479   void arm_max_q7(
6480 		   q7_t * pSrc,
6481 		  uint32_t blockSize,
6482 		  q7_t * pResult,
6483 		  uint32_t * pIndex);
6484 
6485 /**
6486  * @brief Maximum value of a Q15 vector.
6487  * @param[in]       *pSrc points to the input buffer
6488  * @param[in]       blockSize length of the input vector
6489  * @param[out]      *pResult maximum value returned here
6490  * @param[out]      *pIndex index of maximum value returned here
6491  * @return none.
6492  */
6493 
6494   void arm_max_q15(
6495 		    q15_t * pSrc,
6496 		   uint32_t blockSize,
6497 		   q15_t * pResult,
6498 		   uint32_t * pIndex);
6499 
6500 /**
6501  * @brief Maximum value of a Q31 vector.
6502  * @param[in]       *pSrc points to the input buffer
6503  * @param[in]       blockSize length of the input vector
6504  * @param[out]      *pResult maximum value returned here
6505  * @param[out]      *pIndex index of maximum value returned here
6506  * @return none.
6507  */
6508 
6509   void arm_max_q31(
6510 		    q31_t * pSrc,
6511 		   uint32_t blockSize,
6512 		   q31_t * pResult,
6513 		   uint32_t * pIndex);
6514 
6515 /**
6516  * @brief Maximum value of a floating-point vector.
6517  * @param[in]       *pSrc points to the input buffer
6518  * @param[in]       blockSize length of the input vector
6519  * @param[out]      *pResult maximum value returned here
6520  * @param[out]      *pIndex index of maximum value returned here
6521  * @return none.
6522  */
6523 
6524   void arm_max_f32(
6525 		    float32_t * pSrc,
6526 		   uint32_t blockSize,
6527 		   float32_t * pResult,
6528 		   uint32_t * pIndex);
6529 
6530   /**
6531    * @brief  Q15 complex-by-complex multiplication
6532    * @param[in]  *pSrcA points to the first input vector
6533    * @param[in]  *pSrcB points to the second input vector
6534    * @param[out]  *pDst  points to the output vector
6535    * @param[in]  numSamples number of complex samples in each vector
6536    * @return none.
6537    */
6538 
6539   void arm_cmplx_mult_cmplx_q15(
6540 			        q15_t * pSrcA,
6541 			        q15_t * pSrcB,
6542 			       q15_t * pDst,
6543 			       uint32_t numSamples);
6544 
6545   /**
6546    * @brief  Q31 complex-by-complex multiplication
6547    * @param[in]  *pSrcA points to the first input vector
6548    * @param[in]  *pSrcB points to the second input vector
6549    * @param[out]  *pDst  points to the output vector
6550    * @param[in]  numSamples number of complex samples in each vector
6551    * @return none.
6552    */
6553 
6554   void arm_cmplx_mult_cmplx_q31(
6555 			        q31_t * pSrcA,
6556 			        q31_t * pSrcB,
6557 			       q31_t * pDst,
6558 			       uint32_t numSamples);
6559 
6560   /**
6561    * @brief  Floating-point complex-by-complex multiplication
6562    * @param[in]  *pSrcA points to the first input vector
6563    * @param[in]  *pSrcB points to the second input vector
6564    * @param[out]  *pDst  points to the output vector
6565    * @param[in]  numSamples number of complex samples in each vector
6566    * @return none.
6567    */
6568 
6569   void arm_cmplx_mult_cmplx_f32(
6570 			        float32_t * pSrcA,
6571 			        float32_t * pSrcB,
6572 			       float32_t * pDst,
6573 			       uint32_t numSamples);
6574 
6575   /**
6576    * @brief Converts the elements of the floating-point vector to Q31 vector.
6577    * @param[in]       *pSrc points to the floating-point input vector
6578    * @param[out]      *pDst points to the Q31 output vector
6579    * @param[in]       blockSize length of the input vector
6580    * @return none.
6581    */
6582   void arm_float_to_q31(
6583 			       float32_t * pSrc,
6584 			      q31_t * pDst,
6585 			      uint32_t blockSize);
6586 
6587   /**
6588    * @brief Converts the elements of the floating-point vector to Q15 vector.
6589    * @param[in]       *pSrc points to the floating-point input vector
6590    * @param[out]      *pDst points to the Q15 output vector
6591    * @param[in]       blockSize length of the input vector
6592    * @return          none
6593    */
6594   void arm_float_to_q15(
6595 			       float32_t * pSrc,
6596 			      q15_t * pDst,
6597 			      uint32_t blockSize);
6598 
6599   /**
6600    * @brief Converts the elements of the floating-point vector to Q7 vector.
6601    * @param[in]       *pSrc points to the floating-point input vector
6602    * @param[out]      *pDst points to the Q7 output vector
6603    * @param[in]       blockSize length of the input vector
6604    * @return          none
6605    */
6606   void arm_float_to_q7(
6607 			      float32_t * pSrc,
6608 			     q7_t * pDst,
6609 			     uint32_t blockSize);
6610 
6611 
6612   /**
6613    * @brief  Converts the elements of the Q31 vector to Q15 vector.
6614    * @param[in]  *pSrc is input pointer
6615    * @param[out]  *pDst is output pointer
6616    * @param[in]  blockSize is the number of samples to process
6617    * @return none.
6618    */
6619   void arm_q31_to_q15(
6620 		       q31_t * pSrc,
6621 		      q15_t * pDst,
6622 		      uint32_t blockSize);
6623 
6624   /**
6625    * @brief  Converts the elements of the Q31 vector to Q7 vector.
6626    * @param[in]  *pSrc is input pointer
6627    * @param[out]  *pDst is output pointer
6628    * @param[in]  blockSize is the number of samples to process
6629    * @return none.
6630    */
6631   void arm_q31_to_q7(
6632 		      q31_t * pSrc,
6633 		     q7_t * pDst,
6634 		     uint32_t blockSize);
6635 
6636   /**
6637    * @brief  Converts the elements of the Q15 vector to floating-point vector.
6638    * @param[in]  *pSrc is input pointer
6639    * @param[out]  *pDst is output pointer
6640    * @param[in]  blockSize is the number of samples to process
6641    * @return none.
6642    */
6643   void arm_q15_to_float(
6644 			 q15_t * pSrc,
6645 			float32_t * pDst,
6646 			uint32_t blockSize);
6647 
6648 
6649   /**
6650    * @brief  Converts the elements of the Q15 vector to Q31 vector.
6651    * @param[in]  *pSrc is input pointer
6652    * @param[out]  *pDst is output pointer
6653    * @param[in]  blockSize is the number of samples to process
6654    * @return none.
6655    */
6656   void arm_q15_to_q31(
6657 		       q15_t * pSrc,
6658 		      q31_t * pDst,
6659 		      uint32_t blockSize);
6660 
6661 
6662   /**
6663    * @brief  Converts the elements of the Q15 vector to Q7 vector.
6664    * @param[in]  *pSrc is input pointer
6665    * @param[out]  *pDst is output pointer
6666    * @param[in]  blockSize is the number of samples to process
6667    * @return none.
6668    */
6669   void arm_q15_to_q7(
6670 		      q15_t * pSrc,
6671 		     q7_t * pDst,
6672 		     uint32_t blockSize);
6673 
6674 
6675   /**
6676    * @ingroup groupInterpolation
6677    */
6678 
6679   /**
6680    * @defgroup BilinearInterpolate Bilinear Interpolation
6681    *
6682    * Bilinear interpolation is an extension of linear interpolation applied to a two dimensional grid.
6683    * The underlying function <code>f(x, y)</code> is sampled on a regular grid and the interpolation process
6684    * determines values between the grid points.
6685    * Bilinear interpolation is equivalent to two step linear interpolation, first in the x-dimension and then in the y-dimension.
6686    * Bilinear interpolation is often used in image processing to rescale images.
6687    * The CMSIS DSP library provides bilinear interpolation functions for Q7, Q15, Q31, and floating-point data types.
6688    *
6689    * <b>Algorithm</b>
6690    * \par
6691    * The instance structure used by the bilinear interpolation functions describes a two dimensional data table.
6692    * For floating-point, the instance structure is defined as:
6693    * <pre>
6694    *   typedef struct
6695    *   {
6696    *     uint16_t numRows;
6697    *     uint16_t numCols;
6698    *     float32_t *pData;
6699    * } arm_bilinear_interp_instance_f32;
6700    * </pre>
6701    *
6702    * \par
6703    * where <code>numRows</code> specifies the number of rows in the table;
6704    * <code>numCols</code> specifies the number of columns in the table;
6705    * and <code>pData</code> points to an array of size <code>numRows*numCols</code> values.
6706    * The data table <code>pData</code> is organized in row order and the supplied data values fall on integer indexes.
6707    * That is, table element (x,y) is located at <code>pData[x + y*numCols]</code> where x and y are integers.
6708    *
6709    * \par
6710    * Let <code>(x, y)</code> specify the desired interpolation point.  Then define:
6711    * <pre>
6712    *     XF = floor(x)
6713    *     YF = floor(y)
6714    * </pre>
6715    * \par
6716    * The interpolated output point is computed as:
6717    * <pre>
6718    *  f(x, y) = f(XF, YF) * (1-(x-XF)) * (1-(y-YF))
6719    *           + f(XF+1, YF) * (x-XF)*(1-(y-YF))
6720    *           + f(XF, YF+1) * (1-(x-XF))*(y-YF)
6721    *           + f(XF+1, YF+1) * (x-XF)*(y-YF)
6722    * </pre>
6723    * Note that the coordinates (x, y) contain integer and fractional components.
6724    * The integer components specify which portion of the table to use while the
6725    * fractional components control the interpolation process.
6726    *
6727    * \par
6728    * If (x, y) lies outside the table boundary, bilinear interpolation returns zero.
6729    */
6730 
6731   /**
6732    * @addtogroup BilinearInterpolate
6733    * @{
6734    */
6735 
6736   /**
6737   *
6738   * @brief  Floating-point bilinear interpolation.
6739   * @param[in,out] *S points to an instance of the interpolation structure.
6740   * @param[in] X interpolation coordinate.
6741   * @param[in] Y interpolation coordinate.
6742   * @return out interpolated value.
6743   */
6744 
6745 
arm_bilinear_interp_f32(const arm_bilinear_interp_instance_f32 * S,float32_t X,float32_t Y)6746   __STATIC_INLINE float32_t arm_bilinear_interp_f32(
6747 						    const arm_bilinear_interp_instance_f32 * S,
6748 						    float32_t X,
6749 						    float32_t Y)
6750   {
6751     float32_t out;
6752     float32_t f00, f01, f10, f11;
6753     float32_t *pData = S->pData;
6754     int32_t xIndex, yIndex, index;
6755     float32_t xdiff, ydiff;
6756     float32_t b1, b2, b3, b4;
6757 
6758     xIndex = (int32_t) X;
6759     yIndex = (int32_t) Y;
6760 
6761 	/* Care taken for table outside boundary */
6762 	/* Returns zero output when values are outside table boundary */
6763 	if(xIndex < 0 || xIndex > (S->numRows-1) || yIndex < 0  || yIndex > ( S->numCols-1))
6764 	{
6765 		return(0);
6766 	}
6767 
6768     /* Calculation of index for two nearest points in X-direction */
6769     index = (xIndex - 1) + (yIndex-1) *  S->numCols ;
6770 
6771 
6772     /* Read two nearest points in X-direction */
6773     f00 = pData[index];
6774     f01 = pData[index + 1];
6775 
6776     /* Calculation of index for two nearest points in Y-direction */
6777     index = (xIndex-1) + (yIndex) * S->numCols;
6778 
6779 
6780     /* Read two nearest points in Y-direction */
6781     f10 = pData[index];
6782     f11 = pData[index + 1];
6783 
6784     /* Calculation of intermediate values */
6785     b1 = f00;
6786     b2 = f01 - f00;
6787     b3 = f10 - f00;
6788     b4 = f00 - f01 - f10 + f11;
6789 
6790     /* Calculation of fractional part in X */
6791     xdiff = X - xIndex;
6792 
6793     /* Calculation of fractional part in Y */
6794     ydiff = Y - yIndex;
6795 
6796     /* Calculation of bi-linear interpolated output */
6797      out = b1 + b2 * xdiff + b3 * ydiff + b4 * xdiff * ydiff;
6798 
6799    /* return to application */
6800     return (out);
6801 
6802   }
6803 
6804   /**
6805   *
6806   * @brief  Q31 bilinear interpolation.
6807   * @param[in,out] *S points to an instance of the interpolation structure.
6808   * @param[in] X interpolation coordinate in 12.20 format.
6809   * @param[in] Y interpolation coordinate in 12.20 format.
6810   * @return out interpolated value.
6811   */
6812 
arm_bilinear_interp_q31(arm_bilinear_interp_instance_q31 * S,q31_t X,q31_t Y)6813   __STATIC_INLINE q31_t arm_bilinear_interp_q31(
6814 						arm_bilinear_interp_instance_q31 * S,
6815 						q31_t X,
6816 						q31_t Y)
6817   {
6818     q31_t out;                                   /* Temporary output */
6819     q31_t acc = 0;                               /* output */
6820     q31_t xfract, yfract;                        /* X, Y fractional parts */
6821     q31_t x1, x2, y1, y2;                        /* Nearest output values */
6822     int32_t rI, cI;                             /* Row and column indices */
6823     q31_t *pYData = S->pData;                    /* pointer to output table values */
6824     uint32_t nCols = S->numCols;                 /* num of rows */
6825 
6826 
6827     /* Input is in 12.20 format */
6828     /* 12 bits for the table index */
6829     /* Index value calculation */
6830     rI = ((X & 0xFFF00000) >> 20u);
6831 
6832     /* Input is in 12.20 format */
6833     /* 12 bits for the table index */
6834     /* Index value calculation */
6835     cI = ((Y & 0xFFF00000) >> 20u);
6836 
6837 	/* Care taken for table outside boundary */
6838 	/* Returns zero output when values are outside table boundary */
6839 	if(rI < 0 || rI > (S->numRows-1) || cI < 0  || cI > ( S->numCols-1))
6840 	{
6841 		return(0);
6842 	}
6843 
6844     /* 20 bits for the fractional part */
6845     /* shift left xfract by 11 to keep 1.31 format */
6846     xfract = (X & 0x000FFFFF) << 11u;
6847 
6848     /* Read two nearest output values from the index */
6849     x1 = pYData[(rI) + nCols * (cI)];
6850     x2 = pYData[(rI) + nCols * (cI) + 1u];
6851 
6852     /* 20 bits for the fractional part */
6853     /* shift left yfract by 11 to keep 1.31 format */
6854     yfract = (Y & 0x000FFFFF) << 11u;
6855 
6856     /* Read two nearest output values from the index */
6857     y1 = pYData[(rI) + nCols * (cI + 1)];
6858     y2 = pYData[(rI) + nCols * (cI + 1) + 1u];
6859 
6860     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 3.29(q29) format */
6861     out = ((q31_t) (((q63_t) x1 * (0x7FFFFFFF - xfract)) >> 32));
6862     acc = ((q31_t) (((q63_t) out * (0x7FFFFFFF - yfract)) >> 32));
6863 
6864     /* x2 * (xfract) * (1-yfract)  in 3.29(q29) and adding to acc */
6865     out = ((q31_t) ((q63_t) x2 * (0x7FFFFFFF - yfract) >> 32));
6866     acc += ((q31_t) ((q63_t) out * (xfract) >> 32));
6867 
6868     /* y1 * (1 - xfract) * (yfract)  in 3.29(q29) and adding to acc */
6869     out = ((q31_t) ((q63_t) y1 * (0x7FFFFFFF - xfract) >> 32));
6870     acc += ((q31_t) ((q63_t) out * (yfract) >> 32));
6871 
6872     /* y2 * (xfract) * (yfract)  in 3.29(q29) and adding to acc */
6873     out = ((q31_t) ((q63_t) y2 * (xfract) >> 32));
6874     acc += ((q31_t) ((q63_t) out * (yfract) >> 32));
6875 
6876     /* Convert acc to 1.31(q31) format */
6877     return (acc << 2u);
6878 
6879   }
6880 
6881   /**
6882   * @brief  Q15 bilinear interpolation.
6883   * @param[in,out] *S points to an instance of the interpolation structure.
6884   * @param[in] X interpolation coordinate in 12.20 format.
6885   * @param[in] Y interpolation coordinate in 12.20 format.
6886   * @return out interpolated value.
6887   */
6888 
arm_bilinear_interp_q15(arm_bilinear_interp_instance_q15 * S,q31_t X,q31_t Y)6889   __STATIC_INLINE q15_t arm_bilinear_interp_q15(
6890 						arm_bilinear_interp_instance_q15 * S,
6891 						q31_t X,
6892 						q31_t Y)
6893   {
6894     q63_t acc = 0;                               /* output */
6895     q31_t out;                                   /* Temporary output */
6896     q15_t x1, x2, y1, y2;                        /* Nearest output values */
6897     q31_t xfract, yfract;                        /* X, Y fractional parts */
6898     int32_t rI, cI;                             /* Row and column indices */
6899     q15_t *pYData = S->pData;                    /* pointer to output table values */
6900     uint32_t nCols = S->numCols;                 /* num of rows */
6901 
6902     /* Input is in 12.20 format */
6903     /* 12 bits for the table index */
6904     /* Index value calculation */
6905     rI = ((X & 0xFFF00000) >> 20);
6906 
6907     /* Input is in 12.20 format */
6908     /* 12 bits for the table index */
6909     /* Index value calculation */
6910     cI = ((Y & 0xFFF00000) >> 20);
6911 
6912 	/* Care taken for table outside boundary */
6913 	/* Returns zero output when values are outside table boundary */
6914 	if(rI < 0 || rI > (S->numRows-1) || cI < 0  || cI > ( S->numCols-1))
6915 	{
6916 		return(0);
6917 	}
6918 
6919     /* 20 bits for the fractional part */
6920     /* xfract should be in 12.20 format */
6921     xfract = (X & 0x000FFFFF);
6922 
6923     /* Read two nearest output values from the index */
6924     x1 = pYData[(rI) + nCols * (cI)];
6925     x2 = pYData[(rI) + nCols * (cI) + 1u];
6926 
6927 
6928     /* 20 bits for the fractional part */
6929     /* yfract should be in 12.20 format */
6930     yfract = (Y & 0x000FFFFF);
6931 
6932     /* Read two nearest output values from the index */
6933     y1 = pYData[(rI) + nCols * (cI + 1)];
6934     y2 = pYData[(rI) + nCols * (cI + 1) + 1u];
6935 
6936     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 13.51 format */
6937 
6938     /* x1 is in 1.15(q15), xfract in 12.20 format and out is in 13.35 format */
6939     /* convert 13.35 to 13.31 by right shifting  and out is in 1.31 */
6940     out = (q31_t) (((q63_t) x1 * (0xFFFFF - xfract)) >> 4u);
6941     acc = ((q63_t) out * (0xFFFFF - yfract));
6942 
6943     /* x2 * (xfract) * (1-yfract)  in 1.51 and adding to acc */
6944     out = (q31_t) (((q63_t) x2 * (0xFFFFF - yfract)) >> 4u);
6945     acc += ((q63_t) out * (xfract));
6946 
6947     /* y1 * (1 - xfract) * (yfract)  in 1.51 and adding to acc */
6948     out = (q31_t) (((q63_t) y1 * (0xFFFFF - xfract)) >> 4u);
6949     acc += ((q63_t) out * (yfract));
6950 
6951     /* y2 * (xfract) * (yfract)  in 1.51 and adding to acc */
6952     out = (q31_t) (((q63_t) y2 * (xfract)) >> 4u);
6953     acc += ((q63_t) out * (yfract));
6954 
6955     /* acc is in 13.51 format and down shift acc by 36 times */
6956     /* Convert out to 1.15 format */
6957     return (acc >> 36);
6958 
6959   }
6960 
6961   /**
6962   * @brief  Q7 bilinear interpolation.
6963   * @param[in,out] *S points to an instance of the interpolation structure.
6964   * @param[in] X interpolation coordinate in 12.20 format.
6965   * @param[in] Y interpolation coordinate in 12.20 format.
6966   * @return out interpolated value.
6967   */
6968 
arm_bilinear_interp_q7(arm_bilinear_interp_instance_q7 * S,q31_t X,q31_t Y)6969   __STATIC_INLINE q7_t arm_bilinear_interp_q7(
6970 					      arm_bilinear_interp_instance_q7 * S,
6971 					      q31_t X,
6972 					      q31_t Y)
6973   {
6974     q63_t acc = 0;                               /* output */
6975     q31_t out;                                   /* Temporary output */
6976     q31_t xfract, yfract;                        /* X, Y fractional parts */
6977     q7_t x1, x2, y1, y2;                         /* Nearest output values */
6978     int32_t rI, cI;                             /* Row and column indices */
6979     q7_t *pYData = S->pData;                     /* pointer to output table values */
6980     uint32_t nCols = S->numCols;                 /* num of rows */
6981 
6982     /* Input is in 12.20 format */
6983     /* 12 bits for the table index */
6984     /* Index value calculation */
6985     rI = ((X & 0xFFF00000) >> 20);
6986 
6987     /* Input is in 12.20 format */
6988     /* 12 bits for the table index */
6989     /* Index value calculation */
6990     cI = ((Y & 0xFFF00000) >> 20);
6991 
6992 	/* Care taken for table outside boundary */
6993 	/* Returns zero output when values are outside table boundary */
6994 	if(rI < 0 || rI > (S->numRows-1) || cI < 0  || cI > ( S->numCols-1))
6995 	{
6996 		return(0);
6997 	}
6998 
6999     /* 20 bits for the fractional part */
7000     /* xfract should be in 12.20 format */
7001     xfract = (X & 0x000FFFFF);
7002 
7003     /* Read two nearest output values from the index */
7004     x1 = pYData[(rI) + nCols * (cI)];
7005     x2 = pYData[(rI) + nCols * (cI) + 1u];
7006 
7007 
7008     /* 20 bits for the fractional part */
7009     /* yfract should be in 12.20 format */
7010     yfract = (Y & 0x000FFFFF);
7011 
7012     /* Read two nearest output values from the index */
7013     y1 = pYData[(rI) + nCols * (cI + 1)];
7014     y2 = pYData[(rI) + nCols * (cI + 1) + 1u];
7015 
7016     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 16.47 format */
7017     out = ((x1 * (0xFFFFF - xfract)));
7018     acc = (((q63_t) out * (0xFFFFF - yfract)));
7019 
7020     /* x2 * (xfract) * (1-yfract)  in 2.22 and adding to acc */
7021     out = ((x2 * (0xFFFFF - yfract)));
7022     acc += (((q63_t) out * (xfract)));
7023 
7024     /* y1 * (1 - xfract) * (yfract)  in 2.22 and adding to acc */
7025     out = ((y1 * (0xFFFFF - xfract)));
7026     acc += (((q63_t) out * (yfract)));
7027 
7028     /* y2 * (xfract) * (yfract)  in 2.22 and adding to acc */
7029     out = ((y2 * (yfract)));
7030     acc += (((q63_t) out * (xfract)));
7031 
7032     /* acc in 16.47 format and down shift by 40 to convert to 1.7 format */
7033     return (acc >> 40);
7034 
7035   }
7036 
7037   /**
7038    * @} end of BilinearInterpolate group
7039    */
7040 
7041 
7042 
7043 
7044 
7045 
7046 #ifdef	__cplusplus
7047 }
7048 #endif
7049 
7050 
7051 #endif /* _ARM_MATH_H */
7052 
7053 
7054 /**
7055  *
7056  * End of file.
7057  */
7058