1 /* ----------------------------------------------------------------------
2  * Copyright (C) 2010 ARM Limited. All rights reserved.
3  *
4  * $Date:        15. July 2011
5  * $Revision: 	V1.0.10
6  *
7  * Project: 	    CMSIS DSP Library
8  * Title:	     arm_math.h
9  *
10  * Description:	 Public header file for CMSIS DSP Library
11  *
12  * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
13  *
14  * Version 1.0.10 2011/7/15
15  *    Big Endian support added and Merged M0 and M3/M4 Source code.
16  *
17  * Version 1.0.3 2010/11/29
18  *    Re-organized the CMSIS folders and updated documentation.
19  *
20  * Version 1.0.2 2010/11/11
21  *    Documentation updated.
22  *
23  * Version 1.0.1 2010/10/05
24  *    Production release and review comments incorporated.
25  *
26  * Version 1.0.0 2010/09/20
27  *    Production release and review comments incorporated.
28  * -------------------------------------------------------------------- */
29 
30 /**
31    \mainpage CMSIS DSP Software Library
32    *
33    * <b>Introduction</b>
34    *
35    * This user manual describes the CMSIS DSP software library,
36    * a suite of common signal processing functions for use on Cortex-M processor based devices.
37    *
38    * The library is divided into a number of modules each covering a specific category:
39    * - Basic math functions
40    * - Fast math functions
41    * - Complex math functions
42    * - Filters
43    * - Matrix functions
44    * - Transforms
45    * - Motor control functions
46    * - Statistical functions
47    * - Support functions
48    * - Interpolation functions
49    *
50    * The library has separate functions for operating on 8-bit integers, 16-bit integers,
   * 32-bit integers and 32-bit floating-point values.
52    *
53    * <b>Processor Support</b>
54    *
55    * The library is completely written in C and is fully CMSIS compliant.
56    * High performance is achieved through maximum use of Cortex-M4 intrinsics.
57    *
58    * The supplied library source code also builds and runs on the Cortex-M3 and Cortex-M0 processor,
59    * with the DSP intrinsics being emulated through software.
60    *
61    *
62    * <b>Toolchain Support</b>
63    *
64    * The library has been developed and tested with MDK-ARM version 4.21.
65    * The library is being tested in GCC and IAR toolchains and updates on this activity will be made available shortly.
66    *
67    * <b>Using the Library</b>
68    *
69    * The library installer contains prebuilt versions of the libraries in the <code>Lib</code> folder.
70    * - arm_cortexM4lf_math.lib (Little endian and Floating Point Unit on Cortex-M4)
71    * - arm_cortexM4bf_math.lib (Big endian and Floating Point Unit on Cortex-M4)
72    * - arm_cortexM4l_math.lib (Little endian on Cortex-M4)
73    * - arm_cortexM4b_math.lib (Big endian on Cortex-M4)
74    * - arm_cortexM3l_math.lib (Little endian on Cortex-M3)
75    * - arm_cortexM3b_math.lib (Big endian on Cortex-M3)
76    * - arm_cortexM0l_math.lib (Little endian on Cortex-M0)
   * - arm_cortexM0b_math.lib (Big endian on Cortex-M0)
78    *
79    * The library functions are declared in the public file <code>arm_math.h</code> which is placed in the <code>Include</code> folder.
   * Simply include this file, link the appropriate library in the application, and begin calling the library functions. The library provides a single
   * public header file <code>arm_math.h</code> for Cortex-M4/M3/M0 in both little-endian and big-endian configurations. The same header file is used for the floating-point unit (FPU) variants.
82    * Define the appropriate pre processor MACRO ARM_MATH_CM4 or  ARM_MATH_CM3 or
83    * ARM_MATH_CM0 depending on the target processor in the application.
84    *
85    * <b>Examples</b>
86    *
87    * The library ships with a number of examples which demonstrate how to use the library functions.
88    *
89    * <b>Building the Library</b>
90    *
91    * The library installer contains project files to re build libraries on MDK Tool chain in the <code>CMSIS\DSP_Lib\Source\ARM</code> folder.
92    * - arm_cortexM0b_math.uvproj
93    * - arm_cortexM0l_math.uvproj
94    * - arm_cortexM3b_math.uvproj
95    * - arm_cortexM3l_math.uvproj
96    * - arm_cortexM4b_math.uvproj
97    * - arm_cortexM4l_math.uvproj
98    * - arm_cortexM4bf_math.uvproj
99    * - arm_cortexM4lf_math.uvproj
100    *
   * Each library project has different pre-processor macros.
102    *
103    * <b>ARM_MATH_CMx:</b>
104    * Define macro ARM_MATH_CM4 for building the library on Cortex-M4 target, ARM_MATH_CM3 for building library on Cortex-M3 target
105    * and ARM_MATH_CM0 for building library on cortex-M0 target.
106    *
107    * <b>ARM_MATH_BIG_ENDIAN:</b>
108    * Define macro ARM_MATH_BIG_ENDIAN to build the library for big endian targets. By default library builds for little endian targets.
109    *
110    * <b>ARM_MATH_MATRIX_CHECK:</b>
111    * Define macro for checking on the input and output sizes of matrices
112    *
113    * <b>ARM_MATH_ROUNDING:</b>
114    * Define macro for rounding on support functions
115    *
116    * <b>__FPU_PRESENT:</b>
117    * Initialize macro __FPU_PRESENT = 1 when building on FPU supported Targets. Enable this macro for M4bf and M4lf libraries
118    *
119    *
120    * The project can be built by opening the appropriate project in MDK-ARM 4.21 chain and defining the optional pre processor MACROs detailed above.
121    *
122    * <b>Copyright Notice</b>
123    *
124    * Copyright (C) 2010 ARM Limited. All rights reserved.
125    */
126 
127 
128 /**
129  * @defgroup groupMath Basic Math Functions
130  */
131 
132 /**
133  * @defgroup groupFastMath Fast Math Functions
134  * This set of functions provides a fast approximation to sine, cosine, and square root.
135  * As compared to most of the other functions in the CMSIS math library, the fast math functions
136  * operate on individual values and not arrays.
137  * There are separate functions for Q15, Q31, and floating-point data.
138  *
139  */
140 
141 /**
142  * @defgroup groupCmplxMath Complex Math Functions
143  * This set of functions operates on complex data vectors.
144  * The data in the complex arrays is stored in an interleaved fashion
145  * (real, imag, real, imag, ...).
146  * In the API functions, the number of samples in a complex array refers
147  * to the number of complex values; the array contains twice this number of
148  * real values.
149  */
150 
151 /**
152  * @defgroup groupFilters Filtering Functions
153  */
154 
155 /**
156  * @defgroup groupMatrix Matrix Functions
157  *
158  * This set of functions provides basic matrix math operations.
159  * The functions operate on matrix data structures.  For example,
160  * the type
161  * definition for the floating-point matrix structure is shown
162  * below:
163  * <pre>
164  *     typedef struct
165  *     {
166  *       uint16_t numRows;     // number of rows of the matrix.
167  *       uint16_t numCols;     // number of columns of the matrix.
168  *       float32_t *pData;     // points to the data of the matrix.
169  *     } arm_matrix_instance_f32;
170  * </pre>
171  * There are similar definitions for Q15 and Q31 data types.
172  *
173  * The structure specifies the size of the matrix and then points to
174  * an array of data.  The array is of size <code>numRows X numCols</code>
175  * and the values are arranged in row order.  That is, the
176  * matrix element (i, j) is stored at:
177  * <pre>
178  *     pData[i*numCols + j]
179  * </pre>
180  *
181  * \par Init Functions
182  * There is an associated initialization function for each type of matrix
183  * data structure.
184  * The initialization function sets the values of the internal structure fields.
185  * Refer to the function <code>arm_mat_init_f32()</code>, <code>arm_mat_init_q31()</code>
186  * and <code>arm_mat_init_q15()</code> for floating-point, Q31 and Q15 types,  respectively.
187  *
188  * \par
189  * Use of the initialization function is optional. However, if initialization function is used
190  * then the instance structure cannot be placed into a const data section.
191  * To place the instance structure in a const data
192  * section, manually initialize the data structure.  For example:
193  * <pre>
194  * <code>arm_matrix_instance_f32 S = {nRows, nColumns, pData};</code>
195  * <code>arm_matrix_instance_q31 S = {nRows, nColumns, pData};</code>
196  * <code>arm_matrix_instance_q15 S = {nRows, nColumns, pData};</code>
197  * </pre>
198  * where <code>nRows</code> specifies the number of rows, <code>nColumns</code>
199  * specifies the number of columns, and <code>pData</code> points to the
200  * data array.
201  *
202  * \par Size Checking
203  * By default all of the matrix functions perform size checking on the input and
204  * output matrices.  For example, the matrix addition function verifies that the
205  * two input matrices and the output matrix all have the same number of rows and
206  * columns.  If the size check fails the functions return:
207  * <pre>
208  *     ARM_MATH_SIZE_MISMATCH
209  * </pre>
210  * Otherwise the functions return
211  * <pre>
212  *     ARM_MATH_SUCCESS
213  * </pre>
214  * There is some overhead associated with this matrix size checking.
215  * The matrix size checking is enabled via the #define
216  * <pre>
217  *     ARM_MATH_MATRIX_CHECK
218  * </pre>
219  * within the library project settings.  By default this macro is defined
220  * and size checking is enabled.  By changing the project settings and
221  * undefining this macro size checking is eliminated and the functions
222  * run a bit faster.  With size checking disabled the functions always
223  * return <code>ARM_MATH_SUCCESS</code>.
224  */
225 
226 /**
227  * @defgroup groupTransforms Transform Functions
228  */
229 
230 /**
231  * @defgroup groupController Controller Functions
232  */
233 
234 /**
235  * @defgroup groupStats Statistics Functions
236  */
237 /**
238  * @defgroup groupSupport Support Functions
239  */
240 
241 /**
242  * @defgroup groupInterpolation Interpolation Functions
243  * These functions perform 1- and 2-dimensional interpolation of data.
244  * Linear interpolation is used for 1-dimensional data and
245  * bilinear interpolation is used for 2-dimensional data.
246  */
247 
248 /**
249  * @defgroup groupExamples Examples
250  */
251 #ifndef _ARM_MATH_H
252 #define _ARM_MATH_H
253 
254 #define __CMSIS_GENERIC              /* disable NVIC and Systick functions */
255 
256 #if defined (ARM_MATH_CM4)
257   #include "core_cm4.h"
258 #elif defined (ARM_MATH_CM3)
259   #include "core_cm3.h"
260 #elif defined (ARM_MATH_CM0)
261   #include "core_cm0.h"
262 #else
263 #include "ARMCM4.h"
264 #warning "Define either ARM_MATH_CM4 OR ARM_MATH_CM3...By Default building on ARM_MATH_CM4....."
265 #endif
266 
267 #undef  __CMSIS_GENERIC              /* enable NVIC and Systick functions */
268 #include "string.h"
269     #include "math.h"
270 #ifdef	__cplusplus
271 extern "C"
272 {
273 #endif
274 
275 
/**
 * @brief Macros required for reciprocal calculation in Normalized LMS
 *
 * INDEX_MASK keeps the low six bits of a value (0..63); arm_recip_q31() and
 * arm_recip_q15() apply it to the index they use to read their lookup table.
 * PI carries an 'f' suffix, so it is a single-precision constant.
 */

#define DELTA_Q31 			(0x100)
#define DELTA_Q15 			0x5
#define INDEX_MASK 			0x0000003F
#define PI					3.14159265358979f

/**
 * @brief Macros required for SINE and COSINE Fast math approximations
 *
 * Note that TABLE_SPACING_Q31 * TABLE_SIZE == 0x80000000 and
 * TABLE_SPACING_Q15 * TABLE_SIZE == 0x8000.
 */

#define TABLE_SIZE			256
#define TABLE_SPACING_Q31	0x800000
#define TABLE_SPACING_Q15	0x80

/**
 * @brief Macros required for SINE and COSINE Controller functions
 */
/* 1.31(q31) Fixed value of 2/360 */
/* -1 to +1 is divided into 360 values so total spacing is (2/360) */
#define INPUT_SPACING			0xB60B61
299 
300 
/**
 * @brief Status codes returned by those library functions that report errors.
 *
 * Success is zero; every failure code is a distinct negative value.
 */
typedef enum {
  ARM_MATH_SUCCESS        =  0,  /**< No error */
  ARM_MATH_ARGUMENT_ERROR = -1,  /**< One or more arguments are incorrect */
  ARM_MATH_LENGTH_ERROR   = -2,  /**< Length of data buffer is incorrect */
  ARM_MATH_SIZE_MISMATCH  = -3,  /**< Size of matrices is not compatible with the operation. */
  ARM_MATH_NANINF         = -4,  /**< Not-a-number (NaN) or infinity is generated */
  ARM_MATH_SINGULAR       = -5,  /**< Generated by matrix inversion if the input matrix is singular and cannot be inverted. */
  ARM_MATH_TEST_FAILURE   = -6   /**< Test Failed  */
} arm_status;
315 
/**
 * @brief 8-bit fractional data type in 1.7 format (stored in an int8_t).
 */
typedef int8_t q7_t;

/**
 * @brief 16-bit fractional data type in 1.15 format (stored in an int16_t).
 */
typedef int16_t q15_t;

/**
 * @brief 32-bit fractional data type in 1.31 format (stored in an int32_t).
 */
typedef int32_t q31_t;

/**
 * @brief 64-bit fractional data type in 1.63 format (stored in an int64_t).
 */
typedef int64_t q63_t;

/**
 * @brief 32-bit floating-point type definition (alias of float).
 */
typedef float float32_t;

/**
 * @brief 64-bit floating-point type definition (alias of double).
 */
typedef double float64_t;
345 
/**
 * @brief definition to read/write two 16 bit values.
 *
 * Expands to the pointer variable @p addr itself, accessed as if it had type
 * <code>int32_t *</code>. The result is an lvalue, so <code>*__SIMD32(p)</code>
 * accesses 32 bits at p and <code>__SIMD32(p)++</code> advances p by
 * sizeof(int32_t) bytes. @p addr must be an lvalue because its address is taken.
 *
 * NOTE(review): this reinterprets the pointee type through a cast; it presumably
 * relies on the target tolerating the resulting access width/alignment - confirm
 * for each toolchain.
 */
#define __SIMD32(addr)  (*(int32_t **) & (addr))
350 
351 #if defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0)
/**
 * @brief definition to pack two 16 bit values.
 *
 * Result bits [15:0] are the low 16 bits of ARG1; result bits [31:16] are
 * bits [31:16] of (ARG2 << ARG3).
 *
 * Every argument is parenthesised in the expansion so that a shift amount
 * written as an expression (for example <code>flag ? 16 : 0</code>) is
 * evaluated as a whole before the shift is applied.
 */
#define __PKHBT(ARG1, ARG2, ARG3)      ( (((int32_t)(ARG1) <<  0) & (int32_t)0x0000FFFF) | \
                                         (((int32_t)(ARG2) << (ARG3)) & (int32_t)0xFFFF0000)  )
357 
358 #endif
359 
360 
/**
 * @brief definition to pack four 8 bit values.
 *
 * Only the low 8 bits of each argument are used. In the default
 * (little-endian) build v0 occupies bits [7:0] and v3 bits [31:24]; when
 * ARM_MATH_BIG_ENDIAN is defined the order is reversed (v0 in bits [31:24]).
 */
#ifndef ARM_MATH_BIG_ENDIAN

#define __PACKq7(v0,v1,v2,v3) ( ((((int32_t)(v3)) & (int32_t)0x000000FF) << 24) | \
                                ((((int32_t)(v2)) & (int32_t)0x000000FF) << 16) | \
                                ((((int32_t)(v1)) & (int32_t)0x000000FF) <<  8) | \
                                ((((int32_t)(v0)) & (int32_t)0x000000FF) <<  0)  )
#else

#define __PACKq7(v0,v1,v2,v3) ( ((((int32_t)(v0)) & (int32_t)0x000000FF) << 24) | \
                                ((((int32_t)(v1)) & (int32_t)0x000000FF) << 16) | \
                                ((((int32_t)(v2)) & (int32_t)0x000000FF) <<  8) | \
                                ((((int32_t)(v3)) & (int32_t)0x000000FF) <<  0)  )

#endif
378 
379 
380   /**
381    * @brief Clips Q63 to Q31 values.
382    */
clip_q63_to_q31(q63_t x)383   static __INLINE q31_t clip_q63_to_q31(
384 					q63_t x)
385   {
386     return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
387       ((0x7FFFFFFF ^ ((q31_t) (x >> 63)))) : (q31_t) x;
388   }
389 
390   /**
391    * @brief Clips Q63 to Q15 values.
392    */
clip_q63_to_q15(q63_t x)393   static __INLINE q15_t clip_q63_to_q15(
394 					q63_t x)
395   {
396     return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
397       ((0x7FFF ^ ((q15_t) (x >> 63)))) : (q15_t) (x >> 15);
398   }
399 
400   /**
401    * @brief Clips Q31 to Q7 values.
402    */
clip_q31_to_q7(q31_t x)403   static __INLINE q7_t clip_q31_to_q7(
404 				      q31_t x)
405   {
406     return ((q31_t) (x >> 24) != ((q31_t) x >> 23)) ?
407       ((0x7F ^ ((q7_t) (x >> 31)))) : (q7_t) x;
408   }
409 
410   /**
411    * @brief Clips Q31 to Q15 values.
412    */
clip_q31_to_q15(q31_t x)413   static __INLINE q15_t clip_q31_to_q15(
414 					q31_t x)
415   {
416     return ((q31_t) (x >> 16) != ((q31_t) x >> 15)) ?
417       ((0x7FFF ^ ((q15_t) (x >> 31)))) : (q15_t) x;
418   }
419 
420   /**
421    * @brief Multiplies 32 X 64 and returns 32 bit result in 2.30 format.
422    */
423 
mult32x64(q63_t x,q31_t y)424   static __INLINE q63_t mult32x64(
425 				  q63_t x,
426 				  q31_t y)
427   {
428     return ((((q63_t) (x & 0x00000000FFFFFFFF) * y) >> 32) +
429             (((q63_t) (x >> 32) * y)));
430   }
431 
432 
/* Cortex-M0 build with the ARM compiler (__CC_ARM defined): __CLZ is made an
 * alias of the name __clz. */
#if defined (ARM_MATH_CM0) && defined ( __CC_ARM   )
#define __CLZ __clz
#endif
436 
#if defined (ARM_MATH_CM0) && ((defined (__ICCARM__)) ||(defined (__GNUC__)) || defined (__TASKING__) )

/**
 * @brief Software count-leading-zeros for Cortex-M0 builds (IAR, GCC, TASKING).
 * @param[in] data value to examine.
 * @return number of zero bits above the most significant set bit of data,
 *         in the range 0..32; 32 is returned when data is 0.
 *
 * The scan stops once the probe mask has been shifted out, so an all-zero
 * input terminates instead of looping forever.
 */
static __INLINE uint32_t __CLZ(q31_t data)
{
  uint32_t count = 0u;
  uint32_t mask = 0x80000000u;

  while ((mask != 0u) && (((uint32_t) data & mask) == 0u))
  {
    count += 1u;
    mask = mask >> 1u;
  }

  return (count);
}

#endif
458 
459   /**
460    * @brief Function to Calculates 1/in(reciprocal) value of Q31 Data type.
461    */
462 
arm_recip_q31(q31_t in,q31_t * dst,q31_t * pRecipTable)463   static __INLINE uint32_t arm_recip_q31(
464 					 q31_t in,
465 					 q31_t * dst,
466 					 q31_t * pRecipTable)
467   {
468 
469     uint32_t out, tempVal;
470     uint32_t index, i;
471     uint32_t signBits;
472 
473     if(in > 0)
474       {
475 	signBits = __CLZ(in) - 1;
476       }
477     else
478       {
479 	signBits = __CLZ(-in) - 1;
480       }
481 
482     /* Convert input sample to 1.31 format */
483     in = in << signBits;
484 
485     /* calculation of index for initial approximated Val */
486     index = (uint32_t) (in >> 24u);
487     index = (index & INDEX_MASK);
488 
489     /* 1.31 with exp 1 */
490     out = pRecipTable[index];
491 
492     /* calculation of reciprocal value */
493     /* running approximation for two iterations */
494     for (i = 0u; i < 2u; i++)
495       {
496 	tempVal = (q31_t) (((q63_t) in * out) >> 31u);
497 	tempVal = 0x7FFFFFFF - tempVal;
498 	/*      1.31 with exp 1 */
499 	//out = (q31_t) (((q63_t) out * tempVal) >> 30u);
500 	out = (q31_t) clip_q63_to_q31(((q63_t) out * tempVal) >> 30u);
501       }
502 
503     /* write output */
504     *dst = out;
505 
506     /* return num of signbits of out = 1/in value */
507     return (signBits + 1u);
508 
509   }
510 
511   /**
512    * @brief Function to Calculates 1/in(reciprocal) value of Q15 Data type.
513    */
arm_recip_q15(q15_t in,q15_t * dst,q15_t * pRecipTable)514   static __INLINE uint32_t arm_recip_q15(
515 					 q15_t in,
516 					 q15_t * dst,
517 					 q15_t * pRecipTable)
518   {
519 
520     uint32_t out = 0, tempVal = 0;
521     uint32_t index = 0, i = 0;
522     uint32_t signBits = 0;
523 
524     if(in > 0)
525       {
526 	signBits = __CLZ(in) - 17;
527       }
528     else
529       {
530 	signBits = __CLZ(-in) - 17;
531       }
532 
533     /* Convert input sample to 1.15 format */
534     in = in << signBits;
535 
536     /* calculation of index for initial approximated Val */
537     index = in >> 8;
538     index = (index & INDEX_MASK);
539 
540     /*      1.15 with exp 1  */
541     out = pRecipTable[index];
542 
543     /* calculation of reciprocal value */
544     /* running approximation for two iterations */
545     for (i = 0; i < 2; i++)
546       {
547 	tempVal = (q15_t) (((q31_t) in * out) >> 15);
548 	tempVal = 0x7FFF - tempVal;
549 	/*      1.15 with exp 1 */
550 	out = (q15_t) (((q31_t) out * tempVal) >> 14);
551       }
552 
553     /* write output */
554     *dst = out;
555 
556     /* return num of signbits of out = 1/in value */
557     return (signBits + 1);
558 
559   }
560 
561 
/**
 * @brief C custom defined intrinsic function for only M0 processors:
 *        signed saturation of x to a y-bit range.
 * @param[in] x value to saturate.
 * @param[in] y width of the target range in bits (1..32).
 * @return x limited to [-2^(y-1), 2^(y-1) - 1]. When y is outside 1..32 the
 *         value is returned unchanged.
 *
 * The bounds are built with an unsigned shift so that y == 32 does not
 * overflow a signed intermediate, and y == 0 no longer drives a loop whose
 * bound wraps to UINT32_MAX.
 */
#if defined(ARM_MATH_CM0)

static __INLINE q31_t __SSAT(
  q31_t x,
  uint32_t y)
{
  if ((y >= 1u) && (y <= 32u))
  {
    const q31_t posMax = (q31_t) ((1u << (y - 1u)) - 1u);
    const q31_t negMin = -1 - posMax;

    if (x > posMax)
    {
      x = posMax;
    }
    else if (x < negMin)
    {
      x = negMin;
    }
  }

  return (x);
}

#endif /* end of ARM_MATH_CM0 */
604 
605 
606 
607   /*
608    * @brief C custom defined intrinsic function for M3 and M0 processors
609    */
610 #if defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0)
611 
612   /*
613    * @brief C custom defined QADD8 for M3 and M0 processors
614    */
__QADD8(q31_t x,q31_t y)615   static __INLINE q31_t __QADD8(
616 				q31_t x,
617 				q31_t y)
618   {
619 
620     q31_t sum;
621     q7_t r, s, t, u;
622 
623     r = (char) x;
624     s = (char) y;
625 
626     r = __SSAT((q31_t) (r + s), 8);
627     s = __SSAT(((q31_t) (((x << 16) >> 24) + ((y << 16) >> 24))), 8);
628     t = __SSAT(((q31_t) (((x << 8) >> 24) + ((y << 8) >> 24))), 8);
629     u = __SSAT(((q31_t) ((x >> 24) + (y >> 24))), 8);
630 
631     sum = (((q31_t) u << 24) & 0xFF000000) | (((q31_t) t << 16) & 0x00FF0000) |
632       (((q31_t) s << 8) & 0x0000FF00) | (r & 0x000000FF);
633 
634     return sum;
635 
636   }
637 
638   /*
639    * @brief C custom defined QSUB8 for M3 and M0 processors
640    */
__QSUB8(q31_t x,q31_t y)641   static __INLINE q31_t __QSUB8(
642 				q31_t x,
643 				q31_t y)
644   {
645 
646     q31_t sum;
647     q31_t r, s, t, u;
648 
649     r = (char) x;
650     s = (char) y;
651 
652     r = __SSAT((r - s), 8);
653     s = __SSAT(((q31_t) (((x << 16) >> 24) - ((y << 16) >> 24))), 8) << 8;
654     t = __SSAT(((q31_t) (((x << 8) >> 24) - ((y << 8) >> 24))), 8) << 16;
655     u = __SSAT(((q31_t) ((x >> 24) - (y >> 24))), 8) << 24;
656 
657     sum =
658       (u & 0xFF000000) | (t & 0x00FF0000) | (s & 0x0000FF00) | (r & 0x000000FF);
659 
660     return sum;
661   }
662 
663   /*
664    * @brief C custom defined QADD16 for M3 and M0 processors
665    */
666 
667   /*
668    * @brief C custom defined QADD16 for M3 and M0 processors
669    */
__QADD16(q31_t x,q31_t y)670   static __INLINE q31_t __QADD16(
671 				 q31_t x,
672 				 q31_t y)
673   {
674 
675     q31_t sum;
676     q31_t r, s;
677 
678     r = (short) x;
679     s = (short) y;
680 
681     r = __SSAT(r + s, 16);
682     s = __SSAT(((q31_t) ((x >> 16) + (y >> 16))), 16) << 16;
683 
684     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
685 
686     return sum;
687 
688   }
689 
690   /*
691    * @brief C custom defined SHADD16 for M3 and M0 processors
692    */
__SHADD16(q31_t x,q31_t y)693   static __INLINE q31_t __SHADD16(
694 				  q31_t x,
695 				  q31_t y)
696   {
697 
698     q31_t sum;
699     q31_t r, s;
700 
701     r = (short) x;
702     s = (short) y;
703 
704     r = ((r >> 1) + (s >> 1));
705     s = ((q31_t) ((x >> 17) + (y >> 17))) << 16;
706 
707     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
708 
709     return sum;
710 
711   }
712 
713   /*
714    * @brief C custom defined QSUB16 for M3 and M0 processors
715    */
__QSUB16(q31_t x,q31_t y)716   static __INLINE q31_t __QSUB16(
717 				 q31_t x,
718 				 q31_t y)
719   {
720 
721     q31_t sum;
722     q31_t r, s;
723 
724     r = (short) x;
725     s = (short) y;
726 
727     r = __SSAT(r - s, 16);
728     s = __SSAT(((q31_t) ((x >> 16) - (y >> 16))), 16) << 16;
729 
730     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
731 
732     return sum;
733   }
734 
735   /*
736    * @brief C custom defined SHSUB16 for M3 and M0 processors
737    */
__SHSUB16(q31_t x,q31_t y)738   static __INLINE q31_t __SHSUB16(
739 				  q31_t x,
740 				  q31_t y)
741   {
742 
743     q31_t diff;
744     q31_t r, s;
745 
746     r = (short) x;
747     s = (short) y;
748 
749     r = ((r >> 1) - (s >> 1));
750     s = (((x >> 17) - (y >> 17)) << 16);
751 
752     diff = (s & 0xFFFF0000) | (r & 0x0000FFFF);
753 
754     return diff;
755   }
756 
757   /*
758    * @brief C custom defined QASX for M3 and M0 processors
759    */
__QASX(q31_t x,q31_t y)760   static __INLINE q31_t __QASX(
761 			       q31_t x,
762 			       q31_t y)
763   {
764 
765     q31_t sum = 0;
766 
767     sum = ((sum + clip_q31_to_q15((q31_t) ((short) (x >> 16) + (short) y))) << 16) +
768       clip_q31_to_q15((q31_t) ((short) x - (short) (y >> 16)));
769 
770     return sum;
771   }
772 
773   /*
774    * @brief C custom defined SHASX for M3 and M0 processors
775    */
__SHASX(q31_t x,q31_t y)776   static __INLINE q31_t __SHASX(
777 				q31_t x,
778 				q31_t y)
779   {
780 
781     q31_t sum;
782     q31_t r, s;
783 
784     r = (short) x;
785     s = (short) y;
786 
787     r = ((r >> 1) - (y >> 17));
788     s = (((x >> 17) + (s >> 1)) << 16);
789 
790     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
791 
792     return sum;
793   }
794 
795 
796   /*
797    * @brief C custom defined QSAX for M3 and M0 processors
798    */
__QSAX(q31_t x,q31_t y)799   static __INLINE q31_t __QSAX(
800 			       q31_t x,
801 			       q31_t y)
802   {
803 
804     q31_t sum = 0;
805 
806     sum = ((sum + clip_q31_to_q15((q31_t) ((short) (x >> 16) - (short) y))) << 16) +
807       clip_q31_to_q15((q31_t) ((short) x + (short) (y >> 16)));
808 
809     return sum;
810   }
811 
812   /*
813    * @brief C custom defined SHSAX for M3 and M0 processors
814    */
__SHSAX(q31_t x,q31_t y)815   static __INLINE q31_t __SHSAX(
816 				q31_t x,
817 				q31_t y)
818   {
819 
820     q31_t sum;
821     q31_t r, s;
822 
823     r = (short) x;
824     s = (short) y;
825 
826     r = ((r >> 1) + (y >> 17));
827     s = (((x >> 17) - (s >> 1)) << 16);
828 
829     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
830 
831     return sum;
832   }
833 
834   /*
835    * @brief C custom defined SMUSDX for M3 and M0 processors
836    */
__SMUSDX(q31_t x,q31_t y)837   static __INLINE q31_t __SMUSDX(
838 				 q31_t x,
839 				 q31_t y)
840   {
841 
842     return ((q31_t)(((short) x * (short) (y >> 16)) -
843 		    ((short) (x >> 16) * (short) y)));
844   }
845 
846   /*
847    * @brief C custom defined SMUADX for M3 and M0 processors
848    */
__SMUADX(q31_t x,q31_t y)849   static __INLINE q31_t __SMUADX(
850 				 q31_t x,
851 				 q31_t y)
852   {
853 
854     return ((q31_t)(((short) x * (short) (y >> 16)) +
855 		    ((short) (x >> 16) * (short) y)));
856   }
857 
858   /*
859    * @brief C custom defined QADD for M3 and M0 processors
860    */
__QADD(q31_t x,q31_t y)861   static __INLINE q31_t __QADD(
862 			       q31_t x,
863 			       q31_t y)
864   {
865     return clip_q63_to_q31((q63_t) x + y);
866   }
867 
868   /*
869    * @brief C custom defined QSUB for M3 and M0 processors
870    */
__QSUB(q31_t x,q31_t y)871   static __INLINE q31_t __QSUB(
872 			       q31_t x,
873 			       q31_t y)
874   {
875     return clip_q63_to_q31((q63_t) x - y);
876   }
877 
878   /*
879    * @brief C custom defined SMLAD for M3 and M0 processors
880    */
__SMLAD(q31_t x,q31_t y,q31_t sum)881   static __INLINE q31_t __SMLAD(
882 				q31_t x,
883 				q31_t y,
884 				q31_t sum)
885   {
886 
887     return (sum + ((short) (x >> 16) * (short) (y >> 16)) +
888             ((short) x * (short) y));
889   }
890 
891   /*
892    * @brief C custom defined SMLADX for M3 and M0 processors
893    */
__SMLADX(q31_t x,q31_t y,q31_t sum)894   static __INLINE q31_t __SMLADX(
895 				 q31_t x,
896 				 q31_t y,
897 				 q31_t sum)
898   {
899 
900     return (sum + ((short) (x >> 16) * (short) (y)) +
901             ((short) x * (short) (y >> 16)));
902   }
903 
904   /*
905    * @brief C custom defined SMLSDX for M3 and M0 processors
906    */
__SMLSDX(q31_t x,q31_t y,q31_t sum)907   static __INLINE q31_t __SMLSDX(
908 				 q31_t x,
909 				 q31_t y,
910 				 q31_t sum)
911   {
912 
913     return (sum - ((short) (x >> 16) * (short) (y)) +
914             ((short) x * (short) (y >> 16)));
915   }
916 
917   /*
918    * @brief C custom defined SMLALD for M3 and M0 processors
919    */
__SMLALD(q31_t x,q31_t y,q63_t sum)920   static __INLINE q63_t __SMLALD(
921 				 q31_t x,
922 				 q31_t y,
923 				 q63_t sum)
924   {
925 
926     return (sum + ((short) (x >> 16) * (short) (y >> 16)) +
927             ((short) x * (short) y));
928   }
929 
930   /*
931    * @brief C custom defined SMLALDX for M3 and M0 processors
932    */
__SMLALDX(q31_t x,q31_t y,q63_t sum)933   static __INLINE q63_t __SMLALDX(
934 				  q31_t x,
935 				  q31_t y,
936 				  q63_t sum)
937   {
938 
939     return (sum + ((short) (x >> 16) * (short) y)) +
940       ((short) x * (short) (y >> 16));
941   }
942 
943   /*
944    * @brief C custom defined SMUAD for M3 and M0 processors
945    */
__SMUAD(q31_t x,q31_t y)946   static __INLINE q31_t __SMUAD(
947 				q31_t x,
948 				q31_t y)
949   {
950 
951     return (((x >> 16) * (y >> 16)) +
952             (((x << 16) >> 16) * ((y << 16) >> 16)));
953   }
954 
955   /*
956    * @brief C custom defined SMUSD for M3 and M0 processors
957    */
__SMUSD(q31_t x,q31_t y)958   static __INLINE q31_t __SMUSD(
959 				q31_t x,
960 				q31_t y)
961   {
962 
963     return (-((x >> 16) * (y >> 16)) +
964             (((x << 16) >> 16) * ((y << 16) >> 16)));
965   }
966 
967 
968 
969 
970 #endif /* (ARM_MATH_CM3) || defined (ARM_MATH_CM0) */
971 
972 
/**
 * @brief Instance structure for the Q7 FIR filter.
 *
 * Holds the tap count together with pointers to the state and coefficient
 * arrays; the structure stores pointers only, not the arrays themselves.
 */
typedef struct
{
  uint16_t numTaps;        /**< number of filter coefficients in the filter. */
  q7_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
  q7_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
} arm_fir_instance_q7;
982 
/**
 * @brief Instance structure for the Q15 FIR filter.
 *
 * Holds the tap count together with pointers to the state and coefficient
 * arrays; the structure stores pointers only, not the arrays themselves.
 */
typedef struct
{
  uint16_t numTaps;         /**< number of filter coefficients in the filter. */
  q15_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
  q15_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
} arm_fir_instance_q15;
992 
/**
 * @brief Instance structure for the Q31 FIR filter.
 *
 * Holds the tap count together with pointers to the state and coefficient
 * arrays; the structure stores pointers only, not the arrays themselves.
 */
typedef struct
{
  uint16_t numTaps;         /**< number of filter coefficients in the filter. */
  q31_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
  q31_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps. */
} arm_fir_instance_q31;
1002 
/**
 * @brief Instance structure for the floating-point FIR filter.
 *
 * Holds the tap count together with pointers to the state and coefficient
 * arrays; the structure stores pointers only, not the arrays themselves.
 */
typedef struct
{
  uint16_t numTaps;     /**< number of filter coefficients in the filter. */
  float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
  float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
} arm_fir_instance_f32;
1012 
1013 
/**
 * @brief Processing function for the Q7 FIR filter.
 * @param[in] *S points to an instance of the Q7 FIR filter structure.
 * @param[in] *pSrc points to the block of input data.
 * @param[out] *pDst points to the block of output data.
 * @param[in] blockSize number of samples to process.
 * @return none.
 *
 * @note Declaration only; the definition is not part of this header.
 * @note pSrc is documented as an input but is declared without const.
 */
void arm_fir_q7(
		  const arm_fir_instance_q7 * S,
		   q7_t * pSrc,
		  q7_t * pDst,
		  uint32_t blockSize);
1027 
1028 
/**
 * @brief  Initialization function for the Q7 FIR filter.
 * @param[in,out] *S points to an instance of the Q7 FIR structure.
 * @param[in] numTaps  Number of filter coefficients in the filter.
 * @param[in] *pCoeffs points to the filter coefficients.
 * @param[in] *pState points to the state buffer.
 * @param[in] blockSize number of samples that are processed.
 * @return none
 *
 * @note Declaration only; the definition is not part of this header.
 * @note The arm_fir_instance_q7 field comments give the expected array
 *       lengths: numTaps for the coefficients and numTaps+blockSize-1 for the
 *       state buffer.
 */
void arm_fir_init_q7(
		       arm_fir_instance_q7 * S,
		       uint16_t numTaps,
		       q7_t * pCoeffs,
		       q7_t * pState,
		       uint32_t blockSize);
1044 
1045 
/**
 * @brief Processing function for the Q15 FIR filter.
 * @param[in] *S points to an instance of the Q15 FIR structure.
 * @param[in] *pSrc points to the block of input data.
 * @param[out] *pDst points to the block of output data.
 * @param[in] blockSize number of samples to process.
 * @return none.
 *
 * @note Declaration only; the definition is not part of this header.
 * @note pSrc is documented as an input but is declared without const.
 */
void arm_fir_q15(
		   const arm_fir_instance_q15 * S,
		    q15_t * pSrc,
		   q15_t * pDst,
		   uint32_t blockSize);
1059 
1060   /**
1061    * @brief Processing function for the fast Q15 FIR filter for Cortex-M3 and Cortex-M4.
1062    * @param[in] *S points to an instance of the Q15 FIR filter structure.
1063    * @param[in] *pSrc points to the block of input data.
1064    * @param[out] *pDst points to the block of output data.
1065    * @param[in] blockSize number of samples to process.
1066    * @return none.
        * @note Takes the same instance type (arm_fir_instance_q15) as arm_fir_q15();
        *       there is no separate "fast" instance or init function declared here.
        * @note pSrc is documented as input-only but is not const-qualified.
1067    */
1068   void arm_fir_fast_q15(
1069 			const arm_fir_instance_q15 * S,
1070 			 q15_t * pSrc,
1071 			q15_t * pDst,
1072 			uint32_t blockSize);
1073 
1074   /**
1075    * @brief  Initialization function for the Q15 FIR filter.
1076    * @param[in,out] *S points to an instance of the Q15 FIR filter structure.
1077    * @param[in] numTaps  Number of filter coefficients in the filter. Must be even and greater than or equal to 4.
1078    * @param[in] *pCoeffs points to the filter coefficients.
1079    * @param[in] *pState points to the state buffer.
1080    * @param[in] blockSize number of samples that are processed at a time.
1081    * @return The function returns ARM_MATH_SUCCESS if initialization was successful or ARM_MATH_ARGUMENT_ERROR if
1082    * <code>numTaps</code> is not a supported value.
        * @note Unlike the Q7, Q31 and floating-point FIR init functions, which return
        *       void, this one returns an arm_status that the caller should check.
1083    */
1084 
1085        arm_status arm_fir_init_q15(
1086 			      arm_fir_instance_q15 * S,
1087 			      uint16_t numTaps,
1088 			      q15_t * pCoeffs,
1089 			      q15_t * pState,
1090 			      uint32_t blockSize);
1091 
1092   /**
1093    * @brief Processing function for the Q31 FIR filter.
1094    * @param[in] *S points to an instance of the Q31 FIR filter structure.
1095    * @param[in] *pSrc points to the block of input data.
1096    * @param[out] *pDst points to the block of output data.
1097    * @param[in] blockSize number of samples to process.
1098    * @return none.
        * @note pSrc is documented as input-only but is not const-qualified in this
        *       prototype, so callers holding const data need a cast.
        * @see arm_fir_fast_q31() for the variant declared for Cortex-M3 and Cortex-M4.
1099    */
1100   void arm_fir_q31(
1101 		   const arm_fir_instance_q31 * S,
1102 		    q31_t * pSrc,
1103 		   q31_t * pDst,
1104 		   uint32_t blockSize);
1105 
1106   /**
1107    * @brief Processing function for the fast Q31 FIR filter for Cortex-M3 and Cortex-M4.
1108    * @param[in] *S points to an instance of the Q31 FIR structure.
1109    * @param[in] *pSrc points to the block of input data.
1110    * @param[out] *pDst points to the block of output data.
1111    * @param[in] blockSize number of samples to process.
1112    * @return none.
        * @note Takes the same instance type (arm_fir_instance_q31) as arm_fir_q31().
        * @note pSrc is documented as input-only but is not const-qualified.
1113    */
1114   void arm_fir_fast_q31(
1115 			const arm_fir_instance_q31 * S,
1116 			 q31_t * pSrc,
1117 			q31_t * pDst,
1118 			uint32_t blockSize);
1119 
1120   /**
1121    * @brief  Initialization function for the Q31 FIR filter.
1122    * @param[in,out] *S points to an instance of the Q31 FIR structure.
1123    * @param[in] 	numTaps  Number of filter coefficients in the filter.
1124    * @param[in] 	*pCoeffs points to the filter coefficients. arm_fir_instance_q31 documents this array as numTaps long.
1125    * @param[in] 	*pState points to the state buffer. arm_fir_instance_q31 documents this array as numTaps+blockSize-1 long.
1126    * @param[in] 	blockSize number of samples that are processed at a time.
1127    * @return 		none.
1128    */
1129   void arm_fir_init_q31(
1130 			arm_fir_instance_q31 * S,
1131 			uint16_t numTaps,
1132 			q31_t * pCoeffs,
1133 			q31_t * pState,
1134 			uint32_t blockSize);
1135 
1136   /**
1137    * @brief Processing function for the floating-point FIR filter.
1138    * @param[in] *S points to an instance of the floating-point FIR structure.
1139    * @param[in] *pSrc points to the block of input data.
1140    * @param[out] *pDst points to the block of output data.
1141    * @param[in] blockSize number of samples to process.
1142    * @return none.
        * @note pSrc is documented as input-only but is not const-qualified in this
        *       prototype, so callers holding const data need a cast.
        * @note S is const, but arm_fir_instance_f32 holds a non-const pState pointer,
        *       so the state array it points to is still reachable for writing.
1143    */
1144   void arm_fir_f32(
1145 		   const arm_fir_instance_f32 * S,
1146 		    float32_t * pSrc,
1147 		   float32_t * pDst,
1148 		   uint32_t blockSize);
1149 
1150   /**
1151    * @brief  Initialization function for the floating-point FIR filter.
1152    * @param[in,out] *S points to an instance of the floating-point FIR filter structure.
1153    * @param[in] 	numTaps  Number of filter coefficients in the filter.
1154    * @param[in] 	*pCoeffs points to the filter coefficients. arm_fir_instance_f32 documents this array as numTaps long.
1155    * @param[in] 	*pState points to the state buffer. arm_fir_instance_f32 documents this array as numTaps+blockSize-1 long.
1156    * @param[in] 	blockSize number of samples that are processed at a time.
1157    * @return    	none.
1158    */
1159   void arm_fir_init_f32(
1160 			arm_fir_instance_f32 * S,
1161 			uint16_t numTaps,
1162 			float32_t * pCoeffs,
1163 			float32_t * pState,
1164 			uint32_t blockSize);
1165 
1166 
1167   /**
1168    * @brief Instance structure for the Q15 Biquad cascade filter.
        *
        * @note numStages is a signed 8-bit member here, whereas the Q31 and
        *       floating-point instances use uint32_t and
        *       arm_biquad_cascade_df1_init_q15() takes a uint8_t.
1169    */
1170   typedef struct
1171   {
1172     int8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
1173     q15_t *pState;            /**< Points to the state array.  The array is of length 4*numStages. */
1174     q15_t *pCoeffs;           /**< Points to the array of coefficients.  The array is of length 5*numStages. */
1175     int8_t postShift;         /**< Additional shift, in bits, applied to each output sample. */
1176 
1177   } arm_biquad_casd_df1_inst_q15;
1178 
1179 
1180   /**
1181    * @brief Instance structure for the Q31 Biquad cascade filter.
        *
        * @note postShift is unsigned (uint8_t) in this structure, while
        *       arm_biquad_cascade_df1_init_q31() takes it as int8_t and the Q15
        *       instance stores it as int8_t.
1182    */
1183   typedef struct
1184   {
1185     uint32_t numStages;      /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
1186     q31_t *pState;           /**< Points to the state array.  The array is of length 4*numStages. */
1187     q31_t *pCoeffs;          /**< Points to the array of coefficients.  The array is of length 5*numStages. */
1188     uint8_t postShift;       /**< Additional shift, in bits, applied to each output sample. */
1189 
1190   } arm_biquad_casd_df1_inst_q31;
1191 
1192   /**
1193    * @brief Instance structure for the floating-point Biquad cascade filter.
        *
        * Unlike the Q15 and Q31 instances, this structure has no postShift member.
1194    */
1195   typedef struct
1196   {
1197     uint32_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
1198     float32_t *pState;          /**< Points to the state array.  The array is of length 4*numStages. */
1199     float32_t *pCoeffs;         /**< Points to the array of coefficients.  The array is of length 5*numStages. */
1200 
1201 
1202   } arm_biquad_casd_df1_inst_f32;
1203 
1204 
1205 
1206   /**
1207    * @brief Processing function for the Q15 Biquad cascade filter.
1208    * @param[in]  *S points to an instance of the Q15 Biquad cascade structure.
1209    * @param[in]  *pSrc points to the block of input data.
1210    * @param[out] *pDst points to the block of output data.
1211    * @param[in]  blockSize number of samples to process.
1212    * @return     none.
        * @note pSrc is documented as input-only but is not const-qualified in this
        *       prototype, so callers holding const data need a cast.
        * @see arm_biquad_cascade_df1_fast_q15() for the fast, less precise variant.
1213    */
1214 
1215   void arm_biquad_cascade_df1_q15(
1216 				  const arm_biquad_casd_df1_inst_q15 * S,
1217 				   q15_t * pSrc,
1218 				  q15_t * pDst,
1219 				  uint32_t blockSize);
1220 
1221   /**
1222    * @brief  Initialization function for the Q15 Biquad cascade filter.
1223    * @param[in,out] *S           points to an instance of the Q15 Biquad cascade structure.
1224    * @param[in]     numStages    number of 2nd order stages in the filter.
1225    * @param[in]     *pCoeffs     points to the filter coefficients. The instance structure documents 5*numStages entries.
1226    * @param[in]     *pState      points to the state buffer. The instance structure documents 4*numStages entries.
1227    * @param[in]     postShift    Shift to be applied to the output. Varies according to the coefficients format.
1228    * @return        none.
        * @note numStages is uint8_t here but the instance member is int8_t, so values
        *       above 127 do not fit the member's range - NOTE(review): confirm how
        *       the implementation stores it.
1229    */
1230 
1231   void arm_biquad_cascade_df1_init_q15(
1232 				       arm_biquad_casd_df1_inst_q15 * S,
1233 				       uint8_t numStages,
1234 				       q15_t * pCoeffs,
1235 				       q15_t * pState,
1236 				       int8_t postShift);
1237 
1238 
1239   /**
1240    * @brief Fast but less precise processing function for the Q15 Biquad cascade filter for Cortex-M3 and Cortex-M4.
1241    * @param[in]  *S points to an instance of the Q15 Biquad cascade structure.
1242    * @param[in]  *pSrc points to the block of input data.
1243    * @param[out] *pDst points to the block of output data.
1244    * @param[in]  blockSize number of samples to process.
1245    * @return     none.
        * @note Takes the same instance type (arm_biquad_casd_df1_inst_q15) as
        *       arm_biquad_cascade_df1_q15().
        * @note pSrc is documented as input-only but is not const-qualified.
1246    */
1247 
1248   void arm_biquad_cascade_df1_fast_q15(
1249 				       const arm_biquad_casd_df1_inst_q15 * S,
1250 				        q15_t * pSrc,
1251 				       q15_t * pDst,
1252 				       uint32_t blockSize);
1253 
1254 
1255   /**
1256    * @brief Processing function for the Q31 Biquad cascade filter.
1257    * @param[in]  *S         points to an instance of the Q31 Biquad cascade structure.
1258    * @param[in]  *pSrc      points to the block of input data.
1259    * @param[out] *pDst      points to the block of output data.
1260    * @param[in]  blockSize  number of samples to process.
1261    * @return     none.
        * @note pSrc is documented as input-only but is not const-qualified in this
        *       prototype, so callers holding const data need a cast.
        * @see arm_biquad_cascade_df1_fast_q31() for the fast, less precise variant.
1262    */
1263 
1264   void arm_biquad_cascade_df1_q31(
1265 				  const arm_biquad_casd_df1_inst_q31 * S,
1266 				   q31_t * pSrc,
1267 				  q31_t * pDst,
1268 				  uint32_t blockSize);
1269 
1270   /**
1271    * @brief Fast but less precise processing function for the Q31 Biquad cascade filter for Cortex-M3 and Cortex-M4.
1272    * @param[in]  *S         points to an instance of the Q31 Biquad cascade structure.
1273    * @param[in]  *pSrc      points to the block of input data.
1274    * @param[out] *pDst      points to the block of output data.
1275    * @param[in]  blockSize  number of samples to process.
1276    * @return     none.
        * @note Takes the same instance type (arm_biquad_casd_df1_inst_q31) as
        *       arm_biquad_cascade_df1_q31().
        * @note pSrc is documented as input-only but is not const-qualified.
1277    */
1278 
1279   void arm_biquad_cascade_df1_fast_q31(
1280 				       const arm_biquad_casd_df1_inst_q31 * S,
1281 				        q31_t * pSrc,
1282 				       q31_t * pDst,
1283 				       uint32_t blockSize);
1284 
1285   /**
1286    * @brief  Initialization function for the Q31 Biquad cascade filter.
1287    * @param[in,out] *S           points to an instance of the Q31 Biquad cascade structure.
1288    * @param[in]     numStages    number of 2nd order stages in the filter.
1289    * @param[in]     *pCoeffs     points to the filter coefficients. The instance structure documents 5*numStages entries.
1290    * @param[in]     *pState      points to the state buffer. The instance structure documents 4*numStages entries.
1291    * @param[in]     postShift    Shift to be applied to the output. Varies according to the coefficients format.
1292    * @return        none.
        * @note postShift is int8_t here but the instance member is uint8_t, and
        *       numStages is uint8_t here but uint32_t in the instance -
        *       NOTE(review): confirm negative postShift values are not expected.
1293    */
1294 
1295   void arm_biquad_cascade_df1_init_q31(
1296 				       arm_biquad_casd_df1_inst_q31 * S,
1297 				       uint8_t numStages,
1298 				       q31_t * pCoeffs,
1299 				       q31_t * pState,
1300 				       int8_t postShift);
1301 
1302   /**
1303    * @brief Processing function for the floating-point Biquad cascade filter.
1304    * @param[in]  *S         points to an instance of the floating-point Biquad cascade structure.
1305    * @param[in]  *pSrc      points to the block of input data.
1306    * @param[out] *pDst      points to the block of output data.
1307    * @param[in]  blockSize  number of samples to process.
1308    * @return     none.
        * @note pSrc is documented as input-only but is not const-qualified in this
        *       prototype, so callers holding const data need a cast.
1309    */
1310 
1311   void arm_biquad_cascade_df1_f32(
1312 				  const arm_biquad_casd_df1_inst_f32 * S,
1313 				   float32_t * pSrc,
1314 				  float32_t * pDst,
1315 				  uint32_t blockSize);
1316 
1317   /**
1318    * @brief  Initialization function for the floating-point Biquad cascade filter.
1319    * @param[in,out] *S           points to an instance of the floating-point Biquad cascade structure.
1320    * @param[in]     numStages    number of 2nd order stages in the filter.
1321    * @param[in]     *pCoeffs     points to the filter coefficients. The instance structure documents 5*numStages entries.
1322    * @param[in]     *pState      points to the state buffer. The instance structure documents 4*numStages entries.
1323    * @return        none.
        * @note There is no postShift parameter, matching the floating-point instance
        *       structure, which has no such member.
1324    */
1325 
1326   void arm_biquad_cascade_df1_init_f32(
1327 				       arm_biquad_casd_df1_inst_f32 * S,
1328 				       uint8_t numStages,
1329 				       float32_t * pCoeffs,
1330 				       float32_t * pState);
1331 
1332 
1333   /**
1334    * @brief Instance structure for the floating-point matrix structure.
        *
        * Stores the matrix dimensions and a pointer to the element data; the
        * elements themselves are not part of the structure. See arm_mat_init_f32().
1335    */
1336 
1337   typedef struct
1338   {
1339     uint16_t numRows;     /**< number of rows of the matrix.     */
1340     uint16_t numCols;     /**< number of columns of the matrix.  */
1341     float32_t *pData;     /**< points to the data of the matrix. */
1342   } arm_matrix_instance_f32;
1343 
1344   /**
1345    * @brief Instance structure for the Q15 matrix structure.
        *
        * Stores the matrix dimensions and a pointer to the element data; the
        * elements themselves are not part of the structure. See arm_mat_init_q15().
1346    */
1347 
1348   typedef struct
1349   {
1350     uint16_t numRows;     /**< number of rows of the matrix.     */
1351     uint16_t numCols;     /**< number of columns of the matrix.  */
1352     q15_t *pData;         /**< points to the data of the matrix. */
1353 
1354   } arm_matrix_instance_q15;
1355 
1356   /**
1357    * @brief Instance structure for the Q31 matrix structure.
        *
        * Stores the matrix dimensions and a pointer to the element data; the
        * elements themselves are not part of the structure. See arm_mat_init_q31().
1358    */
1359 
1360   typedef struct
1361   {
1362     uint16_t numRows;     /**< number of rows of the matrix.     */
1363     uint16_t numCols;     /**< number of columns of the matrix.  */
1364     q31_t *pData;         /**< points to the data of the matrix. */
1365 
1366   } arm_matrix_instance_q31;
1367 
1368 
1369 
1370   /**
1371    * @brief Floating-point matrix addition.
1372    * @param[in]       *pSrcA points to the first input matrix structure.
1373    * @param[in]       *pSrcB points to the second input matrix structure.
1374    * @param[out]      *pDst points to the output matrix structure.
1375    * @return     An arm_status value: either
1376    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code>, based on the outcome of size checking.
1377    */
1378 
1379   arm_status arm_mat_add_f32(
1380 			     const arm_matrix_instance_f32 * pSrcA,
1381 			     const arm_matrix_instance_f32 * pSrcB,
1382 			     arm_matrix_instance_f32 * pDst);
1383 
1384   /**
1385    * @brief Q15 matrix addition.
1386    * @param[in]       *pSrcA points to the first input matrix structure.
1387    * @param[in]       *pSrcB points to the second input matrix structure.
1388    * @param[out]      *pDst points to the output matrix structure.
1389    * @return     An arm_status value: either
1390    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code>, based on the outcome of size checking.
1391    */
1392 
1393   arm_status arm_mat_add_q15(
1394 			     const arm_matrix_instance_q15 * pSrcA,
1395 			     const arm_matrix_instance_q15 * pSrcB,
1396 			     arm_matrix_instance_q15 * pDst);
1397 
1398   /**
1399    * @brief Q31 matrix addition.
1400    * @param[in]       *pSrcA points to the first input matrix structure.
1401    * @param[in]       *pSrcB points to the second input matrix structure.
1402    * @param[out]      *pDst points to the output matrix structure.
1403    * @return     An arm_status value: either
1404    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code>, based on the outcome of size checking.
1405    */
1406 
1407   arm_status arm_mat_add_q31(
1408 			     const arm_matrix_instance_q31 * pSrcA,
1409 			     const arm_matrix_instance_q31 * pSrcB,
1410 			     arm_matrix_instance_q31 * pDst);
1411 
1412 
1413   /**
1414    * @brief Floating-point matrix transpose.
1415    * @param[in]  *pSrc points to the input matrix structure.
1416    * @param[out] *pDst points to the output matrix structure.
1417    * @return 	An arm_status value: either <code>ARM_MATH_SIZE_MISMATCH</code>
1418    * or <code>ARM_MATH_SUCCESS</code>, based on the outcome of size checking.
1419    */
1420 
1421   arm_status arm_mat_trans_f32(
1422 			       const arm_matrix_instance_f32 * pSrc,
1423 			       arm_matrix_instance_f32 * pDst);
1424 
1425 
1426   /**
1427    * @brief Q15 matrix transpose.
1428    * @param[in]  *pSrc points to the input matrix structure.
1429    * @param[out] *pDst points to the output matrix structure.
1430    * @return 	An arm_status value: either <code>ARM_MATH_SIZE_MISMATCH</code>
1431    * or <code>ARM_MATH_SUCCESS</code>, based on the outcome of size checking.
1432    */
1433 
1434   arm_status arm_mat_trans_q15(
1435 			       const arm_matrix_instance_q15 * pSrc,
1436 			       arm_matrix_instance_q15 * pDst);
1437 
1438   /**
1439    * @brief Q31 matrix transpose.
1440    * @param[in]  *pSrc points to the input matrix structure.
1441    * @param[out] *pDst points to the output matrix structure.
1442    * @return 	An arm_status value: either <code>ARM_MATH_SIZE_MISMATCH</code>
1443    * or <code>ARM_MATH_SUCCESS</code>, based on the outcome of size checking.
1444    */
1445 
1446   arm_status arm_mat_trans_q31(
1447 			       const arm_matrix_instance_q31 * pSrc,
1448 			       arm_matrix_instance_q31 * pDst);
1449 
1450 
1451   /**
1452    * @brief Floating-point matrix multiplication.
1453    * @param[in]       *pSrcA points to the first input matrix structure.
1454    * @param[in]       *pSrcB points to the second input matrix structure.
1455    * @param[out]      *pDst points to the output matrix structure.
1456    * @return     An arm_status value: either
1457    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code>, based on the outcome of size checking.
1458    */
1459 
1460   arm_status arm_mat_mult_f32(
1461 			      const arm_matrix_instance_f32 * pSrcA,
1462 			      const arm_matrix_instance_f32 * pSrcB,
1463 			      arm_matrix_instance_f32 * pDst);
1464 
1465   /**
1466    * @brief Q15 matrix multiplication.
1467    * @param[in]       *pSrcA points to the first input matrix structure.
1468    * @param[in]       *pSrcB points to the second input matrix structure.
1469    * @param[out]      *pDst points to the output matrix structure.
        * @param[in]       *pState was not described in the original comment. arm_mat_mult_fast_q15()
        *                  documents its pState as the array for storing intermediate results;
        *                  presumably the same applies here - TODO confirm, including the required length.
1470    * @return     An arm_status value: either
1471    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code>, based on the outcome of size checking.
1472    */
1473 
1474   arm_status arm_mat_mult_q15(
1475 			      const arm_matrix_instance_q15 * pSrcA,
1476 			      const arm_matrix_instance_q15 * pSrcB,
1477 			      arm_matrix_instance_q15 * pDst,
1478 			      q15_t * pState);
1479 
1480   /**
1481    * @brief Q15 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4.
1482    * @param[in]       *pSrcA  points to the first input matrix structure.
1483    * @param[in]       *pSrcB  points to the second input matrix structure.
1484    * @param[out]      *pDst   points to the output matrix structure.
1485    * @param[in]       *pState points to the array for storing intermediate results
        *                   (required length is not stated here - TODO confirm).
1486    * @return     An arm_status value: either
1487    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code>, based on the outcome of size checking.
1488    */
1489 
1490   arm_status arm_mat_mult_fast_q15(
1491 				   const arm_matrix_instance_q15 * pSrcA,
1492 				   const arm_matrix_instance_q15 * pSrcB,
1493 				   arm_matrix_instance_q15 * pDst,
1494 				   q15_t * pState);
1495 
1496   /**
1497    * @brief Q31 matrix multiplication.
1498    * @param[in]       *pSrcA points to the first input matrix structure.
1499    * @param[in]       *pSrcB points to the second input matrix structure.
1500    * @param[out]      *pDst points to the output matrix structure.
1501    * @return     An arm_status value: either
1502    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code>, based on the outcome of size checking.
        * @see arm_mat_mult_fast_q31() for the variant declared for Cortex-M3 and Cortex-M4.
1503    */
1504 
1505   arm_status arm_mat_mult_q31(
1506 			      const arm_matrix_instance_q31 * pSrcA,
1507 			      const arm_matrix_instance_q31 * pSrcB,
1508 			      arm_matrix_instance_q31 * pDst);
1509 
1510   /**
1511    * @brief Q31 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4.
1512    * @param[in]       *pSrcA points to the first input matrix structure.
1513    * @param[in]       *pSrcB points to the second input matrix structure.
1514    * @param[out]      *pDst points to the output matrix structure.
1515    * @return     An arm_status value: either
1516    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code>, based on the outcome of size checking.
        * @note Unlike arm_mat_mult_fast_q15(), this variant takes no pState scratch array.
1517    */
1518 
1519   arm_status arm_mat_mult_fast_q31(
1520 				   const arm_matrix_instance_q31 * pSrcA,
1521 				   const arm_matrix_instance_q31 * pSrcB,
1522 				   arm_matrix_instance_q31 * pDst);
1523 
1524 
1525   /**
1526    * @brief Floating-point matrix subtraction.
1527    * @param[in]       *pSrcA points to the first input matrix structure.
1528    * @param[in]       *pSrcB points to the second input matrix structure.
1529    * @param[out]      *pDst points to the output matrix structure.
1530    * @return     An arm_status value: either
1531    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code>, based on the outcome of size checking.
1532    */
1533 
1534   arm_status arm_mat_sub_f32(
1535 			     const arm_matrix_instance_f32 * pSrcA,
1536 			     const arm_matrix_instance_f32 * pSrcB,
1537 			     arm_matrix_instance_f32 * pDst);
1538 
1539   /**
1540    * @brief Q15 matrix subtraction.
1541    * @param[in]       *pSrcA points to the first input matrix structure.
1542    * @param[in]       *pSrcB points to the second input matrix structure.
1543    * @param[out]      *pDst points to the output matrix structure.
1544    * @return     An arm_status value: either
1545    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code>, based on the outcome of size checking.
1546    */
1547 
1548   arm_status arm_mat_sub_q15(
1549 			     const arm_matrix_instance_q15 * pSrcA,
1550 			     const arm_matrix_instance_q15 * pSrcB,
1551 			     arm_matrix_instance_q15 * pDst);
1552 
1553   /**
1554    * @brief Q31 matrix subtraction.
1555    * @param[in]       *pSrcA points to the first input matrix structure.
1556    * @param[in]       *pSrcB points to the second input matrix structure.
1557    * @param[out]      *pDst points to the output matrix structure.
1558    * @return     An arm_status value: either
1559    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code>, based on the outcome of size checking.
1560    */
1561 
1562   arm_status arm_mat_sub_q31(
1563 			     const arm_matrix_instance_q31 * pSrcA,
1564 			     const arm_matrix_instance_q31 * pSrcB,
1565 			     arm_matrix_instance_q31 * pDst);
1566 
1567   /**
1568    * @brief Floating-point matrix scaling.
1569    * @param[in]  *pSrc points to the input matrix structure.
1570    * @param[in]  scale scale factor, passed by value as a float32_t.
1571    * @param[out] *pDst points to the output matrix structure.
1572    * @return     An arm_status value: either
1573    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code>, based on the outcome of size checking.
1574    */
1575 
1576   arm_status arm_mat_scale_f32(
1577 			       const arm_matrix_instance_f32 * pSrc,
1578 			       float32_t scale,
1579 			       arm_matrix_instance_f32 * pDst);
1580 
1581   /**
1582    * @brief Q15 matrix scaling.
1583    * @param[in]       *pSrc points to the input matrix structure.
1584    * @param[in]       scaleFract fractional portion of the scale factor.
1585    * @param[in]       shift number of bits to shift the result by (signed; the
        *                  direction convention is not stated here - TODO confirm).
1586    * @param[out]      *pDst points to the output matrix structure.
1587    * @return     An arm_status value: either
1588    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code>, based on the outcome of size checking.
1589    */
1590 
1591   arm_status arm_mat_scale_q15(
1592 			       const arm_matrix_instance_q15 * pSrc,
1593 			       q15_t scaleFract,
1594 			       int32_t shift,
1595 			       arm_matrix_instance_q15 * pDst);
1596 
1597   /**
1598    * @brief Q31 matrix scaling.
1599    * @param[in]       *pSrc points to the input matrix structure.
1600    * @param[in]       scaleFract fractional portion of the scale factor.
1601    * @param[in]       shift number of bits to shift the result by (signed; the
        *                  direction convention is not stated here - TODO confirm).
1602    * @param[out]      *pDst points to the output matrix structure.
1603    * @return     An arm_status value: either
1604    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code>, based on the outcome of size checking.
1605    */
1606 
1607   arm_status arm_mat_scale_q31(
1608 			       const arm_matrix_instance_q31 * pSrc,
1609 			       q31_t scaleFract,
1610 			       int32_t shift,
1611 			       arm_matrix_instance_q31 * pDst);
1612 
1613 
1614   /**
1615    * @brief  Q31 matrix initialization.
1616    * @param[in,out] *S             points to an instance of the Q31 matrix structure.
1617    * @param[in]     nRows          number of rows in the matrix.
1618    * @param[in]     nColumns       number of columns in the matrix.
1619    * @param[in]     *pData	       points to the matrix data array.
1620    * @return        none.
        * @note The parameters correspond by type to the numRows, numCols and pData
        *       members of arm_matrix_instance_q31.
1621    */
1622 
1623   void arm_mat_init_q31(
1624 			arm_matrix_instance_q31 * S,
1625 			uint16_t nRows,
1626 			uint16_t nColumns,
1627 			q31_t   *pData);
1628 
1629   /**
1630    * @brief  Q15 matrix initialization.
1631    * @param[in,out] *S             points to an instance of the Q15 matrix structure.
1632    * @param[in]     nRows          number of rows in the matrix.
1633    * @param[in]     nColumns       number of columns in the matrix.
1634    * @param[in]     *pData	       points to the matrix data array.
1635    * @return        none.
        * @note The parameters correspond by type to the numRows, numCols and pData
        *       members of arm_matrix_instance_q15.
1636    */
1637 
1638   void arm_mat_init_q15(
1639 			arm_matrix_instance_q15 * S,
1640 			uint16_t nRows,
1641 			uint16_t nColumns,
1642 			q15_t    *pData);
1643 
1644   /**
1645    * @brief  Floating-point matrix initialization.
1646    * @param[in,out] *S             points to an instance of the floating-point matrix structure.
1647    * @param[in]     nRows          number of rows in the matrix.
1648    * @param[in]     nColumns       number of columns in the matrix.
1649    * @param[in]     *pData	       points to the matrix data array.
1650    * @return        none.
        * @note The parameters correspond by type to the numRows, numCols and pData
        *       members of arm_matrix_instance_f32.
1651    */
1652 
1653   void arm_mat_init_f32(
1654 			arm_matrix_instance_f32 * S,
1655 			uint16_t nRows,
1656 			uint16_t nColumns,
1657 			float32_t   *pData);
1658 
1659 
1660 
1661   /**
1662    * @brief Instance structure for the Q15 PID Control.
        *
        * The layout depends on ARM_MATH_CM0: when it is defined, A1 and A2 are
        * separate q15_t members; otherwise a single q31_t A1 is declared and there
        * is no A2 member. Code that touches A1/A2 directly must use the same guard.
1663    */
1664   typedef struct
1665   {
1666     q15_t A0; 	 /**< The derived gain, A0 = Kp + Ki + Kd . */
1667 	#ifdef ARM_MATH_CM0
1668 	q15_t A1;	/**< presumably A1 = -Kp - 2Kd, as in the Q31/f32 instances - TODO confirm. */
1669 	q15_t A2;	/**< presumably A2 = Kd, as in the Q31/f32 instances - TODO confirm. */
1670 	#else
1671     q31_t A1;           /**< The derived gains (-Kp - 2Kd) and Kd, written "-Kp - 2Kd | Kd" in the original comment; presumably packed as two 16-bit halves - TODO confirm which half holds which. */
1672 	#endif
1673     q15_t state[3];       /**< The state array of length 3. */
1674     q15_t Kp;           /**< The proportional gain. */
1675     q15_t Ki;           /**< The integral gain. */
1676     q15_t Kd;           /**< The derivative gain. */
1677   } arm_pid_instance_q15;
1678 
1679   /**
1680    * @brief Instance structure for the Q31 PID Control.
        *
        * Kp, Ki and Kd are the gains; A0, A1 and A2 are derived from them by the
        * formulas given on each member.
1681    */
1682   typedef struct
1683   {
1684     q31_t A0;            /**< The derived gain, A0 = Kp + Ki + Kd . */
1685     q31_t A1;            /**< The derived gain, A1 = -Kp - 2Kd. */
1686     q31_t A2;            /**< The derived gain, A2 = Kd . */
1687     q31_t state[3];      /**< The state array of length 3. */
1688     q31_t Kp;            /**< The proportional gain. */
1689     q31_t Ki;            /**< The integral gain. */
1690     q31_t Kd;            /**< The derivative gain. */
1691 
1692   } arm_pid_instance_q31;
1693 
1694   /**
1695    * @brief Instance structure for the floating-point PID Control.
        *
        * Kp, Ki and Kd are the gains; A0, A1 and A2 are derived from them by the
        * formulas given on each member.
1696    */
1697   typedef struct
1698   {
1699     float32_t A0;          /**< The derived gain, A0 = Kp + Ki + Kd . */
1700     float32_t A1;          /**< The derived gain, A1 = -Kp - 2Kd. */
1701     float32_t A2;          /**< The derived gain, A2 = Kd . */
1702     float32_t state[3];    /**< The state array of length 3. */
1703     float32_t Kp;               /**< The proportional gain. */
1704     float32_t Ki;               /**< The integral gain. */
1705     float32_t Kd;               /**< The derivative gain. */
1706   } arm_pid_instance_f32;
1707 
1708 
1709 
1710   /**
1711    * @brief  Initialization function for the floating-point PID Control.
1712    * @param[in,out] *S      points to an instance of the floating-point PID structure.
1713    * @param[in]     resetStateFlag  flag to reset the state: 0 = no change in state, 1 = reset the state.
1714    * @return none.
        * @note The gains are not parameters of this function; presumably Kp, Ki and Kd
        *       must be set in *S before the call - TODO confirm.
1715    */
1716   void arm_pid_init_f32(
1717 			arm_pid_instance_f32 * S,
1718 			int32_t resetStateFlag);
1719 
1720   /**
1721    * @brief  Reset function for the floating-point PID Control.
1722    * @param[in,out] *S points to an instance of the floating-point PID Control structure.
1723    * @return none.
1724    */
1725   void arm_pid_reset_f32(
1726 			 arm_pid_instance_f32 * S);
1727 
1728 
1729   /**
1730    * @brief  Initialization function for the Q31 PID Control.
1731    * @param[in,out] *S points to an instance of the Q31 PID structure.
1732    * @param[in]     resetStateFlag  flag to reset the state: 0 = no change in state, 1 = reset the state.
1733    * @return none.
        * @note The gains are not parameters of this function; presumably Kp, Ki and Kd
        *       must be set in *S before the call - TODO confirm.
1734    */
1735   void arm_pid_init_q31(
1736 			arm_pid_instance_q31 * S,
1737 			int32_t resetStateFlag);
1738 
1739 
1740   /**
1741    * @brief  Reset function for the Q31 PID Control.
1742    * @param[in,out] *S points to an instance of the Q31 PID Control structure.
1743    * @return none.
1744    */
1745 
1746   void arm_pid_reset_q31(
1747 			 arm_pid_instance_q31 * S);
1748 
1749   /**
1750    * @brief  Initialization function for the Q15 PID Control.
1751    * @param[in,out] *S points to an instance of the Q15 PID structure.
1752    * @param[in] resetStateFlag  flag to reset the state: 0 = no change in state, 1 = reset the state.
1753    * @return none.
        * @note The gains are not parameters of this function; presumably Kp, Ki and Kd
        *       must be set in *S before the call - TODO confirm.
1754    */
1755   void arm_pid_init_q15(
1756 			arm_pid_instance_q15 * S,
1757 			int32_t resetStateFlag);
1758 
1759   /**
1760    * @brief  Reset function for the Q15 PID Control.
1761    * @param[in,out] *S points to an instance of the Q15 PID Control structure.
1762    * @return none.
1763    */
1764   void arm_pid_reset_q15(
1765 			 arm_pid_instance_q15 * S);
1766 
1767 
1768   /**
1769    * @brief Instance structure for the floating-point Linear Interpolate function.
        *
        * Three of the four members had no description in the original header; the
        * notes on them below are inferred from their names and need confirming.
1770    */
1771   typedef struct
1772   {
1773     uint32_t nValues;           /**< presumably the number of entries in the pYData table - TODO confirm */
1774     float32_t x1;               /**< presumably the x value of the first table entry - TODO confirm */
1775     float32_t xSpacing;         /**< presumably the spacing between consecutive x values - TODO confirm */
1776     float32_t *pYData;          /**< pointer to the table of Y values */
1777   } arm_linear_interp_instance_f32;
1778 
1779   /**
1780    * @brief Instance structure for the floating-point bilinear interpolation function.
        *
        * Holds the table dimensions and a pointer to the table; the table data
        * itself is not part of the structure.
1781    */
1782 
1783   typedef struct
1784   {
1785     uint16_t numRows;	/**< number of rows in the data table. */
1786     uint16_t numCols;	/**< number of columns in the data table. */
1787     float32_t *pData;	/**< points to the data table. */
1788   } arm_bilinear_interp_instance_f32;
1789 
1790    /**
1791    * @brief Instance structure for the Q31 bilinear interpolation function.
        *
        * Holds the table dimensions and a pointer to the table; the table data
        * itself is not part of the structure.
1792    */
1793 
1794   typedef struct
1795   {
1796     uint16_t numRows;	/**< number of rows in the data table. */
1797     uint16_t numCols;	/**< number of columns in the data table. */
1798     q31_t *pData;	/**< points to the data table. */
1799   } arm_bilinear_interp_instance_q31;
1800 
1801    /**
1802    * @brief Instance structure for the Q15 bilinear interpolation function.
        *
        * Holds the table dimensions and a pointer to the table; the table data
        * itself is not part of the structure.
1803    */
1804 
1805   typedef struct
1806   {
1807     uint16_t numRows;	/**< number of rows in the data table. */
1808     uint16_t numCols;	/**< number of columns in the data table. */
1809     q15_t *pData;	/**< points to the data table. */
1810   } arm_bilinear_interp_instance_q15;
1811 
1812    /**
1813    * @brief Instance structure for the Q7 bilinear interpolation function.
        *
        * Holds the table dimensions and a pointer to the table; the table data
        * itself is not part of the structure.
1814    */
1815 
1816   typedef struct
1817   {
1818     uint16_t numRows; 	/**< number of rows in the data table. */
1819     uint16_t numCols;	/**< number of columns in the data table. */
1820     q7_t *pData;		/**< points to the data table. */
1821   } arm_bilinear_interp_instance_q7;
1822 
1823 
1824   /**
1825    * @brief Q7 vector multiplication.
1826    * @param[in]       *pSrcA points to the first input vector.
1827    * @param[in]       *pSrcB points to the second input vector.
1828    * @param[out]      *pDst  points to the output vector.
1829    * @param[in]       blockSize number of samples in each vector.
1830    * @return none.
        * @note pSrcA and pSrcB are documented as input-only but are not
        *       const-qualified in this prototype, so callers holding const data
        *       need a cast.
1831    */
1832 
1833   void arm_mult_q7(
1834 		    q7_t * pSrcA,
1835 		    q7_t * pSrcB,
1836 		   q7_t * pDst,
1837 		   uint32_t blockSize);
1838 
1839   /**
1840    * @brief Q15 vector multiplication.
1841    * @param[in]       *pSrcA points to the first input vector
1842    * @param[in]       *pSrcB points to the second input vector
1843    * @param[out]      *pDst  points to the output vector
1844    * @param[in]       blockSize number of samples in each vector
1845    * @return none.
1846    */
1847 
1848   void arm_mult_q15(
1849 		     q15_t * pSrcA,
1850 		     q15_t * pSrcB,
1851 		    q15_t * pDst,
1852 		    uint32_t blockSize);
1853 
1854   /**
1855    * @brief Q31 vector multiplication.
1856    * @param[in]       *pSrcA points to the first input vector
1857    * @param[in]       *pSrcB points to the second input vector
1858    * @param[out]      *pDst points to the output vector
1859    * @param[in]       blockSize number of samples in each vector
1860    * @return none.
1861    */
1862 
1863   void arm_mult_q31(
1864 		     q31_t * pSrcA,
1865 		     q31_t * pSrcB,
1866 		    q31_t * pDst,
1867 		    uint32_t blockSize);
1868 
1869   /**
1870    * @brief Floating-point vector multiplication.
1871    * @param[in]       *pSrcA points to the first input vector
1872    * @param[in]       *pSrcB points to the second input vector
1873    * @param[out]      *pDst points to the output vector
1874    * @param[in]       blockSize number of samples in each vector
1875    * @return none.
1876    */
1877 
1878   void arm_mult_f32(
1879 		     float32_t * pSrcA,
1880 		     float32_t * pSrcB,
1881 		    float32_t * pDst,
1882 		    uint32_t blockSize);
1883 
1884 
1885   /**
1886    * @brief Instance structure for the Q15 CFFT/CIFFT function.
1887    */
1888 
1889   typedef struct
1890   {
1891     uint16_t  fftLen;                /**< length of the FFT. */
1892     uint8_t   ifftFlag;              /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
1893     uint8_t   bitReverseFlag;        /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
1894     q15_t     *pTwiddle;             /**< points to the twiddle factor table. */
1895     uint16_t  *pBitRevTable;         /**< points to the bit reversal table. */
1896     uint16_t  twidCoefModifier;      /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1897     uint16_t  bitRevFactor;          /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
1898   } arm_cfft_radix4_instance_q15;
1899 
1900   /**
1901    * @brief Instance structure for the Q31 CFFT/CIFFT function.
1902    */
1903 
1904   typedef struct
1905   {
1906     uint16_t    fftLen;              /**< length of the FFT. */
1907     uint8_t     ifftFlag;            /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
1908     uint8_t     bitReverseFlag;      /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
1909     q31_t       *pTwiddle;           /**< points to the twiddle factor table. */
1910     uint16_t    *pBitRevTable;       /**< points to the bit reversal table. */
1911     uint16_t    twidCoefModifier;    /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1912     uint16_t    bitRevFactor;        /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
1913   } arm_cfft_radix4_instance_q31;
1914 
1915   /**
1916    * @brief Instance structure for the floating-point CFFT/CIFFT function.
1917    */
1918 
1919   typedef struct
1920   {
1921     uint16_t     fftLen;               /**< length of the FFT. */
1922     uint8_t      ifftFlag;             /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
1923     uint8_t      bitReverseFlag;       /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
1924     float32_t    *pTwiddle;            /**< points to the twiddle factor table. */
1925     uint16_t     *pBitRevTable;        /**< points to the bit reversal table. */
1926     uint16_t     twidCoefModifier;     /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1927     uint16_t     bitRevFactor;         /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
1928 	float32_t    onebyfftLen;          /**< value of 1/fftLen. */
1929   } arm_cfft_radix4_instance_f32;
1930 
1931   /**
1932    * @brief Processing function for the Q15 CFFT/CIFFT.
1933    * @param[in]      *S    points to an instance of the Q15 CFFT/CIFFT structure.
1934    * @param[in, out] *pSrc points to the complex data buffer. Processing occurs in-place.
1935    * @return none.
1936    */
1937 
1938   void arm_cfft_radix4_q15(
1939 			   const arm_cfft_radix4_instance_q15 * S,
1940 			   q15_t * pSrc);
1941 
1942   /**
1943    * @brief Initialization function for the Q15 CFFT/CIFFT.
1944    * @param[in,out] *S             points to an instance of the Q15 CFFT/CIFFT structure.
1945    * @param[in]     fftLen         length of the FFT.
1946    * @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
1947    * @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
1948    * @return        arm_status     function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
1949    */
1950 
1951   arm_status arm_cfft_radix4_init_q15(
1952 				      arm_cfft_radix4_instance_q15 * S,
1953 				      uint16_t fftLen,
1954 				      uint8_t ifftFlag,
1955 				      uint8_t bitReverseFlag);
1956 
1957   /**
1958    * @brief Processing function for the Q31 CFFT/CIFFT.
1959    * @param[in]      *S    points to an instance of the Q31 CFFT/CIFFT structure.
1960    * @param[in, out] *pSrc points to the complex data buffer. Processing occurs in-place.
1961    * @return none.
1962    */
1963 
1964   void arm_cfft_radix4_q31(
1965 			   const arm_cfft_radix4_instance_q31 * S,
1966 			   q31_t * pSrc);
1967 
1968   /**
1969    * @brief  Initialization function for the Q31 CFFT/CIFFT.
1970    * @param[in,out] *S             points to an instance of the Q31 CFFT/CIFFT structure.
1971    * @param[in]     fftLen         length of the FFT.
1972    * @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
1973    * @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
1974    * @return        arm_status     function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
1975    */
1976 
1977   arm_status arm_cfft_radix4_init_q31(
1978 				      arm_cfft_radix4_instance_q31 * S,
1979 				      uint16_t fftLen,
1980 				      uint8_t ifftFlag,
1981 				      uint8_t bitReverseFlag);
1982 
1983   /**
1984    * @brief Processing function for the floating-point CFFT/CIFFT.
1985    * @param[in]      *S    points to an instance of the floating-point CFFT/CIFFT structure.
1986    * @param[in, out] *pSrc points to the complex data buffer. Processing occurs in-place.
1987    * @return none.
1988    */
1989 
1990   void arm_cfft_radix4_f32(
1991 			   const arm_cfft_radix4_instance_f32 * S,
1992 			   float32_t * pSrc);
1993 
1994   /**
1995    * @brief  Initialization function for the floating-point CFFT/CIFFT.
1996    * @param[in,out] *S             points to an instance of the floating-point CFFT/CIFFT structure.
1997    * @param[in]     fftLen         length of the FFT.
1998    * @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
1999    * @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
2000    * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
2001    */
2002 
2003   arm_status arm_cfft_radix4_init_f32(
2004 				      arm_cfft_radix4_instance_f32 * S,
2005 				      uint16_t fftLen,
2006 				      uint8_t ifftFlag,
2007 				      uint8_t bitReverseFlag);
2008 
2009 
2010 
2011   /*----------------------------------------------------------------------
   *		Internal function prototypes for the FFT functions
2013    ----------------------------------------------------------------------*/
2014 
2015   /**
2016    * @brief  Core function for the floating-point CFFT butterfly process.
2017    * @param[in, out] *pSrc            points to the in-place buffer of floating-point data type.
2018    * @param[in]      fftLen           length of the FFT.
2019    * @param[in]      *pCoef           points to the twiddle coefficient buffer.
2020    * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
2021    * @return none.
2022    */
2023 
2024   void arm_radix4_butterfly_f32(
2025 				float32_t * pSrc,
2026 				uint16_t fftLen,
2027 				float32_t * pCoef,
2028 				uint16_t twidCoefModifier);
2029 
2030   /**
2031    * @brief  Core function for the floating-point CIFFT butterfly process.
2032    * @param[in, out] *pSrc            points to the in-place buffer of floating-point data type.
2033    * @param[in]      fftLen           length of the FFT.
2034    * @param[in]      *pCoef           points to twiddle coefficient buffer.
2035    * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
2036    * @param[in]      onebyfftLen      value of 1/fftLen.
2037    * @return none.
2038    */
2039 
2040   void arm_radix4_butterfly_inverse_f32(
2041 					float32_t * pSrc,
2042 					uint16_t fftLen,
2043 					float32_t * pCoef,
2044 					uint16_t twidCoefModifier,
2045 					float32_t onebyfftLen);
2046 
2047   /**
2048    * @brief  In-place bit reversal function.
2049    * @param[in, out] *pSrc        points to the in-place buffer of floating-point data type.
2050    * @param[in]      fftSize      length of the FFT.
2051    * @param[in]      bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table.
2052    * @param[in]      *pBitRevTab  points to the bit reversal table.
2053    * @return none.
2054    */
2055 
2056   void arm_bitreversal_f32(
2057 			   float32_t *pSrc,
2058 			   uint16_t fftSize,
2059 			   uint16_t bitRevFactor,
2060 			   uint16_t *pBitRevTab);
2061 
2062   /**
2063    * @brief  Core function for the Q31 CFFT butterfly process.
2064    * @param[in, out] *pSrc            points to the in-place buffer of Q31 data type.
2065    * @param[in]      fftLen           length of the FFT.
2066    * @param[in]      *pCoef           points to twiddle coefficient buffer.
2067    * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
2068    * @return none.
2069    */
2070 
2071   void arm_radix4_butterfly_q31(
2072 				q31_t *pSrc,
2073 				uint32_t fftLen,
2074 				q31_t *pCoef,
2075 				uint32_t twidCoefModifier);
2076 
2077   /**
2078    * @brief  Core function for the Q31 CIFFT butterfly process.
2079    * @param[in, out] *pSrc            points to the in-place buffer of Q31 data type.
2080    * @param[in]      fftLen           length of the FFT.
2081    * @param[in]      *pCoef           points to twiddle coefficient buffer.
2082    * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
2083    * @return none.
2084    */
2085 
2086   void arm_radix4_butterfly_inverse_q31(
2087 					q31_t * pSrc,
2088 					uint32_t fftLen,
2089 					q31_t * pCoef,
2090 					uint32_t twidCoefModifier);
2091 
2092   /**
2093    * @brief  In-place bit reversal function.
2094    * @param[in, out] *pSrc        points to the in-place buffer of Q31 data type.
2095    * @param[in]      fftLen       length of the FFT.
2096    * @param[in]      bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table
2097    * @param[in]      *pBitRevTab  points to bit reversal table.
2098    * @return none.
2099    */
2100 
2101   void arm_bitreversal_q31(
2102 			   q31_t * pSrc,
2103 			   uint32_t fftLen,
2104 			   uint16_t bitRevFactor,
2105 			   uint16_t *pBitRevTab);
2106 
2107   /**
2108    * @brief  Core function for the Q15 CFFT butterfly process.
2109    * @param[in, out] *pSrc16          points to the in-place buffer of Q15 data type.
2110    * @param[in]      fftLen           length of the FFT.
2111    * @param[in]      *pCoef16         points to twiddle coefficient buffer.
2112    * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
2113    * @return none.
2114    */
2115 
2116   void arm_radix4_butterfly_q15(
2117 				q15_t *pSrc16,
2118 				uint32_t fftLen,
2119 				q15_t *pCoef16,
2120 				uint32_t twidCoefModifier);
2121 
2122   /**
2123    * @brief  Core function for the Q15 CIFFT butterfly process.
2124    * @param[in, out] *pSrc16          points to the in-place buffer of Q15 data type.
2125    * @param[in]      fftLen           length of the FFT.
2126    * @param[in]      *pCoef16         points to twiddle coefficient buffer.
2127    * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
2128    * @return none.
2129    */
2130 
2131   void arm_radix4_butterfly_inverse_q15(
2132 					q15_t *pSrc16,
2133 					uint32_t fftLen,
2134 					q15_t *pCoef16,
2135 					uint32_t twidCoefModifier);
2136 
2137   /**
2138    * @brief  In-place bit reversal function.
2139    * @param[in, out] *pSrc        points to the in-place buffer of Q15 data type.
2140    * @param[in]      fftLen       length of the FFT.
2141    * @param[in]      bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table
2142    * @param[in]      *pBitRevTab  points to bit reversal table.
2143    * @return none.
2144    */
2145 
2146   void arm_bitreversal_q15(
2147 			   q15_t * pSrc,
2148 			   uint32_t fftLen,
2149 			   uint16_t bitRevFactor,
2150 			   uint16_t *pBitRevTab);
2151 
2152   /**
2153    * @brief Instance structure for the Q15 RFFT/RIFFT function.
2154    */
2155 
2156   typedef struct
2157   {
2158     uint32_t fftLenReal;                      /**< length of the real FFT. */
2159     uint32_t fftLenBy2;                       /**< length of the complex FFT. */
2160     uint8_t  ifftFlagR;                       /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
2161 	uint8_t  bitReverseFlagR;                 /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
2162     uint32_t twidCoefRModifier;               /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2163     q15_t    *pTwiddleAReal;                  /**< points to the real twiddle factor table. */
2164     q15_t    *pTwiddleBReal;                  /**< points to the imag twiddle factor table. */
2165     arm_cfft_radix4_instance_q15 *pCfft;	  /**< points to the complex FFT instance. */
2166   } arm_rfft_instance_q15;
2167 
2168   /**
2169    * @brief Instance structure for the Q31 RFFT/RIFFT function.
2170    */
2171 
2172   typedef struct
2173   {
2174     uint32_t fftLenReal;                        /**< length of the real FFT. */
2175     uint32_t fftLenBy2;                         /**< length of the complex FFT. */
2176     uint8_t  ifftFlagR;                         /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
2177 	uint8_t  bitReverseFlagR;                   /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
2178     uint32_t twidCoefRModifier;                 /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2179     q31_t    *pTwiddleAReal;                    /**< points to the real twiddle factor table. */
2180     q31_t    *pTwiddleBReal;                    /**< points to the imag twiddle factor table. */
2181     arm_cfft_radix4_instance_q31 *pCfft;        /**< points to the complex FFT instance. */
2182   } arm_rfft_instance_q31;
2183 
2184   /**
2185    * @brief Instance structure for the floating-point RFFT/RIFFT function.
2186    */
2187 
2188   typedef struct
2189   {
2190     uint32_t  fftLenReal;                       /**< length of the real FFT. */
2191     uint16_t  fftLenBy2;                        /**< length of the complex FFT. */
2192     uint8_t   ifftFlagR;                        /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
2193     uint8_t   bitReverseFlagR;                  /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
2194 	uint32_t  twidCoefRModifier;                /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2195     float32_t *pTwiddleAReal;                   /**< points to the real twiddle factor table. */
2196     float32_t *pTwiddleBReal;                   /**< points to the imag twiddle factor table. */
2197     arm_cfft_radix4_instance_f32 *pCfft;        /**< points to the complex FFT instance. */
2198   } arm_rfft_instance_f32;
2199 
2200   /**
2201    * @brief Processing function for the Q15 RFFT/RIFFT.
2202    * @param[in]  *S    points to an instance of the Q15 RFFT/RIFFT structure.
2203    * @param[in]  *pSrc points to the input buffer.
2204    * @param[out] *pDst points to the output buffer.
2205    * @return none.
2206    */
2207 
2208   void arm_rfft_q15(
2209 		    const arm_rfft_instance_q15 * S,
2210 		    q15_t * pSrc,
2211 		    q15_t * pDst);
2212 
2213   /**
2214    * @brief  Initialization function for the Q15 RFFT/RIFFT.
2215    * @param[in, out] *S             points to an instance of the Q15 RFFT/RIFFT structure.
   * @param[in, out] *S_CFFT        points to an instance of the Q15 CFFT/CIFFT structure.
2217    * @param[in]      fftLenReal     length of the FFT.
2218    * @param[in]      ifftFlagR      flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform.
2219    * @param[in]      bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
2220    * @return		The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLenReal</code> is not a supported value.
2221    */
2222 
2223   arm_status arm_rfft_init_q15(
2224 			       arm_rfft_instance_q15 * S,
2225 			       arm_cfft_radix4_instance_q15 * S_CFFT,
2226 			       uint32_t fftLenReal,
2227 			       uint32_t ifftFlagR,
2228 			       uint32_t bitReverseFlag);
2229 
2230   /**
2231    * @brief Processing function for the Q31 RFFT/RIFFT.
2232    * @param[in]  *S    points to an instance of the Q31 RFFT/RIFFT structure.
2233    * @param[in]  *pSrc points to the input buffer.
2234    * @param[out] *pDst points to the output buffer.
2235    * @return none.
2236    */
2237 
2238   void arm_rfft_q31(
2239 		    const arm_rfft_instance_q31 * S,
2240 		    q31_t * pSrc,
2241 		    q31_t * pDst);
2242 
2243   /**
2244    * @brief  Initialization function for the Q31 RFFT/RIFFT.
2245    * @param[in, out] *S             points to an instance of the Q31 RFFT/RIFFT structure.
2246    * @param[in, out] *S_CFFT        points to an instance of the Q31 CFFT/CIFFT structure.
2247    * @param[in]      fftLenReal     length of the FFT.
2248    * @param[in]      ifftFlagR      flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform.
2249    * @param[in]      bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
2250    * @return		The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLenReal</code> is not a supported value.
2251    */
2252 
2253   arm_status arm_rfft_init_q31(
2254 			       arm_rfft_instance_q31 * S,
2255 			       arm_cfft_radix4_instance_q31 * S_CFFT,
2256 			       uint32_t fftLenReal,
2257 			       uint32_t ifftFlagR,
2258 			       uint32_t bitReverseFlag);
2259 
2260   /**
2261    * @brief  Initialization function for the floating-point RFFT/RIFFT.
2262    * @param[in,out] *S             points to an instance of the floating-point RFFT/RIFFT structure.
2263    * @param[in,out] *S_CFFT        points to an instance of the floating-point CFFT/CIFFT structure.
2264    * @param[in]     fftLenReal     length of the FFT.
2265    * @param[in]     ifftFlagR      flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform.
2266    * @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
2267    * @return		The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLenReal</code> is not a supported value.
2268    */
2269 
2270   arm_status arm_rfft_init_f32(
2271 			       arm_rfft_instance_f32 * S,
2272 			       arm_cfft_radix4_instance_f32 * S_CFFT,
2273 			       uint32_t fftLenReal,
2274 			       uint32_t ifftFlagR,
2275 			       uint32_t bitReverseFlag);
2276 
2277   /**
2278    * @brief Processing function for the floating-point RFFT/RIFFT.
2279    * @param[in]  *S    points to an instance of the floating-point RFFT/RIFFT structure.
2280    * @param[in]  *pSrc points to the input buffer.
2281    * @param[out] *pDst points to the output buffer.
2282    * @return none.
2283    */
2284 
2285   void arm_rfft_f32(
2286 		    const arm_rfft_instance_f32 * S,
2287 		    float32_t * pSrc,
2288 		    float32_t * pDst);
2289 
2290   /**
2291    * @brief Instance structure for the floating-point DCT4/IDCT4 function.
2292    */
2293 
2294   typedef struct
2295   {
2296     uint16_t N;                         /**< length of the DCT4. */
2297     uint16_t Nby2;                      /**< half of the length of the DCT4. */
2298     float32_t normalize;                /**< normalizing factor. */
2299     float32_t *pTwiddle;                /**< points to the twiddle factor table. */
2300     float32_t *pCosFactor;              /**< points to the cosFactor table. */
2301     arm_rfft_instance_f32 *pRfft;        /**< points to the real FFT instance. */
2302     arm_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */
2303   } arm_dct4_instance_f32;
2304 
2305   /**
2306    * @brief  Initialization function for the floating-point DCT4/IDCT4.
2307    * @param[in,out] *S         points to an instance of floating-point DCT4/IDCT4 structure.
2308    * @param[in]     *S_RFFT    points to an instance of floating-point RFFT/RIFFT structure.
2309    * @param[in]     *S_CFFT    points to an instance of floating-point CFFT/CIFFT structure.
2310    * @param[in]     N          length of the DCT4.
2311    * @param[in]     Nby2       half of the length of the DCT4.
2312    * @param[in]     normalize  normalizing factor.
   * @return		arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
2314    */
2315 
2316   arm_status arm_dct4_init_f32(
2317 			       arm_dct4_instance_f32 * S,
2318 			       arm_rfft_instance_f32 * S_RFFT,
2319 			       arm_cfft_radix4_instance_f32 * S_CFFT,
2320 			       uint16_t N,
2321 			       uint16_t Nby2,
2322 			       float32_t normalize);
2323 
2324   /**
2325    * @brief Processing function for the floating-point DCT4/IDCT4.
2326    * @param[in]       *S             points to an instance of the floating-point DCT4/IDCT4 structure.
2327    * @param[in]       *pState        points to state buffer.
2328    * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.
2329    * @return none.
2330    */
2331 
2332   void arm_dct4_f32(
2333 		    const arm_dct4_instance_f32 * S,
2334 		    float32_t * pState,
2335 		    float32_t * pInlineBuffer);
2336 
2337   /**
2338    * @brief Instance structure for the Q31 DCT4/IDCT4 function.
2339    */
2340 
2341   typedef struct
2342   {
2343     uint16_t N;                         /**< length of the DCT4. */
2344     uint16_t Nby2;                      /**< half of the length of the DCT4. */
2345     q31_t normalize;                    /**< normalizing factor. */
2346     q31_t *pTwiddle;                    /**< points to the twiddle factor table. */
2347     q31_t *pCosFactor;                  /**< points to the cosFactor table. */
2348     arm_rfft_instance_q31 *pRfft;        /**< points to the real FFT instance. */
2349     arm_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance. */
2350   } arm_dct4_instance_q31;
2351 
2352   /**
2353    * @brief  Initialization function for the Q31 DCT4/IDCT4.
2354    * @param[in,out] *S         points to an instance of Q31 DCT4/IDCT4 structure.
2355    * @param[in]     *S_RFFT    points to an instance of Q31 RFFT/RIFFT structure
2356    * @param[in]     *S_CFFT    points to an instance of Q31 CFFT/CIFFT structure
2357    * @param[in]     N          length of the DCT4.
2358    * @param[in]     Nby2       half of the length of the DCT4.
2359    * @param[in]     normalize  normalizing factor.
2360    * @return		arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
2361    */
2362 
2363   arm_status arm_dct4_init_q31(
2364 			       arm_dct4_instance_q31 * S,
2365 			       arm_rfft_instance_q31 * S_RFFT,
2366 			       arm_cfft_radix4_instance_q31 * S_CFFT,
2367 			       uint16_t N,
2368 			       uint16_t Nby2,
2369 			       q31_t normalize);
2370 
2371   /**
2372    * @brief Processing function for the Q31 DCT4/IDCT4.
2373    * @param[in]       *S             points to an instance of the Q31 DCT4 structure.
2374    * @param[in]       *pState        points to state buffer.
2375    * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.
2376    * @return none.
2377    */
2378 
2379   void arm_dct4_q31(
2380 		    const arm_dct4_instance_q31 * S,
2381 		    q31_t * pState,
2382 		    q31_t * pInlineBuffer);
2383 
2384   /**
2385    * @brief Instance structure for the Q15 DCT4/IDCT4 function.
2386    */
2387 
2388   typedef struct
2389   {
2390     uint16_t N;                         /**< length of the DCT4. */
2391     uint16_t Nby2;                      /**< half of the length of the DCT4. */
2392     q15_t normalize;                    /**< normalizing factor. */
2393     q15_t *pTwiddle;                    /**< points to the twiddle factor table. */
2394     q15_t *pCosFactor;                  /**< points to the cosFactor table. */
2395     arm_rfft_instance_q15 *pRfft;        /**< points to the real FFT instance. */
2396     arm_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance. */
2397   } arm_dct4_instance_q15;
2398 
2399   /**
2400    * @brief  Initialization function for the Q15 DCT4/IDCT4.
2401    * @param[in,out] *S         points to an instance of Q15 DCT4/IDCT4 structure.
2402    * @param[in]     *S_RFFT    points to an instance of Q15 RFFT/RIFFT structure.
2403    * @param[in]     *S_CFFT    points to an instance of Q15 CFFT/CIFFT structure.
2404    * @param[in]     N          length of the DCT4.
2405    * @param[in]     Nby2       half of the length of the DCT4.
2406    * @param[in]     normalize  normalizing factor.
2407    * @return		arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
2408    */
2409 
2410   arm_status arm_dct4_init_q15(
2411 			       arm_dct4_instance_q15 * S,
2412 			       arm_rfft_instance_q15 * S_RFFT,
2413 			       arm_cfft_radix4_instance_q15 * S_CFFT,
2414 			       uint16_t N,
2415 			       uint16_t Nby2,
2416 			       q15_t normalize);
2417 
2418   /**
2419    * @brief Processing function for the Q15 DCT4/IDCT4.
2420    * @param[in]       *S             points to an instance of the Q15 DCT4 structure.
2421    * @param[in]       *pState        points to state buffer.
2422    * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.
2423    * @return none.
2424    */
2425 
2426   void arm_dct4_q15(
2427 		    const arm_dct4_instance_q15 * S,
2428 		    q15_t * pState,
2429 		    q15_t * pInlineBuffer);
2430 
2431   /**
2432    * @brief Floating-point vector addition.
2433    * @param[in]       *pSrcA points to the first input vector
2434    * @param[in]       *pSrcB points to the second input vector
2435    * @param[out]      *pDst points to the output vector
2436    * @param[in]       blockSize number of samples in each vector
2437    * @return none.
2438    */
2439 
2440   void arm_add_f32(
2441 		   float32_t * pSrcA,
2442 		   float32_t * pSrcB,
2443 		   float32_t * pDst,
2444 		   uint32_t blockSize);
2445 
2446   /**
2447    * @brief Q7 vector addition.
2448    * @param[in]       *pSrcA points to the first input vector
2449    * @param[in]       *pSrcB points to the second input vector
2450    * @param[out]      *pDst points to the output vector
2451    * @param[in]       blockSize number of samples in each vector
2452    * @return none.
2453    */
2454 
2455   void arm_add_q7(
2456 		  q7_t * pSrcA,
2457 		  q7_t * pSrcB,
2458 		  q7_t * pDst,
2459 		  uint32_t blockSize);
2460 
2461   /**
2462    * @brief Q15 vector addition.
2463    * @param[in]       *pSrcA points to the first input vector
2464    * @param[in]       *pSrcB points to the second input vector
2465    * @param[out]      *pDst points to the output vector
2466    * @param[in]       blockSize number of samples in each vector
2467    * @return none.
2468    */
2469 
2470   void arm_add_q15(
2471 		    q15_t * pSrcA,
2472 		    q15_t * pSrcB,
2473 		   q15_t * pDst,
2474 		   uint32_t blockSize);
2475 
2476   /**
2477    * @brief Q31 vector addition.
2478    * @param[in]       *pSrcA points to the first input vector
2479    * @param[in]       *pSrcB points to the second input vector
2480    * @param[out]      *pDst points to the output vector
2481    * @param[in]       blockSize number of samples in each vector
2482    * @return none.
2483    */
2484 
2485   void arm_add_q31(
2486 		    q31_t * pSrcA,
2487 		    q31_t * pSrcB,
2488 		   q31_t * pDst,
2489 		   uint32_t blockSize);
2490 
2491   /**
2492    * @brief Floating-point vector subtraction.
2493    * @param[in]       *pSrcA points to the first input vector
2494    * @param[in]       *pSrcB points to the second input vector
2495    * @param[out]      *pDst points to the output vector
2496    * @param[in]       blockSize number of samples in each vector
2497    * @return none.
2498    */
2499 
2500   void arm_sub_f32(
2501 		    float32_t * pSrcA,
2502 		    float32_t * pSrcB,
2503 		   float32_t * pDst,
2504 		   uint32_t blockSize);
2505 
2506   /**
2507    * @brief Q7 vector subtraction.
2508    * @param[in]       *pSrcA points to the first input vector
2509    * @param[in]       *pSrcB points to the second input vector
2510    * @param[out]      *pDst points to the output vector
2511    * @param[in]       blockSize number of samples in each vector
2512    * @return none.
2513    */
2514 
2515   void arm_sub_q7(
2516 		   q7_t * pSrcA,
2517 		   q7_t * pSrcB,
2518 		  q7_t * pDst,
2519 		  uint32_t blockSize);
2520 
2521   /**
2522    * @brief Q15 vector subtraction.
2523    * @param[in]       *pSrcA points to the first input vector
2524    * @param[in]       *pSrcB points to the second input vector
2525    * @param[out]      *pDst points to the output vector
2526    * @param[in]       blockSize number of samples in each vector
2527    * @return none.
2528    */
2529 
2530   void arm_sub_q15(
2531 		    q15_t * pSrcA,
2532 		    q15_t * pSrcB,
2533 		   q15_t * pDst,
2534 		   uint32_t blockSize);
2535 
2536   /**
2537    * @brief Q31 vector subtraction.
2538    * @param[in]       *pSrcA points to the first input vector
2539    * @param[in]       *pSrcB points to the second input vector
2540    * @param[out]      *pDst points to the output vector
2541    * @param[in]       blockSize number of samples in each vector
2542    * @return none.
2543    */
2544 
2545   void arm_sub_q31(
2546 		    q31_t * pSrcA,
2547 		    q31_t * pSrcB,
2548 		   q31_t * pDst,
2549 		   uint32_t blockSize);
2550 
2551   /**
2552    * @brief Multiplies a floating-point vector by a scalar.
2553    * @param[in]       *pSrc points to the input vector
2554    * @param[in]       scale scale factor to be applied
2555    * @param[out]      *pDst points to the output vector
2556    * @param[in]       blockSize number of samples in the vector
2557    * @return none.
2558    */
2559 
2560   void arm_scale_f32(
2561 		      float32_t * pSrc,
2562 		     float32_t scale,
2563 		     float32_t * pDst,
2564 		     uint32_t blockSize);
2565 
2566   /**
2567    * @brief Multiplies a Q7 vector by a scalar.
2568    * @param[in]       *pSrc points to the input vector
2569    * @param[in]       scaleFract fractional portion of the scale value
2570    * @param[in]       shift number of bits to shift the result by
2571    * @param[out]      *pDst points to the output vector
2572    * @param[in]       blockSize number of samples in the vector
2573    * @return none.
2574    */
2575 
2576   void arm_scale_q7(
2577 		     q7_t * pSrc,
2578 		    q7_t scaleFract,
2579 		    int8_t shift,
2580 		    q7_t * pDst,
2581 		    uint32_t blockSize);
2582 
2583   /**
2584    * @brief Multiplies a Q15 vector by a scalar.
2585    * @param[in]       *pSrc points to the input vector
2586    * @param[in]       scaleFract fractional portion of the scale value
2587    * @param[in]       shift number of bits to shift the result by
2588    * @param[out]      *pDst points to the output vector
2589    * @param[in]       blockSize number of samples in the vector
2590    * @return none.
2591    */
2592 
2593   void arm_scale_q15(
2594 		      q15_t * pSrc,
2595 		     q15_t scaleFract,
2596 		     int8_t shift,
2597 		     q15_t * pDst,
2598 		     uint32_t blockSize);
2599 
2600   /**
2601    * @brief Multiplies a Q31 vector by a scalar.
2602    * @param[in]       *pSrc points to the input vector
2603    * @param[in]       scaleFract fractional portion of the scale value
2604    * @param[in]       shift number of bits to shift the result by
2605    * @param[out]      *pDst points to the output vector
2606    * @param[in]       blockSize number of samples in the vector
2607    * @return none.
2608    */
2609 
2610   void arm_scale_q31(
2611 		      q31_t * pSrc,
2612 		     q31_t scaleFract,
2613 		     int8_t shift,
2614 		     q31_t * pDst,
2615 		     uint32_t blockSize);
2616 
2617   /**
2618    * @brief Q7 vector absolute value.
2619    * @param[in]       *pSrc points to the input buffer
2620    * @param[out]      *pDst points to the output buffer
2621    * @param[in]       blockSize number of samples in each vector
2622    * @return none.
2623    */
2624 
2625   void arm_abs_q7(
2626 		   q7_t * pSrc,
2627 		  q7_t * pDst,
2628 		  uint32_t blockSize);
2629 
2630   /**
2631    * @brief Floating-point vector absolute value.
2632    * @param[in]       *pSrc points to the input buffer
2633    * @param[out]      *pDst points to the output buffer
2634    * @param[in]       blockSize number of samples in each vector
2635    * @return none.
2636    */
2637 
2638   void arm_abs_f32(
2639 		    float32_t * pSrc,
2640 		   float32_t * pDst,
2641 		   uint32_t blockSize);
2642 
2643   /**
2644    * @brief Q15 vector absolute value.
2645    * @param[in]       *pSrc points to the input buffer
2646    * @param[out]      *pDst points to the output buffer
2647    * @param[in]       blockSize number of samples in each vector
2648    * @return none.
2649    */
2650 
2651   void arm_abs_q15(
2652 		    q15_t * pSrc,
2653 		   q15_t * pDst,
2654 		   uint32_t blockSize);
2655 
2656   /**
2657    * @brief Q31 vector absolute value.
2658    * @param[in]       *pSrc points to the input buffer
2659    * @param[out]      *pDst points to the output buffer
2660    * @param[in]       blockSize number of samples in each vector
2661    * @return none.
2662    */
2663 
2664   void arm_abs_q31(
2665 		    q31_t * pSrc,
2666 		   q31_t * pDst,
2667 		   uint32_t blockSize);
2668 
2669   /**
2670    * @brief Dot product of floating-point vectors.
2671    * @param[in]       *pSrcA points to the first input vector
2672    * @param[in]       *pSrcB points to the second input vector
2673    * @param[in]       blockSize number of samples in each vector
2674    * @param[out]      *result output result returned here
2675    * @return none.
2676    */
2677 
2678   void arm_dot_prod_f32(
2679 			 float32_t * pSrcA,
2680 			 float32_t * pSrcB,
2681 			uint32_t blockSize,
2682 			float32_t * result);
2683 
2684   /**
2685    * @brief Dot product of Q7 vectors.
2686    * @param[in]       *pSrcA points to the first input vector
2687    * @param[in]       *pSrcB points to the second input vector
2688    * @param[in]       blockSize number of samples in each vector
2689    * @param[out]      *result output result returned here
2690    * @return none.
2691    */
2692 
2693   void arm_dot_prod_q7(
2694 		        q7_t * pSrcA,
2695 		        q7_t * pSrcB,
2696 		       uint32_t blockSize,
2697 		       q31_t * result);
2698 
2699   /**
2700    * @brief Dot product of Q15 vectors.
2701    * @param[in]       *pSrcA points to the first input vector
2702    * @param[in]       *pSrcB points to the second input vector
2703    * @param[in]       blockSize number of samples in each vector
2704    * @param[out]      *result output result returned here
2705    * @return none.
2706    */
2707 
2708   void arm_dot_prod_q15(
2709 			 q15_t * pSrcA,
2710 			 q15_t * pSrcB,
2711 			uint32_t blockSize,
2712 			q63_t * result);
2713 
2714   /**
2715    * @brief Dot product of Q31 vectors.
2716    * @param[in]       *pSrcA points to the first input vector
2717    * @param[in]       *pSrcB points to the second input vector
2718    * @param[in]       blockSize number of samples in each vector
2719    * @param[out]      *result output result returned here
2720    * @return none.
2721    */
2722 
2723   void arm_dot_prod_q31(
2724 			 q31_t * pSrcA,
2725 			 q31_t * pSrcB,
2726 			uint32_t blockSize,
2727 			q63_t * result);
2728 
2729   /**
2730    * @brief  Shifts the elements of a Q7 vector a specified number of bits.
2731    * @param[in]  *pSrc points to the input vector
2732    * @param[in]  shiftBits number of bits to shift.  A positive value shifts left; a negative value shifts right.
2733    * @param[out]  *pDst points to the output vector
2734    * @param[in]  blockSize number of samples in the vector
2735    * @return none.
2736    */
2737 
2738   void arm_shift_q7(
2739 		     q7_t * pSrc,
2740 		    int8_t shiftBits,
2741 		    q7_t * pDst,
2742 		    uint32_t blockSize);
2743 
2744   /**
2745    * @brief  Shifts the elements of a Q15 vector a specified number of bits.
2746    * @param[in]  *pSrc points to the input vector
2747    * @param[in]  shiftBits number of bits to shift.  A positive value shifts left; a negative value shifts right.
2748    * @param[out]  *pDst points to the output vector
2749    * @param[in]  blockSize number of samples in the vector
2750    * @return none.
2751    */
2752 
2753   void arm_shift_q15(
2754 		      q15_t * pSrc,
2755 		     int8_t shiftBits,
2756 		     q15_t * pDst,
2757 		     uint32_t blockSize);
2758 
2759   /**
2760    * @brief  Shifts the elements of a Q31 vector a specified number of bits.
2761    * @param[in]  *pSrc points to the input vector
2762    * @param[in]  shiftBits number of bits to shift.  A positive value shifts left; a negative value shifts right.
2763    * @param[out]  *pDst points to the output vector
2764    * @param[in]  blockSize number of samples in the vector
2765    * @return none.
2766    */
2767 
2768   void arm_shift_q31(
2769 		      q31_t * pSrc,
2770 		     int8_t shiftBits,
2771 		     q31_t * pDst,
2772 		     uint32_t blockSize);
2773 
2774   /**
2775    * @brief  Adds a constant offset to a floating-point vector.
2776    * @param[in]  *pSrc points to the input vector
2777    * @param[in]  offset is the offset to be added
2778    * @param[out]  *pDst points to the output vector
2779    * @param[in]  blockSize number of samples in the vector
2780    * @return none.
2781    */
2782 
2783   void arm_offset_f32(
2784 		       float32_t * pSrc,
2785 		      float32_t offset,
2786 		      float32_t * pDst,
2787 		      uint32_t blockSize);
2788 
2789   /**
2790    * @brief  Adds a constant offset to a Q7 vector.
2791    * @param[in]  *pSrc points to the input vector
2792    * @param[in]  offset is the offset to be added
2793    * @param[out]  *pDst points to the output vector
2794    * @param[in]  blockSize number of samples in the vector
2795    * @return none.
2796    */
2797 
2798   void arm_offset_q7(
2799 		      q7_t * pSrc,
2800 		     q7_t offset,
2801 		     q7_t * pDst,
2802 		     uint32_t blockSize);
2803 
2804   /**
2805    * @brief  Adds a constant offset to a Q15 vector.
2806    * @param[in]  *pSrc points to the input vector
2807    * @param[in]  offset is the offset to be added
2808    * @param[out]  *pDst points to the output vector
2809    * @param[in]  blockSize number of samples in the vector
2810    * @return none.
2811    */
2812 
2813   void arm_offset_q15(
2814 		       q15_t * pSrc,
2815 		      q15_t offset,
2816 		      q15_t * pDst,
2817 		      uint32_t blockSize);
2818 
2819   /**
2820    * @brief  Adds a constant offset to a Q31 vector.
2821    * @param[in]  *pSrc points to the input vector
2822    * @param[in]  offset is the offset to be added
2823    * @param[out]  *pDst points to the output vector
2824    * @param[in]  blockSize number of samples in the vector
2825    * @return none.
2826    */
2827 
2828   void arm_offset_q31(
2829 		       q31_t * pSrc,
2830 		      q31_t offset,
2831 		      q31_t * pDst,
2832 		      uint32_t blockSize);
2833 
2834   /**
2835    * @brief  Negates the elements of a floating-point vector.
2836    * @param[in]  *pSrc points to the input vector
2837    * @param[out]  *pDst points to the output vector
2838    * @param[in]  blockSize number of samples in the vector
2839    * @return none.
2840    */
2841 
2842   void arm_negate_f32(
2843 		       float32_t * pSrc,
2844 		      float32_t * pDst,
2845 		      uint32_t blockSize);
2846 
2847   /**
2848    * @brief  Negates the elements of a Q7 vector.
2849    * @param[in]  *pSrc points to the input vector
2850    * @param[out]  *pDst points to the output vector
2851    * @param[in]  blockSize number of samples in the vector
2852    * @return none.
2853    */
2854 
2855   void arm_negate_q7(
2856 		      q7_t * pSrc,
2857 		     q7_t * pDst,
2858 		     uint32_t blockSize);
2859 
2860   /**
2861    * @brief  Negates the elements of a Q15 vector.
2862    * @param[in]  *pSrc points to the input vector
2863    * @param[out]  *pDst points to the output vector
2864    * @param[in]  blockSize number of samples in the vector
2865    * @return none.
2866    */
2867 
2868   void arm_negate_q15(
2869 		       q15_t * pSrc,
2870 		      q15_t * pDst,
2871 		      uint32_t blockSize);
2872 
2873   /**
2874    * @brief  Negates the elements of a Q31 vector.
2875    * @param[in]  *pSrc points to the input vector
2876    * @param[out]  *pDst points to the output vector
2877    * @param[in]  blockSize number of samples in the vector
2878    * @return none.
2879    */
2880 
2881   void arm_negate_q31(
2882 		       q31_t * pSrc,
2883 		      q31_t * pDst,
2884 		      uint32_t blockSize);
2885   /**
2886    * @brief  Copies the elements of a floating-point vector.
2887    * @param[in]  *pSrc input pointer
2888    * @param[out]  *pDst output pointer
2889    * @param[in]  blockSize number of samples to process
2890    * @return none.
2891    */
2892   void arm_copy_f32(
2893 		     float32_t * pSrc,
2894 		    float32_t * pDst,
2895 		    uint32_t blockSize);
2896 
2897   /**
2898    * @brief  Copies the elements of a Q7 vector.
2899    * @param[in]  *pSrc input pointer
2900    * @param[out]  *pDst output pointer
2901    * @param[in]  blockSize number of samples to process
2902    * @return none.
2903    */
2904   void arm_copy_q7(
2905 		    q7_t * pSrc,
2906 		   q7_t * pDst,
2907 		   uint32_t blockSize);
2908 
2909   /**
2910    * @brief  Copies the elements of a Q15 vector.
2911    * @param[in]  *pSrc input pointer
2912    * @param[out]  *pDst output pointer
2913    * @param[in]  blockSize number of samples to process
2914    * @return none.
2915    */
2916   void arm_copy_q15(
2917 		     q15_t * pSrc,
2918 		    q15_t * pDst,
2919 		    uint32_t blockSize);
2920 
2921   /**
2922    * @brief  Copies the elements of a Q31 vector.
2923    * @param[in]  *pSrc input pointer
2924    * @param[out]  *pDst output pointer
2925    * @param[in]  blockSize number of samples to process
2926    * @return none.
2927    */
2928   void arm_copy_q31(
2929 		     q31_t * pSrc,
2930 		    q31_t * pDst,
2931 		    uint32_t blockSize);
2932   /**
2933    * @brief  Fills a constant value into a floating-point vector.
2934    * @param[in]  value input value to be filled
2935    * @param[out]  *pDst output pointer
2936    * @param[in]  blockSize number of samples to process
2937    * @return none.
2938    */
2939   void arm_fill_f32(
2940 		     float32_t value,
2941 		    float32_t * pDst,
2942 		    uint32_t blockSize);
2943 
2944   /**
2945    * @brief  Fills a constant value into a Q7 vector.
2946    * @param[in]  value input value to be filled
2947    * @param[out]  *pDst output pointer
2948    * @param[in]  blockSize number of samples to process
2949    * @return none.
2950    */
2951   void arm_fill_q7(
2952 		    q7_t value,
2953 		   q7_t * pDst,
2954 		   uint32_t blockSize);
2955 
2956   /**
2957    * @brief  Fills a constant value into a Q15 vector.
2958    * @param[in]  value input value to be filled
2959    * @param[out]  *pDst output pointer
2960    * @param[in]  blockSize number of samples to process
2961    * @return none.
2962    */
2963   void arm_fill_q15(
2964 		     q15_t value,
2965 		    q15_t * pDst,
2966 		    uint32_t blockSize);
2967 
2968   /**
2969    * @brief  Fills a constant value into a Q31 vector.
2970    * @param[in]  value input value to be filled
2971    * @param[out]  *pDst output pointer
2972    * @param[in]  blockSize number of samples to process
2973    * @return none.
2974    */
2975   void arm_fill_q31(
2976 		     q31_t value,
2977 		    q31_t * pDst,
2978 		    uint32_t blockSize);
2979 
2980 /**
2981  * @brief Convolution of floating-point sequences.
2982  * @param[in] *pSrcA points to the first input sequence.
2983  * @param[in] srcALen length of the first input sequence.
2984  * @param[in] *pSrcB points to the second input sequence.
2985  * @param[in] srcBLen length of the second input sequence.
2986  * @param[out] *pDst points to the location where the output result is written.  Length srcALen+srcBLen-1.
2987  * @return none.
2988  */
2989 
2990   void arm_conv_f32(
2991 		     float32_t * pSrcA,
2992 		    uint32_t srcALen,
2993 		     float32_t * pSrcB,
2994 		    uint32_t srcBLen,
2995 		    float32_t * pDst);
2996 
2997 /**
2998  * @brief Convolution of Q15 sequences.
2999  * @param[in] *pSrcA points to the first input sequence.
3000  * @param[in] srcALen length of the first input sequence.
3001  * @param[in] *pSrcB points to the second input sequence.
3002  * @param[in] srcBLen length of the second input sequence.
3003  * @param[out] *pDst points to the location where the output result is written.  Length srcALen+srcBLen-1.
3004  * @return none.
3005  */
3006 
3007   void arm_conv_q15(
3008 		     q15_t * pSrcA,
3009 		    uint32_t srcALen,
3010 		     q15_t * pSrcB,
3011 		    uint32_t srcBLen,
3012 		    q15_t * pDst);
3013 
3014   /**
3015    * @brief Convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4
3016    * @param[in] *pSrcA points to the first input sequence.
3017    * @param[in] srcALen length of the first input sequence.
3018    * @param[in] *pSrcB points to the second input sequence.
3019    * @param[in] srcBLen length of the second input sequence.
3020    * @param[out] *pDst points to the block of output data  Length srcALen+srcBLen-1.
3021    * @return none.
3022    */
3023 
3024   void arm_conv_fast_q15(
3025 			  q15_t * pSrcA,
3026 			 uint32_t srcALen,
3027 			  q15_t * pSrcB,
3028 			 uint32_t srcBLen,
3029 			 q15_t * pDst);
3030 
3031   /**
3032    * @brief Convolution of Q31 sequences.
3033    * @param[in] *pSrcA points to the first input sequence.
3034    * @param[in] srcALen length of the first input sequence.
3035    * @param[in] *pSrcB points to the second input sequence.
3036    * @param[in] srcBLen length of the second input sequence.
3037    * @param[out] *pDst points to the block of output data  Length srcALen+srcBLen-1.
3038    * @return none.
3039    */
3040 
3041   void arm_conv_q31(
3042 		     q31_t * pSrcA,
3043 		    uint32_t srcALen,
3044 		     q31_t * pSrcB,
3045 		    uint32_t srcBLen,
3046 		    q31_t * pDst);
3047 
3048   /**
3049    * @brief Convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4
3050    * @param[in] *pSrcA points to the first input sequence.
3051    * @param[in] srcALen length of the first input sequence.
3052    * @param[in] *pSrcB points to the second input sequence.
3053    * @param[in] srcBLen length of the second input sequence.
3054    * @param[out] *pDst points to the block of output data  Length srcALen+srcBLen-1.
3055    * @return none.
3056    */
3057 
3058   void arm_conv_fast_q31(
3059 			  q31_t * pSrcA,
3060 			 uint32_t srcALen,
3061 			  q31_t * pSrcB,
3062 			 uint32_t srcBLen,
3063 			 q31_t * pDst);
3064 
3065   /**
3066    * @brief Convolution of Q7 sequences.
3067    * @param[in] *pSrcA points to the first input sequence.
3068    * @param[in] srcALen length of the first input sequence.
3069    * @param[in] *pSrcB points to the second input sequence.
3070    * @param[in] srcBLen length of the second input sequence.
3071    * @param[out] *pDst points to the block of output data  Length srcALen+srcBLen-1.
3072    * @return none.
3073    */
3074 
3075   void arm_conv_q7(
3076 		    q7_t * pSrcA,
3077 		   uint32_t srcALen,
3078 		    q7_t * pSrcB,
3079 		   uint32_t srcBLen,
3080 		   q7_t * pDst);
3081 
3082   /**
3083    * @brief Partial convolution of floating-point sequences.
3084    * @param[in]       *pSrcA points to the first input sequence.
3085    * @param[in]       srcALen length of the first input sequence.
3086    * @param[in]       *pSrcB points to the second input sequence.
3087    * @param[in]       srcBLen length of the second input sequence.
3088    * @param[out]      *pDst points to the block of output data
3089    * @param[in]       firstIndex is the first output sample to start with.
3090    * @param[in]       numPoints is the number of output points to be computed.
3091    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
3092    */
3093 
3094   arm_status arm_conv_partial_f32(
3095 				   float32_t * pSrcA,
3096 				  uint32_t srcALen,
3097 				   float32_t * pSrcB,
3098 				  uint32_t srcBLen,
3099 				  float32_t * pDst,
3100 				  uint32_t firstIndex,
3101 				  uint32_t numPoints);
3102 
3103   /**
3104    * @brief Partial convolution of Q15 sequences.
3105    * @param[in]       *pSrcA points to the first input sequence.
3106    * @param[in]       srcALen length of the first input sequence.
3107    * @param[in]       *pSrcB points to the second input sequence.
3108    * @param[in]       srcBLen length of the second input sequence.
3109    * @param[out]      *pDst points to the block of output data
3110    * @param[in]       firstIndex is the first output sample to start with.
3111    * @param[in]       numPoints is the number of output points to be computed.
3112    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
3113    */
3114 
3115   arm_status arm_conv_partial_q15(
3116 				   q15_t * pSrcA,
3117 				  uint32_t srcALen,
3118 				   q15_t * pSrcB,
3119 				  uint32_t srcBLen,
3120 				  q15_t * pDst,
3121 				  uint32_t firstIndex,
3122 				  uint32_t numPoints);
3123 
3124   /**
3125    * @brief Partial convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4
3126    * @param[in]       *pSrcA points to the first input sequence.
3127    * @param[in]       srcALen length of the first input sequence.
3128    * @param[in]       *pSrcB points to the second input sequence.
3129    * @param[in]       srcBLen length of the second input sequence.
3130    * @param[out]      *pDst points to the block of output data
3131    * @param[in]       firstIndex is the first output sample to start with.
3132    * @param[in]       numPoints is the number of output points to be computed.
3133    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
3134    */
3135 
3136   arm_status arm_conv_partial_fast_q15(
3137 				        q15_t * pSrcA,
3138 				       uint32_t srcALen,
3139 				        q15_t * pSrcB,
3140 				       uint32_t srcBLen,
3141 				       q15_t * pDst,
3142 				       uint32_t firstIndex,
3143 				       uint32_t numPoints);
3144 
3145   /**
3146    * @brief Partial convolution of Q31 sequences.
3147    * @param[in]       *pSrcA points to the first input sequence.
3148    * @param[in]       srcALen length of the first input sequence.
3149    * @param[in]       *pSrcB points to the second input sequence.
3150    * @param[in]       srcBLen length of the second input sequence.
3151    * @param[out]      *pDst points to the block of output data
3152    * @param[in]       firstIndex is the first output sample to start with.
3153    * @param[in]       numPoints is the number of output points to be computed.
3154    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
3155    */
3156 
3157   arm_status arm_conv_partial_q31(
3158 				   q31_t * pSrcA,
3159 				  uint32_t srcALen,
3160 				   q31_t * pSrcB,
3161 				  uint32_t srcBLen,
3162 				  q31_t * pDst,
3163 				  uint32_t firstIndex,
3164 				  uint32_t numPoints);
3165 
3166 
3167   /**
3168    * @brief Partial convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4
3169    * @param[in]       *pSrcA points to the first input sequence.
3170    * @param[in]       srcALen length of the first input sequence.
3171    * @param[in]       *pSrcB points to the second input sequence.
3172    * @param[in]       srcBLen length of the second input sequence.
3173    * @param[out]      *pDst points to the block of output data
3174    * @param[in]       firstIndex is the first output sample to start with.
3175    * @param[in]       numPoints is the number of output points to be computed.
3176    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
3177    */
3178 
3179   arm_status arm_conv_partial_fast_q31(
3180 				        q31_t * pSrcA,
3181 				       uint32_t srcALen,
3182 				        q31_t * pSrcB,
3183 				       uint32_t srcBLen,
3184 				       q31_t * pDst,
3185 				       uint32_t firstIndex,
3186 				       uint32_t numPoints);
3187 
3188   /**
3189    * @brief Partial convolution of Q7 sequences.
3190    * @param[in]       *pSrcA points to the first input sequence.
3191    * @param[in]       srcALen length of the first input sequence.
3192    * @param[in]       *pSrcB points to the second input sequence.
3193    * @param[in]       srcBLen length of the second input sequence.
3194    * @param[out]      *pDst points to the block of output data
3195    * @param[in]       firstIndex is the first output sample to start with.
3196    * @param[in]       numPoints is the number of output points to be computed.
3197    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
3198    */
3199 
3200   arm_status arm_conv_partial_q7(
3201 				  q7_t * pSrcA,
3202 				 uint32_t srcALen,
3203 				  q7_t * pSrcB,
3204 				 uint32_t srcBLen,
3205 				 q7_t * pDst,
3206 				 uint32_t firstIndex,
3207 				 uint32_t numPoints);
3208 
3209 
3210   /**
3211    * @brief Instance structure for the Q15 FIR decimator.
3212    */
3213 
3214   typedef struct
3215   {
3216     uint8_t M;                      /**< decimation factor. */
3217     uint16_t numTaps;               /**< number of coefficients in the filter. */
3218     q15_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numTaps.*/
3219     q15_t *pState;                   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
3220   } arm_fir_decimate_instance_q15;
3221 
3222   /**
3223    * @brief Instance structure for the Q31 FIR decimator.
3224    */
3225 
3226   typedef struct
3227   {
3228     uint8_t M;                  /**< decimation factor. */
3229     uint16_t numTaps;           /**< number of coefficients in the filter. */
3230     q31_t *pCoeffs;              /**< points to the coefficient array. The array is of length numTaps.*/
3231     q31_t *pState;               /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
3232 
3233   } arm_fir_decimate_instance_q31;
3234 
3235   /**
3236    * @brief Instance structure for the floating-point FIR decimator.
3237    */
3238 
3239   typedef struct
3240   {
3241     uint8_t M;                          /**< decimation factor. */
3242     uint16_t numTaps;                   /**< number of coefficients in the filter. */
3243     float32_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numTaps.*/
3244     float32_t *pState;                   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
3245 
3246   } arm_fir_decimate_instance_f32;
3247 
3248 
3249 
3250   /**
3251    * @brief Processing function for the floating-point FIR decimator.
3252    * @param[in] *S points to an instance of the floating-point FIR decimator structure.
3253    * @param[in] *pSrc points to the block of input data.
3254    * @param[out] *pDst points to the block of output data
3255    * @param[in] blockSize number of input samples to process per call.
3256    * @return none
3257    */
3258 
3259   void arm_fir_decimate_f32(
3260 			    const arm_fir_decimate_instance_f32 * S,
3261 			     float32_t * pSrc,
3262 			    float32_t * pDst,
3263 			    uint32_t blockSize);
3264 
3265 
3266   /**
3267    * @brief  Initialization function for the floating-point FIR decimator.
3268    * @param[in,out] *S points to an instance of the floating-point FIR decimator structure.
3269    * @param[in] numTaps  number of coefficients in the filter.
3270    * @param[in] M  decimation factor.
3271    * @param[in] *pCoeffs points to the filter coefficients.
3272    * @param[in] *pState points to the state buffer.
3273    * @param[in] blockSize number of input samples to process per call.
3274    * @return    The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
3275    * <code>blockSize</code> is not a multiple of <code>M</code>.
3276    */
3277 
3278   arm_status arm_fir_decimate_init_f32(
3279 				       arm_fir_decimate_instance_f32 * S,
3280 				       uint16_t numTaps,
3281 				       uint8_t M,
3282 				       float32_t * pCoeffs,
3283 				       float32_t * pState,
3284 				       uint32_t blockSize);
3285 
3286   /**
3287    * @brief Processing function for the Q15 FIR decimator.
3288    * @param[in] *S points to an instance of the Q15 FIR decimator structure.
3289    * @param[in] *pSrc points to the block of input data.
3290    * @param[out] *pDst points to the block of output data
3291    * @param[in] blockSize number of input samples to process per call.
3292    * @return none
3293    */
3294 
3295   void arm_fir_decimate_q15(
3296 			    const arm_fir_decimate_instance_q15 * S,
3297 			     q15_t * pSrc,
3298 			    q15_t * pDst,
3299 			    uint32_t blockSize);
3300 
3301   /**
3302    * @brief Processing function for the Q15 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.
3303    * @param[in] *S points to an instance of the Q15 FIR decimator structure.
3304    * @param[in] *pSrc points to the block of input data.
3305    * @param[out] *pDst points to the block of output data
3306    * @param[in] blockSize number of input samples to process per call.
3307    * @return none
3308    */
3309 
3310   void arm_fir_decimate_fast_q15(
3311 				 const arm_fir_decimate_instance_q15 * S,
3312 				  q15_t * pSrc,
3313 				 q15_t * pDst,
3314 				 uint32_t blockSize);
3315 
3316 
3317 
3318   /**
3319    * @brief  Initialization function for the Q15 FIR decimator.
3320    * @param[in,out] *S points to an instance of the Q15 FIR decimator structure.
3321    * @param[in] numTaps  number of coefficients in the filter.
3322    * @param[in] M  decimation factor.
3323    * @param[in] *pCoeffs points to the filter coefficients.
3324    * @param[in] *pState points to the state buffer.
3325    * @param[in] blockSize number of input samples to process per call.
3326    * @return    The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
3327    * <code>blockSize</code> is not a multiple of <code>M</code>.
3328    */
3329 
3330   arm_status arm_fir_decimate_init_q15(
3331 				       arm_fir_decimate_instance_q15 * S,
3332 				       uint16_t numTaps,
3333 				       uint8_t M,
3334 				       q15_t * pCoeffs,
3335 				       q15_t * pState,
3336 				       uint32_t blockSize);
3337 
3338   /**
3339    * @brief Processing function for the Q31 FIR decimator.
3340    * @param[in] *S points to an instance of the Q31 FIR decimator structure.
3341    * @param[in] *pSrc points to the block of input data.
3342    * @param[out] *pDst points to the block of output data
3343    * @param[in] blockSize number of input samples to process per call.
3344    * @return none
3345    */
3346 
3347   void arm_fir_decimate_q31(
3348 			    const arm_fir_decimate_instance_q31 * S,
3349 			     q31_t * pSrc,
3350 			    q31_t * pDst,
3351 			    uint32_t blockSize);
3352 
3353   /**
3354    * @brief Processing function for the Q31 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.
3355    * @param[in] *S points to an instance of the Q31 FIR decimator structure. NOTE(review): unlike arm_fir_decimate_q31() and arm_fir_decimate_fast_q15(), S is not const-qualified here - confirm whether that is intentional.
3356    * @param[in] *pSrc points to the block of input data.
3357    * @param[out] *pDst points to the block of output data.
3358    * @param[in] blockSize number of input samples to process per call.
3359    * @return none
3360    */
3361 
3362   void arm_fir_decimate_fast_q31(
3363 				 arm_fir_decimate_instance_q31 * S,
3364 				  q31_t * pSrc,
3365 				 q31_t * pDst,
3366 				 uint32_t blockSize);
3367 
3368 
3369   /**
3370    * @brief  Initialization function for the Q31 FIR decimator.
3371    * @param[in,out] *S points to an instance of the Q31 FIR decimator structure.
3372    * @param[in] numTaps  number of coefficients in the filter.
3373    * @param[in] M  decimation factor.
3374    * @param[in] *pCoeffs points to the filter coefficients.
3375    * @param[in] *pState points to the state buffer.
3376    * @param[in] blockSize number of input samples to process per call; must be a multiple of <code>M</code>.
3377    * @return    The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
3378    * <code>blockSize</code> is not a multiple of <code>M</code>.
3379    */
3380 
3381   arm_status arm_fir_decimate_init_q31(
3382 				       arm_fir_decimate_instance_q31 * S,
3383 				       uint16_t numTaps,
3384 				       uint8_t M,
3385 				       q31_t * pCoeffs,
3386 				       q31_t * pState,
3387 				       uint32_t blockSize);
3388 
3389 
3390 
3391   /**
3392    * @brief Instance structure for the Q15 FIR interpolator.
3393    */
3394 
3395   typedef struct
3396   {
3397     uint8_t L;                      /**< upsample (interpolation) factor. */
3398     uint16_t phaseLength;           /**< length, in coefficients, of each polyphase filter component. */
3399     q15_t *pCoeffs;                 /**< points to the coefficient array. The array is of length L*phaseLength. */
3400     q15_t *pState;                  /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
3401   } arm_fir_interpolate_instance_q15;
3402 
3403   /**
3404    * @brief Instance structure for the Q31 FIR interpolator.
3405    */
3406 
3407   typedef struct
3408   {
3409     uint8_t L;                      /**< upsample (interpolation) factor. */
3410     uint16_t phaseLength;           /**< length, in coefficients, of each polyphase filter component. */
3411     q31_t *pCoeffs;                  /**< points to the coefficient array. The array is of length L*phaseLength. */
3412     q31_t *pState;                   /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
3413   } arm_fir_interpolate_instance_q31;
3414 
3415   /**
3416    * @brief Instance structure for the floating-point FIR interpolator.
3417    */
3418 
3419   typedef struct
3420   {
3421     uint8_t L;                     /**< upsample (interpolation) factor. */
3422     uint16_t phaseLength;          /**< length, in coefficients, of each polyphase filter component. */
3423     float32_t *pCoeffs;             /**< points to the coefficient array. The array is of length L*phaseLength. */
3424     float32_t *pState;              /**< points to the state variable array. The array is of length phaseLength+numTaps-1. NOTE(review): the Q15 and Q31 instance structures document blockSize+phaseLength-1 for the same buffer - confirm which length is correct. */
3425   } arm_fir_interpolate_instance_f32;
3426 
3427 
3428   /**
3429    * @brief Processing function for the Q15 FIR interpolator.
3430    * @param[in] *S        points to an instance of the Q15 FIR interpolator structure (const; not modified via this pointer).
3431    * @param[in] *pSrc     points to the block of input data.
3432    * @param[out] *pDst    points to the block of output data.
3433    * @param[in] blockSize number of input samples to process per call.
3434    * @return none.
3435    */
3436 
3437   void arm_fir_interpolate_q15(
3438 			       const arm_fir_interpolate_instance_q15 * S,
3439 			        q15_t * pSrc,
3440 			       q15_t * pDst,
3441 			       uint32_t blockSize);
3442 
3443 
3444   /**
3445    * @brief  Initialization function for the Q15 FIR interpolator.
3446    * @param[in,out] *S        points to an instance of the Q15 FIR interpolator structure.
3447    * @param[in]     L         upsample (interpolation) factor.
3448    * @param[in]     numTaps   number of filter coefficients in the filter; must be a multiple of <code>L</code>.
3449    * @param[in]     *pCoeffs  points to the filter coefficient buffer.
3450    * @param[in]     *pState   points to the state buffer.
3451    * @param[in]     blockSize number of input samples to process per call.
3452    * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
3453    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
3454    */
3455 
3456   arm_status arm_fir_interpolate_init_q15(
3457 					  arm_fir_interpolate_instance_q15 * S,
3458 					  uint8_t L,
3459 					  uint16_t numTaps,
3460 					  q15_t * pCoeffs,
3461 					  q15_t * pState,
3462 					  uint32_t blockSize);
3463 
3464   /**
3465    * @brief Processing function for the Q31 FIR interpolator.
3466    * @param[in] *S        points to an instance of the Q31 FIR interpolator structure (const; not modified via this pointer).
3467    * @param[in] *pSrc     points to the block of input data.
3468    * @param[out] *pDst    points to the block of output data.
3469    * @param[in] blockSize number of input samples to process per call.
3470    * @return none.
3471    */
3472 
3473   void arm_fir_interpolate_q31(
3474 			       const arm_fir_interpolate_instance_q31 * S,
3475 			        q31_t * pSrc,
3476 			       q31_t * pDst,
3477 			       uint32_t blockSize);
3478 
3479   /**
3480    * @brief  Initialization function for the Q31 FIR interpolator.
3481    * @param[in,out] *S        points to an instance of the Q31 FIR interpolator structure.
3482    * @param[in]     L         upsample (interpolation) factor.
3483    * @param[in]     numTaps   number of filter coefficients in the filter; must be a multiple of <code>L</code>.
3484    * @param[in]     *pCoeffs  points to the filter coefficient buffer.
3485    * @param[in]     *pState   points to the state buffer.
3486    * @param[in]     blockSize number of input samples to process per call.
3487    * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
3488    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
3489    */
3490 
3491   arm_status arm_fir_interpolate_init_q31(
3492 					  arm_fir_interpolate_instance_q31 * S,
3493 					  uint8_t L,
3494 					  uint16_t numTaps,
3495 					  q31_t * pCoeffs,
3496 					  q31_t * pState,
3497 					  uint32_t blockSize);
3498 
3499 
3500   /**
3501    * @brief Processing function for the floating-point FIR interpolator.
3502    * @param[in] *S        points to an instance of the floating-point FIR interpolator structure (const; not modified via this pointer).
3503    * @param[in] *pSrc     points to the block of input data.
3504    * @param[out] *pDst    points to the block of output data.
3505    * @param[in] blockSize number of input samples to process per call.
3506    * @return none.
3507    */
3508 
3509   void arm_fir_interpolate_f32(
3510 			       const arm_fir_interpolate_instance_f32 * S,
3511 			        float32_t * pSrc,
3512 			       float32_t * pDst,
3513 			       uint32_t blockSize);
3514 
3515   /**
3516    * @brief  Initialization function for the floating-point FIR interpolator.
3517    * @param[in,out] *S        points to an instance of the floating-point FIR interpolator structure.
3518    * @param[in]     L         upsample (interpolation) factor.
3519    * @param[in]     numTaps   number of filter coefficients in the filter; must be a multiple of <code>L</code>.
3520    * @param[in]     *pCoeffs  points to the filter coefficient buffer.
3521    * @param[in]     *pState   points to the state buffer.
3522    * @param[in]     blockSize number of input samples to process per call.
3523    * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
3524    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
3525    */
3526 
3527   arm_status arm_fir_interpolate_init_f32(
3528 					  arm_fir_interpolate_instance_f32 * S,
3529 					  uint8_t L,
3530 					  uint16_t numTaps,
3531 					  float32_t * pCoeffs,
3532 					  float32_t * pState,
3533 					  uint32_t blockSize);
3534 
3535   /**
3536    * @brief Instance structure for the high precision Q31 Biquad cascade filter.
3537    */
3538 
3539   typedef struct
3540   {
3541     uint8_t numStages;       /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
3542     q63_t *pState;           /**< points to the array of state variables (q63_t).  The array is of length 4*numStages. */
3543     q31_t *pCoeffs;          /**< points to the array of coefficients.  The array is of length 5*numStages. */
3544     uint8_t postShift;       /**< additional shift, in bits, applied to each output sample. */
3545 
3546   } arm_biquad_cas_df1_32x64_ins_q31;
3547 
3548 
3549   /** @brief Processing function for the high precision Q31 Biquad cascade filter.
3550    * @param[in]  *S        points to an instance of the high precision Q31 Biquad cascade filter structure (const; not modified via this pointer).
3551    * @param[in]  *pSrc     points to the block of input data.
3552    * @param[out] *pDst     points to the block of output data.
3553    * @param[in]  blockSize number of samples to process.
3554    * @return none.
3555    */
3556 
3557   void arm_biquad_cas_df1_32x64_q31(
3558 				    const arm_biquad_cas_df1_32x64_ins_q31 * S,
3559 				     q31_t * pSrc,
3560 				    q31_t * pDst,
3561 				    uint32_t blockSize);
3562 
3563 
3564   /** @brief Initialization function for the high precision Q31 Biquad cascade filter.
3565    * @param[in,out] *S           points to an instance of the high precision Q31 Biquad cascade filter structure.
3566    * @param[in]     numStages    number of 2nd order stages in the filter.
3567    * @param[in]     *pCoeffs     points to the filter coefficients.  The instance structure documents this array as 5*numStages long.
3568    * @param[in]     *pState      points to the state buffer.  The instance structure documents this array as 4*numStages long.
3569    * @param[in]     postShift    shift to be applied to the output. Varies according to the coefficients format.
3570    * @return        none
3571    */
3572 
3573   void arm_biquad_cas_df1_32x64_init_q31(
3574 					 arm_biquad_cas_df1_32x64_ins_q31 * S,
3575 					 uint8_t numStages,
3576 					 q31_t * pCoeffs,
3577 					 q63_t * pState,
3578 					 uint8_t postShift);
3579 
3580 
3581 
3582   /**
3583    * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter.
3584    */
3585 
3586   typedef struct
3587   {
3588     uint8_t   numStages;       /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
3589     float32_t *pState;         /**< points to the array of state variables.  The array is of length 2*numStages. */
3590     float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
3591   } arm_biquad_cascade_df2T_instance_f32;
3592 
3593 
3594   /**
3595    * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter.
3596    * @param[in]  *S        points to an instance of the filter data structure (const; not modified via this pointer).
3597    * @param[in]  *pSrc     points to the block of input data.
3598    * @param[out] *pDst     points to the block of output data.
3599    * @param[in]  blockSize number of samples to process.
3600    * @return none.
3601    */
3602 
3603   void arm_biquad_cascade_df2T_f32(
3604 				   const arm_biquad_cascade_df2T_instance_f32 * S,
3605 				    float32_t * pSrc,
3606 				   float32_t * pDst,
3607 				   uint32_t blockSize);
3608 
3609 
3610   /**
3611    * @brief  Initialization function for the floating-point transposed direct form II Biquad cascade filter.
3612    * @param[in,out] *S           points to an instance of the filter data structure.
3613    * @param[in]     numStages    number of 2nd order stages in the filter.
3614    * @param[in]     *pCoeffs     points to the filter coefficients.  The instance structure documents this array as 5*numStages long.
3615    * @param[in]     *pState      points to the state buffer.  The instance structure documents this array as 2*numStages long.
3616    * @return        none
3617    */
3618 
3619   void arm_biquad_cascade_df2T_init_f32(
3620 					arm_biquad_cascade_df2T_instance_f32 * S,
3621 					uint8_t numStages,
3622 					float32_t * pCoeffs,
3623 					float32_t * pState);
3624 
3625 
3626 
3627   /**
3628    * @brief Instance structure for the Q15 FIR lattice filter.
3629    */
3630 
3631   typedef struct
3632   {
3633     uint16_t numStages;                          /**< number of filter stages; also the length of the pState and pCoeffs arrays. */
3634     q15_t *pState;                               /**< points to the state variable array. The array is of length numStages. */
3635     q15_t *pCoeffs;                              /**< points to the coefficient array. The array is of length numStages. */
3636   } arm_fir_lattice_instance_q15;
3637 
3638   /**
3639    * @brief Instance structure for the Q31 FIR lattice filter.
3640    */
3641 
3642   typedef struct
3643   {
3644     uint16_t numStages;                          /**< number of filter stages; also the length of the pState and pCoeffs arrays. */
3645     q31_t *pState;                               /**< points to the state variable array. The array is of length numStages. */
3646     q31_t *pCoeffs;                              /**< points to the coefficient array. The array is of length numStages. */
3647   } arm_fir_lattice_instance_q31;
3648 
3649   /**
3650    * @brief Instance structure for the floating-point FIR lattice filter.
3651    */
3652 
3653   typedef struct
3654   {
3655     uint16_t numStages;                  /**< number of filter stages; also the length of the pState and pCoeffs arrays. */
3656     float32_t *pState;                   /**< points to the state variable array. The array is of length numStages. */
3657     float32_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numStages. */
3658   } arm_fir_lattice_instance_f32;
3659 
3660   /**
3661    * @brief Initialization function for the Q15 FIR lattice filter.
3662    * @param[in,out] *S points to an instance of the Q15 FIR lattice structure.
3663    * @param[in] numStages  number of filter stages.
3664    * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
3665    * @param[in] *pState points to the state buffer.  The array is of length numStages.
3666    * @return none.
3667    */
3668 
3669   void arm_fir_lattice_init_q15(
3670 				arm_fir_lattice_instance_q15 * S,
3671 				uint16_t numStages,
3672 				q15_t * pCoeffs,
3673 				q15_t * pState);
3674 
3675 
3676   /**
3677    * @brief Processing function for the Q15 FIR lattice filter.
3678    * @param[in] *S points to an instance of the Q15 FIR lattice structure (const; not modified via this pointer).
3679    * @param[in] *pSrc points to the block of input data.
3680    * @param[out] *pDst points to the block of output data.
3681    * @param[in] blockSize number of samples to process.
3682    * @return none.
3683    */
3684   void arm_fir_lattice_q15(
3685 			   const arm_fir_lattice_instance_q15 * S,
3686 			    q15_t * pSrc,
3687 			   q15_t * pDst,
3688 			   uint32_t blockSize);
3689 
3690   /**
3691    * @brief Initialization function for the Q31 FIR lattice filter.
3692    * @param[in,out] *S points to an instance of the Q31 FIR lattice structure.
3693    * @param[in] numStages  number of filter stages.
3694    * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
3695    * @param[in] *pState points to the state buffer.  The array is of length numStages.
3696    * @return none.
3697    */
3698 
3699   void arm_fir_lattice_init_q31(
3700 				arm_fir_lattice_instance_q31 * S,
3701 				uint16_t numStages,
3702 				q31_t * pCoeffs,
3703 				q31_t * pState);
3704 
3705 
3706   /**
3707    * @brief Processing function for the Q31 FIR lattice filter.
3708    * @param[in]  *S        points to an instance of the Q31 FIR lattice structure (const; not modified via this pointer).
3709    * @param[in]  *pSrc     points to the block of input data.
3710    * @param[out] *pDst     points to the block of output data.
3711    * @param[in]  blockSize number of samples to process.
3712    * @return none.
3713    */
3714 
3715   void arm_fir_lattice_q31(
3716 			   const arm_fir_lattice_instance_q31 * S,
3717 			    q31_t * pSrc,
3718 			   q31_t * pDst,
3719 			   uint32_t blockSize);
3720 
3721 /**
3722  * @brief Initialization function for the floating-point FIR lattice filter.
3723  * @param[in,out] *S points to an instance of the floating-point FIR lattice structure.
3724  * @param[in] numStages  number of filter stages.
3725  * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
3726  * @param[in] *pState points to the state buffer.  The array is of length numStages.
3727  * @return none.
3728  */
3729 
3730   void arm_fir_lattice_init_f32(
3731 				arm_fir_lattice_instance_f32 * S,
3732 				uint16_t numStages,
3733 				float32_t * pCoeffs,
3734 				float32_t * pState);
3735 
3736   /**
3737    * @brief Processing function for the floating-point FIR lattice filter.
3738    * @param[in]  *S        points to an instance of the floating-point FIR lattice structure (const; not modified via this pointer).
3739    * @param[in]  *pSrc     points to the block of input data.
3740    * @param[out] *pDst     points to the block of output data.
3741    * @param[in]  blockSize number of samples to process.
3742    * @return none.
3743    */
3744 
3745   void arm_fir_lattice_f32(
3746 			   const arm_fir_lattice_instance_f32 * S,
3747 			    float32_t * pSrc,
3748 			   float32_t * pDst,
3749 			   uint32_t blockSize);
3750 
3751   /**
3752    * @brief Instance structure for the Q15 IIR lattice filter.
3753    */
3754   typedef struct
3755   {
3756     uint16_t numStages;                         /**< number of stages in the filter; determines the sizes of the arrays below. */
3757     q15_t *pState;                              /**< points to the state variable array. The array is of length numStages+blockSize. */
3758     q15_t *pkCoeffs;                            /**< points to the reflection coefficient array. The array is of length numStages. */
3759     q15_t *pvCoeffs;                            /**< points to the ladder coefficient array. The array is of length numStages+1. */
3760   } arm_iir_lattice_instance_q15;
3761 
3762   /**
3763    * @brief Instance structure for the Q31 IIR lattice filter.
3764    */
3765   typedef struct
3766   {
3767     uint16_t numStages;                         /**< number of stages in the filter; determines the sizes of the arrays below. */
3768     q31_t *pState;                              /**< points to the state variable array. The array is of length numStages+blockSize. */
3769     q31_t *pkCoeffs;                            /**< points to the reflection coefficient array. The array is of length numStages. */
3770     q31_t *pvCoeffs;                            /**< points to the ladder coefficient array. The array is of length numStages+1. */
3771   } arm_iir_lattice_instance_q31;
3772 
3773   /**
3774    * @brief Instance structure for the floating-point IIR lattice filter.
3775    */
3776   typedef struct
3777   {
3778     uint16_t numStages;                         /**< number of stages in the filter; determines the sizes of the arrays below. */
3779     float32_t *pState;                          /**< points to the state variable array. The array is of length numStages+blockSize. */
3780     float32_t *pkCoeffs;                        /**< points to the reflection coefficient array. The array is of length numStages. */
3781     float32_t *pvCoeffs;                        /**< points to the ladder coefficient array. The array is of length numStages+1. */
3782   } arm_iir_lattice_instance_f32;
3783 
3784   /**
3785    * @brief Processing function for the floating-point IIR lattice filter.
3786    * @param[in] *S points to an instance of the floating-point IIR lattice structure (const; not modified via this pointer).
3787    * @param[in] *pSrc points to the block of input data.
3788    * @param[out] *pDst points to the block of output data.
3789    * @param[in] blockSize number of samples to process.
3790    * @return none.
3791    */
3792 
3793   void arm_iir_lattice_f32(
3794 			   const arm_iir_lattice_instance_f32 * S,
3795 			    float32_t * pSrc,
3796 			   float32_t * pDst,
3797 			   uint32_t blockSize);
3798 
3799   /**
3800    * @brief Initialization function for the floating-point IIR lattice filter.
3801    * @param[in,out] *S points to an instance of the floating-point IIR lattice structure.
3802    * @param[in] numStages number of stages in the filter.
3803    * @param[in] *pkCoeffs points to the reflection coefficient buffer.  The array is of length numStages.
3804    * @param[in] *pvCoeffs points to the ladder coefficient buffer.  The array is of length numStages+1.
3805    * @param[in] *pState points to the state buffer.  The array is of length numStages+blockSize, as documented on arm_iir_lattice_instance_f32 and on the Q31/Q15 init functions.
3806    * @param[in] blockSize number of samples to process.
3807    * @return none.
3808    */
3809 
3810   void arm_iir_lattice_init_f32(
3811 				arm_iir_lattice_instance_f32 * S,
3812 				uint16_t numStages,
3813 				float32_t *pkCoeffs,
3814 				float32_t *pvCoeffs,
3815 				float32_t *pState,
3816 				uint32_t blockSize);
3817 
3818 
3819   /**
3820    * @brief Processing function for the Q31 IIR lattice filter.
3821    * @param[in] *S points to an instance of the Q31 IIR lattice structure (const; not modified via this pointer).
3822    * @param[in] *pSrc points to the block of input data.
3823    * @param[out] *pDst points to the block of output data.
3824    * @param[in] blockSize number of samples to process.
3825    * @return none.
3826    */
3827 
3828   void arm_iir_lattice_q31(
3829 			   const arm_iir_lattice_instance_q31 * S,
3830 			    q31_t * pSrc,
3831 			   q31_t * pDst,
3832 			   uint32_t blockSize);
3833 
3834 
3835   /**
3836    * @brief Initialization function for the Q31 IIR lattice filter.
3837    * @param[in,out] *S points to an instance of the Q31 IIR lattice structure.
3838    * @param[in] numStages number of stages in the filter.
3839    * @param[in] *pkCoeffs points to the reflection coefficient buffer.  The array is of length numStages.
3840    * @param[in] *pvCoeffs points to the ladder coefficient buffer.  The array is of length numStages+1.
3841    * @param[in] *pState points to the state buffer.  The array is of length numStages+blockSize.
3842    * @param[in] blockSize number of samples to process.
3843    * @return none.
3844    */
3845 
3846   void arm_iir_lattice_init_q31(
3847 				arm_iir_lattice_instance_q31 * S,
3848 				uint16_t numStages,
3849 				q31_t *pkCoeffs,
3850 				q31_t *pvCoeffs,
3851 				q31_t *pState,
3852 				uint32_t blockSize);
3853 
3854 
3855   /**
3856    * @brief Processing function for the Q15 IIR lattice filter.
3857    * @param[in] *S points to an instance of the Q15 IIR lattice structure (const; not modified via this pointer).
3858    * @param[in] *pSrc points to the block of input data.
3859    * @param[out] *pDst points to the block of output data.
3860    * @param[in] blockSize number of samples to process.
3861    * @return none.
3862    */
3863 
3864   void arm_iir_lattice_q15(
3865 			   const arm_iir_lattice_instance_q15 * S,
3866 			    q15_t * pSrc,
3867 			   q15_t * pDst,
3868 			   uint32_t blockSize);
3869 
3870 
3871 /**
3872  * @brief Initialization function for the Q15 IIR lattice filter.
3873  * @param[in,out] *S points to an instance of the fixed-point Q15 IIR lattice structure.
3874  * @param[in] numStages  number of stages in the filter.
3875  * @param[in] *pkCoeffs points to the reflection coefficient buffer.  The array is of length numStages.
3876  * @param[in] *pvCoeffs points to the ladder coefficient buffer.  The array is of length numStages+1.
3877  * @param[in] *pState points to the state buffer.  The array is of length numStages+blockSize.
3878  * @param[in] blockSize number of samples to process per call.
3879  * @return none.
3880  */
3881 
3882   void arm_iir_lattice_init_q15(
3883 				arm_iir_lattice_instance_q15 * S,
3884 				uint16_t numStages,
3885 				q15_t *pkCoeffs,
3886 				q15_t *pvCoeffs,
3887 				q15_t *pState,
3888 				uint32_t blockSize);
3889 
3890   /**
3891    * @brief Instance structure for the floating-point LMS filter.
3892    */
3893 
3894   typedef struct
3895   {
3896     uint16_t numTaps;    /**< number of coefficients in the filter (length of the pCoeffs array). */
3897     float32_t *pState;   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
3898     float32_t *pCoeffs;  /**< points to the coefficient array. The array is of length numTaps. */
3899     float32_t mu;        /**< step size that controls filter coefficient updates. */
3900   } arm_lms_instance_f32;
3901 
3902   /**
3903    * @brief Processing function for floating-point LMS filter.
3904    * @param[in]  *S points to an instance of the floating-point LMS filter structure (const; not modified via this pointer).
3905    * @param[in]  *pSrc points to the block of input data.
3906    * @param[in]  *pRef points to the block of reference data.
3907    * @param[out] *pOut points to the block of output data.
3908    * @param[out] *pErr points to the block of error data.
3909    * @param[in]  blockSize number of samples to process.
3910    * @return     none.
3911    */
3912 
3913   void arm_lms_f32(
3914 		   const arm_lms_instance_f32 * S,
3915 		    float32_t * pSrc,
3916 		    float32_t * pRef,
3917 		   float32_t * pOut,
3918 		   float32_t * pErr,
3919 		   uint32_t blockSize);
3920 
3921   /**
3922    * @brief Initialization function for floating-point LMS filter.
3923    * @param[in,out] *S points to an instance of the floating-point LMS filter structure.
3924    * @param[in] numTaps  number of filter coefficients.
3925    * @param[in] *pCoeffs points to the coefficient buffer.  The instance structure documents this array as numTaps long.
3926    * @param[in] *pState points to the state buffer.  The instance structure documents this array as numTaps+blockSize-1 long.
3927    * @param[in] mu step size that controls filter coefficient updates.
3928    * @param[in] blockSize number of samples to process.
3929    * @return none.
3930    */
3931 
3932   void arm_lms_init_f32(
3933 			arm_lms_instance_f32 * S,
3934 			uint16_t numTaps,
3935 			float32_t * pCoeffs,
3936 			float32_t * pState,
3937 			float32_t mu,
3938 			uint32_t blockSize);
3939 
3940   /**
3941    * @brief Instance structure for the Q15 LMS filter.
3942    */
3943 
3944   typedef struct
3945   {
3946     uint16_t numTaps;    /**< number of coefficients in the filter (length of the pCoeffs array). */
3947     q15_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
3948     q15_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
3949     q15_t mu;            /**< step size that controls filter coefficient updates. */
3950     uint32_t postShift;  /**< bit shift applied to coefficients. */
3951   } arm_lms_instance_q15;
3952 
3953 
3954   /**
3955    * @brief Initialization function for the Q15 LMS filter.
3956    * @param[in,out] *S points to an instance of the Q15 LMS filter structure.
3957    * @param[in] numTaps  number of filter coefficients.
3958    * @param[in] *pCoeffs points to the coefficient buffer.  The instance structure documents this array as numTaps long.
3959    * @param[in] *pState points to the state buffer.  The instance structure documents this array as numTaps+blockSize-1 long.
3960    * @param[in] mu step size that controls filter coefficient updates.
3961    * @param[in] blockSize number of samples to process.
3962    * @param[in] postShift bit shift applied to coefficients.
3963    * @return    none.
3964    */
3965 
3966   void arm_lms_init_q15(
3967 			arm_lms_instance_q15 * S,
3968 			uint16_t numTaps,
3969 			q15_t * pCoeffs,
3970 			q15_t * pState,
3971 			q15_t mu,
3972 			uint32_t blockSize,
3973 			uint32_t postShift);
3974 
3975   /**
3976    * @brief Processing function for Q15 LMS filter.
3977    * @param[in] *S points to an instance of the Q15 LMS filter structure (const; not modified via this pointer).
3978    * @param[in] *pSrc points to the block of input data.
3979    * @param[in] *pRef points to the block of reference data.
3980    * @param[out] *pOut points to the block of output data.
3981    * @param[out] *pErr points to the block of error data.
3982    * @param[in] blockSize number of samples to process.
3983    * @return none.
3984    */
3985 
3986   void arm_lms_q15(
3987 		   const arm_lms_instance_q15 * S,
3988 		    q15_t * pSrc,
3989 		    q15_t * pRef,
3990 		   q15_t * pOut,
3991 		   q15_t * pErr,
3992 		   uint32_t blockSize);
3993 
3994 
3995   /**
3996    * @brief Instance structure for the Q31 LMS filter.
3997    */
3998 
3999   typedef struct
4000   {
4001     uint16_t numTaps;    /**< number of coefficients in the filter (length of the pCoeffs array). */
4002     q31_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4003     q31_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
4004     q31_t mu;            /**< step size that controls filter coefficient updates. */
4005     uint32_t postShift;  /**< bit shift applied to coefficients. */
4006 
4007   } arm_lms_instance_q31;
4008 
4009   /**
4010    * @brief Processing function for Q31 LMS filter.
4011    * @param[in]  *S points to an instance of the Q31 LMS filter structure (const; not modified via this pointer).
4012    * @param[in]  *pSrc points to the block of input data.
4013    * @param[in]  *pRef points to the block of reference data.
4014    * @param[out] *pOut points to the block of output data.
4015    * @param[out] *pErr points to the block of error data.
4016    * @param[in]  blockSize number of samples to process.
4017    * @return     none.
4018    */
4019 
4020   void arm_lms_q31(
4021 		   const arm_lms_instance_q31 * S,
4022 		    q31_t * pSrc,
4023 		    q31_t * pRef,
4024 		   q31_t * pOut,
4025 		   q31_t * pErr,
4026 		   uint32_t blockSize);
4027 
4028   /**
4029    * @brief Initialization function for Q31 LMS filter.
4030    * @param[in,out] *S points to an instance of the Q31 LMS filter structure.
4031    * @param[in] numTaps  number of filter coefficients.
4032    * @param[in] *pCoeffs points to the coefficient buffer.  The instance structure documents this array as numTaps long.
4033    * @param[in] *pState points to the state buffer.  The instance structure documents this array as numTaps+blockSize-1 long.
4034    * @param[in] mu step size that controls filter coefficient updates.
4035    * @param[in] blockSize number of samples to process.
4036    * @param[in] postShift bit shift applied to coefficients.
4037    * @return none.
4038    */
4039 
4040   void arm_lms_init_q31(
4041 			arm_lms_instance_q31 * S,
4042 			uint16_t numTaps,
4043 			q31_t *pCoeffs,
4044 			q31_t *pState,
4045 			q31_t mu,
4046 			uint32_t blockSize,
4047 			uint32_t postShift);
4048 
4049   /**
4050    * @brief Instance structure for the floating-point normalized LMS filter.
4051    */
4052 
4053   typedef struct
4054   {
4055     uint16_t  numTaps;    /**< number of coefficients in the filter (length of the pCoeffs array). */
4056     float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4057     float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
4058     float32_t mu;        /**< step size that controls filter coefficient updates. */
4059     float32_t energy;    /**< saves previous frame energy. */
4060     float32_t x0;        /**< saves previous input sample. */
4061   } arm_lms_norm_instance_f32;
4062 
4063   /**
4064    * @brief Processing function for floating-point normalized LMS filter.
4065    * @param[in] *S points to an instance of the floating-point normalized LMS filter structure. NOTE(review): S is not const here, presumably because the saved energy/x0 fields are updated on each call - confirm.
4066    * @param[in] *pSrc points to the block of input data.
4067    * @param[in] *pRef points to the block of reference data.
4068    * @param[out] *pOut points to the block of output data.
4069    * @param[out] *pErr points to the block of error data.
4070    * @param[in] blockSize number of samples to process.
4071    * @return none.
4072    */
4073 
4074   void arm_lms_norm_f32(
4075 			arm_lms_norm_instance_f32 * S,
4076 			 float32_t * pSrc,
4077 			 float32_t * pRef,
4078 			float32_t * pOut,
4079 			float32_t * pErr,
4080 			uint32_t blockSize);
4081 
4082   /**
4083    * @brief Initialization function for floating-point normalized LMS filter.
4084    * @param[in,out] *S points to an instance of the floating-point normalized LMS filter structure.
4085    * @param[in] numTaps  number of filter coefficients.
4086    * @param[in] *pCoeffs points to the coefficient buffer.  The instance structure documents this array as numTaps long.
4087    * @param[in] *pState points to the state buffer.  The instance structure documents this array as numTaps+blockSize-1 long.
4088    * @param[in] mu step size that controls filter coefficient updates.
4089    * @param[in] blockSize number of samples to process.
4090    * @return none.
4091    */
4092 
4093   void arm_lms_norm_init_f32(
4094 			     arm_lms_norm_instance_f32 * S,
4095 			     uint16_t numTaps,
4096 			     float32_t * pCoeffs,
4097 			     float32_t * pState,
4098 			     float32_t mu,
4099 			     uint32_t blockSize);
4100 
4101 
4102   /**
4103    * @brief Instance structure for the Q31 normalized LMS filter.
4104    */
4105   typedef struct
4106   {
4107     uint16_t numTaps;     /**< number of coefficients in the filter. */
4108     q31_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4109     q31_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
4110     q31_t mu;             /**< step size that controls filter coefficient updates. */
4111     uint8_t postShift;    /**< bit shift applied to coefficients. */
4112     q31_t *recipTable;    /**< points to the reciprocal initial value table. */
4113     q31_t energy;         /**< saves previous frame energy. */
4114     q31_t x0;             /**< saves previous input sample. */
4115   } arm_lms_norm_instance_q31;
4116 
4117   /**
4118    * @brief Processing function for Q31 normalized LMS filter.
4119    * @param[in] *S points to an instance of the Q31 normalized LMS filter structure.
4120    * @param[in] *pSrc points to the block of input data.
4121    * @param[in] *pRef points to the block of reference data.
4122    * @param[out] *pOut points to the block of output data.
4123    * @param[out] *pErr points to the block of error data.
4124    * @param[in] blockSize number of samples to process.
4125    * @return none.
4126    */
4127 
4128   void arm_lms_norm_q31(
4129 			arm_lms_norm_instance_q31 * S,
4130 			 q31_t * pSrc,
4131 			 q31_t * pRef,
4132 			q31_t * pOut,
4133 			q31_t * pErr,
4134 			uint32_t blockSize);
4135 
4136   /**
4137    * @brief Initialization function for Q31 normalized LMS filter.
4138    * @param[in] *S points to an instance of the Q31 normalized LMS filter structure.
4139    * @param[in] numTaps  number of filter coefficients.
4140    * @param[in] *pCoeffs points to coefficient buffer.
4141    * @param[in] *pState points to state buffer.
4142    * @param[in] mu step size that controls filter coefficient updates.
4143    * @param[in] blockSize number of samples to process.
4144    * @param[in] postShift bit shift applied to coefficients.
4145    * @return none.
4146    */
4147 
4148   void arm_lms_norm_init_q31(
4149 			     arm_lms_norm_instance_q31 * S,
4150 			     uint16_t numTaps,
4151 			     q31_t * pCoeffs,
4152 			     q31_t * pState,
4153 			     q31_t mu,
4154 			     uint32_t blockSize,
4155 			     uint8_t postShift);
4156 
4157   /**
4158    * @brief Instance structure for the Q15 normalized LMS filter.
4159    */
4160 
4161   typedef struct
4162   {
4163     uint16_t numTaps;    /**< Number of coefficients in the filter. */
4164     q15_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4165     q15_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
4166     q15_t mu;            /**< step size that controls filter coefficient updates. */
4167     uint8_t postShift;   /**< bit shift applied to coefficients. */
4168     q15_t *recipTable;   /**< Points to the reciprocal initial value table. */
4169     q15_t energy;        /**< saves previous frame energy. */
4170     q15_t x0;            /**< saves previous input sample. */
4171   } arm_lms_norm_instance_q15;
4172 
4173   /**
4174    * @brief Processing function for Q15 normalized LMS filter.
4175    * @param[in] *S points to an instance of the Q15 normalized LMS filter structure.
4176    * @param[in] *pSrc points to the block of input data.
4177    * @param[in] *pRef points to the block of reference data.
4178    * @param[out] *pOut points to the block of output data.
4179    * @param[out] *pErr points to the block of error data.
4180    * @param[in] blockSize number of samples to process.
4181    * @return none.
4182    */
4183 
4184   void arm_lms_norm_q15(
4185 			arm_lms_norm_instance_q15 * S,
4186 			 q15_t * pSrc,
4187 			 q15_t * pRef,
4188 			q15_t * pOut,
4189 			q15_t * pErr,
4190 			uint32_t blockSize);
4191 
4192 
4193   /**
4194    * @brief Initialization function for Q15 normalized LMS filter.
4195    * @param[in] *S points to an instance of the Q15 normalized LMS filter structure.
4196    * @param[in] numTaps  number of filter coefficients.
4197    * @param[in] *pCoeffs points to coefficient buffer.
4198    * @param[in] *pState points to state buffer.
4199    * @param[in] mu step size that controls filter coefficient updates.
4200    * @param[in] blockSize number of samples to process.
4201    * @param[in] postShift bit shift applied to coefficients.
4202    * @return none.
4203    */
4204 
4205   void arm_lms_norm_init_q15(
4206 			     arm_lms_norm_instance_q15 * S,
4207 			     uint16_t numTaps,
4208 			     q15_t * pCoeffs,
4209 			     q15_t * pState,
4210 			     q15_t mu,
4211 			     uint32_t blockSize,
4212 			     uint8_t postShift);
4213 
4214   /**
4215    * @brief Correlation of floating-point sequences.
4216    * @param[in] *pSrcA points to the first input sequence.
4217    * @param[in] srcALen length of the first input sequence.
4218    * @param[in] *pSrcB points to the second input sequence.
4219    * @param[in] srcBLen length of the second input sequence.
4220    * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
4221    * @return none.
4222    */
4223 
4224   void arm_correlate_f32(
4225 			  float32_t * pSrcA,
4226 			 uint32_t srcALen,
4227 			  float32_t * pSrcB,
4228 			 uint32_t srcBLen,
4229 			 float32_t * pDst);
4230 
4231   /**
4232    * @brief Correlation of Q15 sequences.
4233    * @param[in] *pSrcA points to the first input sequence.
4234    * @param[in] srcALen length of the first input sequence.
4235    * @param[in] *pSrcB points to the second input sequence.
4236    * @param[in] srcBLen length of the second input sequence.
4237    * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
4238    * @return none.
4239    */
4240 
4241   void arm_correlate_q15(
4242 			  q15_t * pSrcA,
4243 			 uint32_t srcALen,
4244 			  q15_t * pSrcB,
4245 			 uint32_t srcBLen,
4246 			 q15_t * pDst);
4247 
4248   /**
4249    * @brief Correlation of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
4250    * @param[in] *pSrcA points to the first input sequence.
4251    * @param[in] srcALen length of the first input sequence.
4252    * @param[in] *pSrcB points to the second input sequence.
4253    * @param[in] srcBLen length of the second input sequence.
4254    * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
4255    * @return none.
4256    */
4257 
4258   void arm_correlate_fast_q15(
4259 			       q15_t * pSrcA,
4260 			      uint32_t srcALen,
4261 			       q15_t * pSrcB,
4262 			      uint32_t srcBLen,
4263 			      q15_t * pDst);
4264 
4265   /**
4266    * @brief Correlation of Q31 sequences.
4267    * @param[in] *pSrcA points to the first input sequence.
4268    * @param[in] srcALen length of the first input sequence.
4269    * @param[in] *pSrcB points to the second input sequence.
4270    * @param[in] srcBLen length of the second input sequence.
4271    * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
4272    * @return none.
4273    */
4274 
4275   void arm_correlate_q31(
4276 			  q31_t * pSrcA,
4277 			 uint32_t srcALen,
4278 			  q31_t * pSrcB,
4279 			 uint32_t srcBLen,
4280 			 q31_t * pDst);
4281 
4282   /**
4283    * @brief Correlation of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4
4284    * @param[in] *pSrcA points to the first input sequence.
4285    * @param[in] srcALen length of the first input sequence.
4286    * @param[in] *pSrcB points to the second input sequence.
4287    * @param[in] srcBLen length of the second input sequence.
4288    * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
4289    * @return none.
4290    */
4291 
4292   void arm_correlate_fast_q31(
4293 			       q31_t * pSrcA,
4294 			      uint32_t srcALen,
4295 			       q31_t * pSrcB,
4296 			      uint32_t srcBLen,
4297 			      q31_t * pDst);
4298 
4299   /**
4300    * @brief Correlation of Q7 sequences.
4301    * @param[in] *pSrcA points to the first input sequence.
4302    * @param[in] srcALen length of the first input sequence.
4303    * @param[in] *pSrcB points to the second input sequence.
4304    * @param[in] srcBLen length of the second input sequence.
4305    * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
4306    * @return none.
4307    */
4308 
4309   void arm_correlate_q7(
4310 			 q7_t * pSrcA,
4311 			uint32_t srcALen,
4312 			 q7_t * pSrcB,
4313 			uint32_t srcBLen,
4314 			q7_t * pDst);
4315 
4316   /**
4317    * @brief Instance structure for the floating-point sparse FIR filter.
4318    */
4319   typedef struct
4320   {
4321     uint16_t numTaps;             /**< number of coefficients in the filter. */
4322     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4323     float32_t *pState;            /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4324     float32_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
4325     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4326     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4327   } arm_fir_sparse_instance_f32;
4328 
4329   /**
4330    * @brief Instance structure for the Q31 sparse FIR filter.
4331    */
4332 
4333   typedef struct
4334   {
4335     uint16_t numTaps;             /**< number of coefficients in the filter. */
4336     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4337     q31_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4338     q31_t *pCoeffs;               /**< points to the coefficient array. The array is of length numTaps.*/
4339     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4340     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4341   } arm_fir_sparse_instance_q31;
4342 
4343   /**
4344    * @brief Instance structure for the Q15 sparse FIR filter.
4345    */
4346 
4347   typedef struct
4348   {
4349     uint16_t numTaps;             /**< number of coefficients in the filter. */
4350     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4351     q15_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4352     q15_t *pCoeffs;               /**< points to the coefficient array. The array is of length numTaps.*/
4353     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4354     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4355   } arm_fir_sparse_instance_q15;
4356 
4357   /**
4358    * @brief Instance structure for the Q7 sparse FIR filter.
4359    */
4360 
4361   typedef struct
4362   {
4363     uint16_t numTaps;             /**< number of coefficients in the filter. */
4364     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4365     q7_t *pState;                 /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4366     q7_t *pCoeffs;                /**< points to the coefficient array. The array is of length numTaps.*/
4367     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4368     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4369   } arm_fir_sparse_instance_q7;
4370 
4371   /**
4372    * @brief Processing function for the floating-point sparse FIR filter.
4373    * @param[in]  *S          points to an instance of the floating-point sparse FIR structure.
4374    * @param[in]  *pSrc       points to the block of input data.
4375    * @param[out] *pDst       points to the block of output data
4376    * @param[in]  *pScratchIn points to a temporary buffer of size blockSize.
4377    * @param[in]  blockSize   number of input samples to process per call.
4378    * @return none.
4379    */
4380 
4381   void arm_fir_sparse_f32(
4382 			  arm_fir_sparse_instance_f32 * S,
4383 			   float32_t * pSrc,
4384 			  float32_t * pDst,
4385 			  float32_t * pScratchIn,
4386 			  uint32_t blockSize);
4387 
4388   /**
4389    * @brief  Initialization function for the floating-point sparse FIR filter.
4390    * @param[in,out] *S         points to an instance of the floating-point sparse FIR structure.
4391    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4392    * @param[in]     *pCoeffs   points to the array of filter coefficients.
4393    * @param[in]     *pState    points to the state buffer.
4394    * @param[in]     *pTapDelay points to the array of offset times.
4395    * @param[in]     maxDelay   maximum offset time supported.
4396    * @param[in]     blockSize  number of samples that will be processed per block.
4397    * @return none
4398    */
4399 
4400   void arm_fir_sparse_init_f32(
4401 			       arm_fir_sparse_instance_f32 * S,
4402 			       uint16_t numTaps,
4403 			       float32_t * pCoeffs,
4404 			       float32_t * pState,
4405 			       int32_t * pTapDelay,
4406 			       uint16_t maxDelay,
4407 			       uint32_t blockSize);
4408 
4409   /**
4410    * @brief Processing function for the Q31 sparse FIR filter.
4411    * @param[in]  *S          points to an instance of the Q31 sparse FIR structure.
4412    * @param[in]  *pSrc       points to the block of input data.
4413    * @param[out] *pDst       points to the block of output data
4414    * @param[in]  *pScratchIn points to a temporary buffer of size blockSize.
4415    * @param[in]  blockSize   number of input samples to process per call.
4416    * @return none.
4417    */
4418 
4419   void arm_fir_sparse_q31(
4420 			  arm_fir_sparse_instance_q31 * S,
4421 			   q31_t * pSrc,
4422 			  q31_t * pDst,
4423 			  q31_t * pScratchIn,
4424 			  uint32_t blockSize);
4425 
4426   /**
4427    * @brief  Initialization function for the Q31 sparse FIR filter.
4428    * @param[in,out] *S         points to an instance of the Q31 sparse FIR structure.
4429    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4430    * @param[in]     *pCoeffs   points to the array of filter coefficients.
4431    * @param[in]     *pState    points to the state buffer.
4432    * @param[in]     *pTapDelay points to the array of offset times.
4433    * @param[in]     maxDelay   maximum offset time supported.
4434    * @param[in]     blockSize  number of samples that will be processed per block.
4435    * @return none
4436    */
4437 
4438   void arm_fir_sparse_init_q31(
4439 			       arm_fir_sparse_instance_q31 * S,
4440 			       uint16_t numTaps,
4441 			       q31_t * pCoeffs,
4442 			       q31_t * pState,
4443 			       int32_t * pTapDelay,
4444 			       uint16_t maxDelay,
4445 			       uint32_t blockSize);
4446 
4447   /**
4448    * @brief Processing function for the Q15 sparse FIR filter.
4449    * @param[in]  *S           points to an instance of the Q15 sparse FIR structure.
4450    * @param[in]  *pSrc        points to the block of input data.
4451    * @param[out] *pDst        points to the block of output data
4452    * @param[in]  *pScratchIn  points to a temporary buffer of size blockSize.
4453    * @param[in]  *pScratchOut points to a temporary buffer of size blockSize.
4454    * @param[in]  blockSize    number of input samples to process per call.
4455    * @return none.
4456    */
4457 
4458   void arm_fir_sparse_q15(
4459 			  arm_fir_sparse_instance_q15 * S,
4460 			   q15_t * pSrc,
4461 			  q15_t * pDst,
4462 			  q15_t * pScratchIn,
4463 			  q31_t * pScratchOut,
4464 			  uint32_t blockSize);
4465 
4466 
4467   /**
4468    * @brief  Initialization function for the Q15 sparse FIR filter.
4469    * @param[in,out] *S         points to an instance of the Q15 sparse FIR structure.
4470    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4471    * @param[in]     *pCoeffs   points to the array of filter coefficients.
4472    * @param[in]     *pState    points to the state buffer.
4473    * @param[in]     *pTapDelay points to the array of offset times.
4474    * @param[in]     maxDelay   maximum offset time supported.
4475    * @param[in]     blockSize  number of samples that will be processed per block.
4476    * @return none
4477    */
4478 
4479   void arm_fir_sparse_init_q15(
4480 			       arm_fir_sparse_instance_q15 * S,
4481 			       uint16_t numTaps,
4482 			       q15_t * pCoeffs,
4483 			       q15_t * pState,
4484 			       int32_t * pTapDelay,
4485 			       uint16_t maxDelay,
4486 			       uint32_t blockSize);
4487 
4488   /**
4489    * @brief Processing function for the Q7 sparse FIR filter.
4490    * @param[in]  *S           points to an instance of the Q7 sparse FIR structure.
4491    * @param[in]  *pSrc        points to the block of input data.
4492    * @param[out] *pDst        points to the block of output data
4493    * @param[in]  *pScratchIn  points to a temporary buffer of size blockSize.
4494    * @param[in]  *pScratchOut points to a temporary buffer of size blockSize.
4495    * @param[in]  blockSize    number of input samples to process per call.
4496    * @return none.
4497    */
4498 
4499   void arm_fir_sparse_q7(
4500 			 arm_fir_sparse_instance_q7 * S,
4501 			  q7_t * pSrc,
4502 			 q7_t * pDst,
4503 			 q7_t * pScratchIn,
4504 			 q31_t * pScratchOut,
4505 			 uint32_t blockSize);
4506 
4507   /**
4508    * @brief  Initialization function for the Q7 sparse FIR filter.
4509    * @param[in,out] *S         points to an instance of the Q7 sparse FIR structure.
4510    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4511    * @param[in]     *pCoeffs   points to the array of filter coefficients.
4512    * @param[in]     *pState    points to the state buffer.
4513    * @param[in]     *pTapDelay points to the array of offset times.
4514    * @param[in]     maxDelay   maximum offset time supported.
4515    * @param[in]     blockSize  number of samples that will be processed per block.
4516    * @return none
4517    */
4518 
4519   void arm_fir_sparse_init_q7(
4520 			      arm_fir_sparse_instance_q7 * S,
4521 			      uint16_t numTaps,
4522 			      q7_t * pCoeffs,
4523 			      q7_t * pState,
4524 			      int32_t *pTapDelay,
4525 			      uint16_t maxDelay,
4526 			      uint32_t blockSize);
4527 
4528 
4529   /*
4530    * @brief  Floating-point sin_cos function.
4531    * @param[in]  theta    input value in degrees
4532    * @param[out] *pSinVal points to the processed sine output.
4533    * @param[out] *pCosVal points to the processed cos output.
4534    * @return none.
4535    */
4536 
4537   void arm_sin_cos_f32(
4538 		       float32_t theta,
4539 		       float32_t *pSinVal,
4540 		       float32_t *pCcosVal);
4541 
4542   /*
4543    * @brief  Q31 sin_cos function.
4544    * @param[in]  theta    scaled input value in degrees
4545    * @param[out] *pSinVal points to the processed sine output.
4546    * @param[out] *pCosVal points to the processed cosine output.
4547    * @return none.
4548    */
4549 
4550   void arm_sin_cos_q31(
4551 		       q31_t theta,
4552 		       q31_t *pSinVal,
4553 		       q31_t *pCosVal);
4554 
4555 
4556   /**
4557    * @brief  Floating-point complex conjugate.
4558    * @param[in]  *pSrc points to the input vector
4559    * @param[out]  *pDst points to the output vector
4560    * @param[in]  numSamples number of complex samples in each vector
4561    * @return none.
4562    */
4563 
4564   void arm_cmplx_conj_f32(
4565 			   float32_t * pSrc,
4566 			  float32_t * pDst,
4567 			  uint32_t numSamples);
4568 
4569   /**
4570    * @brief  Q31 complex conjugate.
4571    * @param[in]  *pSrc points to the input vector
4572    * @param[out]  *pDst points to the output vector
4573    * @param[in]  numSamples number of complex samples in each vector
4574    * @return none.
4575    */
4576 
4577   void arm_cmplx_conj_q31(
4578 			   q31_t * pSrc,
4579 			  q31_t * pDst,
4580 			  uint32_t numSamples);
4581 
4582   /**
4583    * @brief  Q15 complex conjugate.
4584    * @param[in]  *pSrc points to the input vector
4585    * @param[out]  *pDst points to the output vector
4586    * @param[in]  numSamples number of complex samples in each vector
4587    * @return none.
4588    */
4589 
4590   void arm_cmplx_conj_q15(
4591 			   q15_t * pSrc,
4592 			  q15_t * pDst,
4593 			  uint32_t numSamples);
4594 
4595 
4596 
4597   /**
4598    * @brief  Floating-point complex magnitude squared
4599    * @param[in]  *pSrc points to the complex input vector
4600    * @param[out]  *pDst points to the real output vector
4601    * @param[in]  numSamples number of complex samples in the input vector
4602    * @return none.
4603    */
4604 
4605   void arm_cmplx_mag_squared_f32(
4606 				  float32_t * pSrc,
4607 				 float32_t * pDst,
4608 				 uint32_t numSamples);
4609 
4610   /**
4611    * @brief  Q31 complex magnitude squared
4612    * @param[in]  *pSrc points to the complex input vector
4613    * @param[out]  *pDst points to the real output vector
4614    * @param[in]  numSamples number of complex samples in the input vector
4615    * @return none.
4616    */
4617 
4618   void arm_cmplx_mag_squared_q31(
4619 				  q31_t * pSrc,
4620 				 q31_t * pDst,
4621 				 uint32_t numSamples);
4622 
4623   /**
4624    * @brief  Q15 complex magnitude squared
4625    * @param[in]  *pSrc points to the complex input vector
4626    * @param[out]  *pDst points to the real output vector
4627    * @param[in]  numSamples number of complex samples in the input vector
4628    * @return none.
4629    */
4630 
4631   void arm_cmplx_mag_squared_q15(
4632 				  q15_t * pSrc,
4633 				 q15_t * pDst,
4634 				 uint32_t numSamples);
4635 
4636 
4637  /**
4638    * @ingroup groupController
4639    */
4640 
4641   /**
4642    * @defgroup PID PID Motor Control
4643    *
4644    * A Proportional Integral Derivative (PID) controller is a generic feedback control
4645    * loop mechanism widely used in industrial control systems.
4646    * A PID controller is the most commonly used type of feedback controller.
4647    *
   * This set of functions implements Proportional Integral Derivative (PID) controllers
4649    * for Q15, Q31, and floating-point data types.  The functions operate on a single sample
4650    * of data and each call to the function returns a single processed value.
4651    * <code>S</code> points to an instance of the PID control data structure.  <code>in</code>
4652    * is the input sample value. The functions return the output value.
4653    *
4654    * \par Algorithm:
4655    * <pre>
4656    *    y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]
4657    *    A0 = Kp + Ki + Kd
4658    *    A1 = (-Kp ) - (2 * Kd )
4659    *    A2 = Kd  </pre>
4660    *
4661    * \par
   * where \c Kp is the proportional constant, \c Ki is the integral constant and \c Kd is the derivative constant.
4663    *
4664    * \par
4665    * \image html PID.gif "Proportional Integral Derivative Controller"
4666    *
4667    * \par
4668    * The PID controller calculates an "error" value as the difference between
4669    * the measured output and the reference input.
4670    * The controller attempts to minimize the error by adjusting the process control inputs.
4671    * The proportional value determines the reaction to the current error,
4672    * the integral value determines the reaction based on the sum of recent errors,
4673    * and the derivative value determines the reaction based on the rate at which the error has been changing.
4674    *
4675    * \par Instance Structure
4676    * The Gains A0, A1, A2 and state variables for a PID controller are stored together in an instance data structure.
4677    * A separate instance structure must be defined for each PID Controller.
4678    * There are separate instance structure declarations for each of the 3 supported data types.
4679    *
4680    * \par Reset Functions
4681    * There is also an associated reset function for each data type which clears the state array.
4682    *
4683    * \par Initialization Functions
4684    * There is also an associated initialization function for each data type.
4685    * The initialization function performs the following operations:
   * - Initializes the Gains A0, A1, A2 from the Kp, Ki, Kd gains.
4687    * - Zeros out the values in the state buffer.
4688    *
4689    * \par
   * The instance structure cannot be placed into a const data section; it is recommended to use the initialization function.
4691    *
4692    * \par Fixed-Point Behavior
4693    * Care must be taken when using the fixed-point versions of the PID Controller functions.
4694    * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
4695    * Refer to the function specific documentation below for usage guidelines.
4696    */
4697 
4698   /**
4699    * @addtogroup PID
4700    * @{
4701    */
4702 
4703   /**
4704    * @brief  Process function for the floating-point PID Control.
4705    * @param[in,out] *S is an instance of the floating-point PID Control structure
4706    * @param[in] in input sample to process
4707    * @return out processed output sample.
4708    */
4709 
4710 
arm_pid_f32(arm_pid_instance_f32 * S,float32_t in)4711   static __INLINE float32_t arm_pid_f32(
4712 					arm_pid_instance_f32 * S,
4713 					float32_t in)
4714   {
4715     float32_t out;
4716 
4717     /* y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]  */
4718     out = (S->A0 * in) +
4719       (S->A1 * S->state[0]) + (S->A2 * S->state[1]) + (S->state[2]);
4720 
4721     /* Update state */
4722     S->state[1] = S->state[0];
4723     S->state[0] = in;
4724     S->state[2] = out;
4725 
4726     /* return to application */
4727     return (out);
4728 
4729   }
4730 
4731   /**
4732    * @brief  Process function for the Q31 PID Control.
4733    * @param[in,out] *S points to an instance of the Q31 PID Control structure
4734    * @param[in] in input sample to process
4735    * @return out processed output sample.
4736    *
4737    * <b>Scaling and Overflow Behavior:</b>
4738    * \par
4739    * The function is implemented using an internal 64-bit accumulator.
4740    * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
4741    * Thus, if the accumulator result overflows it wraps around rather than clip.
4742    * In order to avoid overflows completely the input signal must be scaled down by 2 bits as there are four additions.
4743    * After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format.
4744    */
4745 
arm_pid_q31(arm_pid_instance_q31 * S,q31_t in)4746   static __INLINE q31_t arm_pid_q31(
4747 				    arm_pid_instance_q31 * S,
4748 				    q31_t in)
4749   {
4750     q63_t acc;
4751 	q31_t out;
4752 
4753     /* acc = A0 * x[n]  */
4754     acc = (q63_t) S->A0 * in;
4755 
4756     /* acc += A1 * x[n-1] */
4757     acc += (q63_t) S->A1 * S->state[0];
4758 
4759     /* acc += A2 * x[n-2]  */
4760     acc += (q63_t) S->A2 * S->state[1];
4761 
4762     /* convert output to 1.31 format to add y[n-1] */
4763     out = (q31_t) (acc >> 31u);
4764 
4765     /* out += y[n-1] */
4766     out += S->state[2];
4767 
4768     /* Update state */
4769     S->state[1] = S->state[0];
4770     S->state[0] = in;
4771     S->state[2] = out;
4772 
4773     /* return to application */
4774     return (out);
4775 
4776   }
4777 
4778   /**
4779    * @brief  Process function for the Q15 PID Control.
4780    * @param[in,out] *S points to an instance of the Q15 PID Control structure
4781    * @param[in] in input sample to process
4782    * @return out processed output sample.
4783    *
4784    * <b>Scaling and Overflow Behavior:</b>
4785    * \par
4786    * The function is implemented using a 64-bit internal accumulator.
4787    * Both Gains and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
4788    * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
4789    * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
4790    * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
4791    * Lastly, the accumulator is saturated to yield a result in 1.15 format.
4792    */
4793 
arm_pid_q15(arm_pid_instance_q15 * S,q15_t in)4794   static __INLINE q15_t arm_pid_q15(
4795 				    arm_pid_instance_q15 * S,
4796 				    q15_t in)
4797   {
4798     q63_t acc;
4799     q15_t out;
4800 
4801     /* Implementation of PID controller */
4802 
4803 	#ifdef ARM_MATH_CM0
4804 
4805  	/* acc = A0 * x[n]  */
4806 	acc = ((q31_t) S->A0 )* in ;
4807 
4808     #else
4809 
4810     /* acc = A0 * x[n]  */
4811     acc = (q31_t) __SMUAD(S->A0, in);
4812 
4813 	#endif
4814 
4815 	#ifdef ARM_MATH_CM0
4816 
4817 	/* acc += A1 * x[n-1] + A2 * x[n-2]  */
4818 	acc += (q31_t) S->A1  *  S->state[0] ;
4819 	acc += (q31_t) S->A2  *  S->state[1] ;
4820 
4821 	#else
4822 
4823     /* acc += A1 * x[n-1] + A2 * x[n-2]  */
4824     acc = __SMLALD(S->A1, (q31_t)__SIMD32(S->state), acc);
4825 
4826 	#endif
4827 
4828     /* acc += y[n-1] */
4829     acc += (q31_t) S->state[2] << 15;
4830 
4831     /* saturate the output */
4832     out = (q15_t) (__SSAT((acc >> 15), 16));
4833 
4834     /* Update state */
4835     S->state[1] = S->state[0];
4836     S->state[0] = in;
4837     S->state[2] = out;
4838 
4839     /* return to application */
4840     return (out);
4841 
4842   }
4843 
4844   /**
4845    * @} end of PID group
4846    */
4847 
4848 
4849   /**
4850    * @brief Floating-point matrix inverse.
4851    * @param[in]  *src points to the instance of the input floating-point matrix structure.
4852    * @param[out] *dst points to the instance of the output floating-point matrix structure.
4853    * @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match.
4854    * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status ARM_MATH_SINGULAR.
4855    */
4856 
4857   arm_status arm_mat_inverse_f32(
4858 				 const arm_matrix_instance_f32 * src,
4859 				 arm_matrix_instance_f32 * dst);
4860 
4861 
4862 
4863   /**
4864    * @ingroup groupController
4865    */
4866 
4867 
4868   /**
4869    * @defgroup clarke Vector Clarke Transform
4870    * Forward Clarke transform converts the instantaneous stator phases into a two-coordinate time invariant vector.
4871    * Generally the Clarke transform uses three-phase currents <code>Ia, Ib and Ic</code> to calculate currents
4872    * in the two-phase orthogonal stator axis <code>Ialpha</code> and <code>Ibeta</code>.
   * When <code>Ialpha</code> is superposed with <code>Ia</code> as shown in the figure below
   * \image html clarke.gif "Stator current space vector and its components in (a,b)."
   * and <code>Ia + Ib + Ic = 0</code>, <code>Ialpha</code> and <code>Ibeta</code>
   * can be calculated using only <code>Ia</code> and <code>Ib</code>.
4877    *
4878    * The function operates on a single sample of data and each call to the function returns the processed output.
4879    * The library provides separate functions for Q31 and floating-point data types.
4880    * \par Algorithm
4881    * \image html clarkeFormula.gif
4882    * where <code>Ia</code> and <code>Ib</code> are the instantaneous stator phases and
4883    * <code>pIalpha</code> and <code>pIbeta</code> are the two coordinates of time invariant vector.
4884    * \par Fixed-Point Behavior
4885    * Care must be taken when using the Q31 version of the Clarke transform.
4886    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
4887    * Refer to the function specific documentation below for usage guidelines.
4888    */
4889 
4890   /**
4891    * @addtogroup clarke
4892    * @{
4893    */
4894 
4895   /**
4896    *
4897    * @brief  Floating-point Clarke transform
4898    * @param[in]       Ia       input three-phase coordinate <code>a</code>
4899    * @param[in]       Ib       input three-phase coordinate <code>b</code>
4900    * @param[out]      *pIalpha points to output two-phase orthogonal vector axis alpha
4901    * @param[out]      *pIbeta  points to output two-phase orthogonal vector axis beta
4902    * @return none.
4903    */
4904 
arm_clarke_f32(float32_t Ia,float32_t Ib,float32_t * pIalpha,float32_t * pIbeta)4905   static __INLINE void arm_clarke_f32(
4906 				      float32_t Ia,
4907 				      float32_t Ib,
4908 				      float32_t * pIalpha,
4909 				      float32_t * pIbeta)
4910   {
4911     /* Calculate pIalpha using the equation, pIalpha = Ia */
4912     *pIalpha = Ia;
4913 
4914     /* Calculate pIbeta using the equation, pIbeta = (1/sqrt(3)) * Ia + (2/sqrt(3)) * Ib */
4915     *pIbeta = ((float32_t) 0.57735026919 * Ia + (float32_t) 1.15470053838 * Ib);
4916 
4917   }
4918 
4919   /**
4920    * @brief  Clarke transform for Q31 version
4921    * @param[in]       Ia       input three-phase coordinate <code>a</code>
4922    * @param[in]       Ib       input three-phase coordinate <code>b</code>
4923    * @param[out]      *pIalpha points to output two-phase orthogonal vector axis alpha
4924    * @param[out]      *pIbeta  points to output two-phase orthogonal vector axis beta
4925    * @return none.
4926    *
4927    * <b>Scaling and Overflow Behavior:</b>
4928    * \par
4929    * The function is implemented using an internal 32-bit accumulator.
4930    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
4931    * There is saturation on the addition, hence there is no risk of overflow.
4932    */
4933 
arm_clarke_q31(q31_t Ia,q31_t Ib,q31_t * pIalpha,q31_t * pIbeta)4934   static __INLINE void arm_clarke_q31(
4935 				      q31_t Ia,
4936 				      q31_t Ib,
4937 				      q31_t * pIalpha,
4938 				      q31_t * pIbeta)
4939   {
4940     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
4941 
4942     /* Calculating pIalpha from Ia by equation pIalpha = Ia */
4943     *pIalpha = Ia;
4944 
4945     /* Intermediate product is calculated by (1/(sqrt(3)) * Ia) */
4946     product1 = (q31_t) (((q63_t) Ia * 0x24F34E8B) >> 30);
4947 
4948     /* Intermediate product is calculated by (2/sqrt(3) * Ib) */
4949     product2 = (q31_t) (((q63_t) Ib * 0x49E69D16) >> 30);
4950 
4951     /* pIbeta is calculated by adding the intermediate products */
4952     *pIbeta = __QADD(product1, product2);
4953   }
4954 
4955   /**
4956    * @} end of clarke group
4957    */
4958 
4959   /**
4960    * @brief  Converts the elements of the Q7 vector to Q31 vector.
4961    * @param[in]  *pSrc     input pointer
4962    * @param[out]  *pDst    output pointer
4963    * @param[in]  blockSize number of samples to process
4964    * @return none.
4965    */
4966   void arm_q7_to_q31(
4967 		     q7_t * pSrc,
4968 		     q31_t * pDst,
4969 		     uint32_t blockSize);
4970 
4971 
4972 
4973 
4974   /**
4975    * @ingroup groupController
4976    */
4977 
4978   /**
4979    * @defgroup inv_clarke Vector Inverse Clarke Transform
4980    * Inverse Clarke transform converts the two-coordinate time invariant vector into instantaneous stator phases.
4981    *
4982    * The function operates on a single sample of data and each call to the function returns the processed output.
4983    * The library provides separate functions for Q31 and floating-point data types.
4984    * \par Algorithm
4985    * \image html clarkeInvFormula.gif
4986    * where <code>pIa</code> and <code>pIb</code> are the instantaneous stator phases and
4987    * <code>Ialpha</code> and <code>Ibeta</code> are the two coordinates of time invariant vector.
4988    * \par Fixed-Point Behavior
4989    * Care must be taken when using the Q31 version of the inverse Clarke transform.
4990    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
4991    * Refer to the function specific documentation below for usage guidelines.
4992    */
4993 
4994   /**
4995    * @addtogroup inv_clarke
4996    * @{
4997    */
4998 
4999    /**
5000    * @brief  Floating-point Inverse Clarke transform
5001    * @param[in]       Ialpha  input two-phase orthogonal vector axis alpha
5002    * @param[in]       Ibeta   input two-phase orthogonal vector axis beta
5003    * @param[out]      *pIa    points to output three-phase coordinate <code>a</code>
5004    * @param[out]      *pIb    points to output three-phase coordinate <code>b</code>
5005    * @return none.
5006    */
5007 
5008 
arm_inv_clarke_f32(float32_t Ialpha,float32_t Ibeta,float32_t * pIa,float32_t * pIb)5009   static __INLINE void arm_inv_clarke_f32(
5010 					  float32_t Ialpha,
5011 					  float32_t Ibeta,
5012 					  float32_t * pIa,
5013 					  float32_t * pIb)
5014   {
5015     /* Calculating pIa from Ialpha by equation pIa = Ialpha */
5016     *pIa = Ialpha;
5017 
5018     /* Calculating pIb from Ialpha and Ibeta by equation pIb = -(1/2) * Ialpha + (sqrt(3)/2) * Ibeta */
5019     *pIb = -0.5 * Ialpha + (float32_t) 0.8660254039 *Ibeta;
5020 
5021   }
5022 
5023   /**
5024    * @brief  Inverse Clarke transform for Q31 version
5025    * @param[in]       Ialpha  input two-phase orthogonal vector axis alpha
5026    * @param[in]       Ibeta   input two-phase orthogonal vector axis beta
5027    * @param[out]      *pIa    points to output three-phase coordinate <code>a</code>
5028    * @param[out]      *pIb    points to output three-phase coordinate <code>b</code>
5029    * @return none.
5030    *
5031    * <b>Scaling and Overflow Behavior:</b>
5032    * \par
5033    * The function is implemented using an internal 32-bit accumulator.
5034    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5035    * There is saturation on the subtraction, hence there is no risk of overflow.
5036    */
5037 
arm_inv_clarke_q31(q31_t Ialpha,q31_t Ibeta,q31_t * pIa,q31_t * pIb)5038   static __INLINE void arm_inv_clarke_q31(
5039 					  q31_t Ialpha,
5040 					  q31_t Ibeta,
5041 					  q31_t * pIa,
5042 					  q31_t * pIb)
5043   {
5044     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5045 
5046     /* Calculating pIa from Ialpha by equation pIa = Ialpha */
5047     *pIa = Ialpha;
5048 
5049     /* Intermediate product is calculated by (1/(2*sqrt(3)) * Ia) */
5050     product1 = (q31_t) (((q63_t) (Ialpha) * (0x40000000)) >> 31);
5051 
5052     /* Intermediate product is calculated by (1/sqrt(3) * pIb) */
5053     product2 = (q31_t) (((q63_t) (Ibeta) * (0x6ED9EBA1)) >> 31);
5054 
5055     /* pIb is calculated by subtracting the products */
5056     *pIb = __QSUB(product2, product1);
5057 
5058   }
5059 
5060   /**
5061    * @} end of inv_clarke group
5062    */
5063 
5064   /**
5065    * @brief  Converts the elements of the Q7 vector to Q15 vector.
5066    * @param[in]  *pSrc     input pointer
5067    * @param[out] *pDst     output pointer
5068    * @param[in]  blockSize number of samples to process
5069    * @return none.
5070    */
5071   void arm_q7_to_q15(
5072 		      q7_t * pSrc,
5073 		     q15_t * pDst,
5074 		     uint32_t blockSize);
5075 
5076 
5077 
5078   /**
5079    * @ingroup groupController
5080    */
5081 
5082   /**
5083    * @defgroup park Vector Park Transform
5084    *
5085    * Forward Park transform converts the input two-coordinate vector to flux and torque components.
5086    * The Park transform can be used to realize the transformation of the <code>Ialpha</code> and the <code>Ibeta</code> currents
5087    * from the stationary to the moving reference frame and control the spatial relationship between
5088    * the stator vector current and rotor flux vector.
5089    * If we consider the d axis aligned with the rotor flux, the diagram below shows the
5090    * current vector and the relationship from the two reference frames:
5091    * \image html park.gif "Stator current space vector and its component in (a,b) and in the d,q rotating reference frame"
5092    *
5093    * The function operates on a single sample of data and each call to the function returns the processed output.
5094    * The library provides separate functions for Q31 and floating-point data types.
5095    * \par Algorithm
5096    * \image html parkFormula.gif
5097    * where <code>Ialpha</code> and <code>Ibeta</code> are the stator vector components,
5098    * <code>pId</code> and <code>pIq</code> are rotor vector components and <code>cosVal</code> and <code>sinVal</code> are the
5099    * cosine and sine values of theta (rotor flux position).
5100    * \par Fixed-Point Behavior
5101    * Care must be taken when using the Q31 version of the Park transform.
5102    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5103    * Refer to the function specific documentation below for usage guidelines.
5104    */
5105 
5106   /**
5107    * @addtogroup park
5108    * @{
5109    */
5110 
5111   /**
5112    * @brief Floating-point Park transform
5113    * @param[in]       Ialpha input two-phase vector coordinate alpha
5114    * @param[in]       Ibeta  input two-phase vector coordinate beta
5115    * @param[out]      *pId   points to output	rotor reference frame d
5116    * @param[out]      *pIq   points to output	rotor reference frame q
5117    * @param[in]       sinVal sine value of rotation angle theta
5118    * @param[in]       cosVal cosine value of rotation angle theta
5119    * @return none.
5120    *
5121    * The function implements the forward Park transform.
5122    *
5123    */
5124 
arm_park_f32(float32_t Ialpha,float32_t Ibeta,float32_t * pId,float32_t * pIq,float32_t sinVal,float32_t cosVal)5125   static __INLINE void arm_park_f32(
5126 				    float32_t Ialpha,
5127 				    float32_t Ibeta,
5128 				    float32_t * pId,
5129 				    float32_t * pIq,
5130 				    float32_t sinVal,
5131 				    float32_t cosVal)
5132   {
5133     /* Calculate pId using the equation, pId = Ialpha * cosVal + Ibeta * sinVal */
5134     *pId = Ialpha * cosVal + Ibeta * sinVal;
5135 
5136     /* Calculate pIq using the equation, pIq = - Ialpha * sinVal + Ibeta * cosVal */
5137     *pIq = -Ialpha * sinVal + Ibeta * cosVal;
5138 
5139   }
5140 
5141   /**
5142    * @brief  Park transform for Q31 version
5143    * @param[in]       Ialpha input two-phase vector coordinate alpha
5144    * @param[in]       Ibeta  input two-phase vector coordinate beta
5145    * @param[out]      *pId   points to output rotor reference frame d
5146    * @param[out]      *pIq   points to output rotor reference frame q
5147    * @param[in]       sinVal sine value of rotation angle theta
5148    * @param[in]       cosVal cosine value of rotation angle theta
5149    * @return none.
5150    *
5151    * <b>Scaling and Overflow Behavior:</b>
5152    * \par
5153    * The function is implemented using an internal 32-bit accumulator.
5154    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5155    * There is saturation on the addition and subtraction, hence there is no risk of overflow.
5156    */
5157 
5158 
arm_park_q31(q31_t Ialpha,q31_t Ibeta,q31_t * pId,q31_t * pIq,q31_t sinVal,q31_t cosVal)5159   static __INLINE void arm_park_q31(
5160 				    q31_t Ialpha,
5161 				    q31_t Ibeta,
5162 				    q31_t * pId,
5163 				    q31_t * pIq,
5164 				    q31_t sinVal,
5165 				    q31_t cosVal)
5166   {
5167     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5168     q31_t product3, product4;                    /* Temporary variables used to store intermediate results */
5169 
5170     /* Intermediate product is calculated by (Ialpha * cosVal) */
5171     product1 = (q31_t) (((q63_t) (Ialpha) * (cosVal)) >> 31);
5172 
5173     /* Intermediate product is calculated by (Ibeta * sinVal) */
5174     product2 = (q31_t) (((q63_t) (Ibeta) * (sinVal)) >> 31);
5175 
5176 
5177     /* Intermediate product is calculated by (Ialpha * sinVal) */
5178     product3 = (q31_t) (((q63_t) (Ialpha) * (sinVal)) >> 31);
5179 
5180     /* Intermediate product is calculated by (Ibeta * cosVal) */
5181     product4 = (q31_t) (((q63_t) (Ibeta) * (cosVal)) >> 31);
5182 
5183     /* Calculate pId by adding the two intermediate products 1 and 2 */
5184     *pId = __QADD(product1, product2);
5185 
5186     /* Calculate pIq by subtracting the two intermediate products 3 from 4 */
5187     *pIq = __QSUB(product4, product3);
5188   }
5189 
5190   /**
5191    * @} end of park group
5192    */
5193 
5194   /**
5195    * @brief  Converts the elements of the Q7 vector to floating-point vector.
5196    * @param[in]  *pSrc is input pointer
5197    * @param[out]  *pDst is output pointer
5198    * @param[in]  blockSize is the number of samples to process
5199    * @return none.
5200    */
5201   void arm_q7_to_float(
5202 		        q7_t * pSrc,
5203 		       float32_t * pDst,
5204 		       uint32_t blockSize);
5205 
5206 
5207   /**
5208    * @ingroup groupController
5209    */
5210 
5211   /**
5212    * @defgroup inv_park Vector Inverse Park transform
5213    * Inverse Park transform converts the input flux and torque components to two-coordinate vector.
5214    *
5215    * The function operates on a single sample of data and each call to the function returns the processed output.
5216    * The library provides separate functions for Q31 and floating-point data types.
5217    * \par Algorithm
5218    * \image html parkInvFormula.gif
5219    * where <code>pIalpha</code> and <code>pIbeta</code> are the stator vector components,
5220    * <code>Id</code> and <code>Iq</code> are rotor vector components and <code>cosVal</code> and <code>sinVal</code> are the
5221    * cosine and sine values of theta (rotor flux position).
5222    * \par Fixed-Point Behavior
5223    * Care must be taken when using the Q31 version of the inverse Park transform.
5224    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5225    * Refer to the function specific documentation below for usage guidelines.
5226    */
5227 
5228   /**
5229    * @addtogroup inv_park
5230    * @{
5231    */
5232 
5233    /**
5234    * @brief  Floating-point Inverse Park transform
5235    * @param[in]       Id        input coordinate of rotor reference frame d
5236    * @param[in]       Iq        input coordinate of rotor reference frame q
5237    * @param[out]      *pIalpha  points to output two-phase orthogonal vector axis alpha
5238    * @param[out]      *pIbeta   points to output two-phase orthogonal vector axis beta
5239    * @param[in]       sinVal    sine value of rotation angle theta
5240    * @param[in]       cosVal    cosine value of rotation angle theta
5241    * @return none.
5242    */
5243 
arm_inv_park_f32(float32_t Id,float32_t Iq,float32_t * pIalpha,float32_t * pIbeta,float32_t sinVal,float32_t cosVal)5244   static __INLINE void arm_inv_park_f32(
5245 					float32_t Id,
5246 					float32_t Iq,
5247 					float32_t * pIalpha,
5248 					float32_t * pIbeta,
5249 					float32_t sinVal,
5250 					float32_t cosVal)
5251   {
5252     /* Calculate pIalpha using the equation, pIalpha = Id * cosVal - Iq * sinVal */
5253     *pIalpha = Id * cosVal - Iq * sinVal;
5254 
5255     /* Calculate pIbeta using the equation, pIbeta = Id * sinVal + Iq * cosVal */
5256     *pIbeta = Id * sinVal + Iq * cosVal;
5257 
5258   }
5259 
5260 
5261   /**
5262    * @brief  Inverse Park transform for	Q31 version
5263    * @param[in]       Id        input coordinate of rotor reference frame d
5264    * @param[in]       Iq        input coordinate of rotor reference frame q
5265    * @param[out]      *pIalpha  points to output two-phase orthogonal vector axis alpha
5266    * @param[out]      *pIbeta   points to output two-phase orthogonal vector axis beta
5267    * @param[in]       sinVal    sine value of rotation angle theta
5268    * @param[in]       cosVal    cosine value of rotation angle theta
5269    * @return none.
5270    *
5271    * <b>Scaling and Overflow Behavior:</b>
5272    * \par
5273    * The function is implemented using an internal 32-bit accumulator.
5274    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5275    * There is saturation on the addition, hence there is no risk of overflow.
5276    */
5277 
5278 
arm_inv_park_q31(q31_t Id,q31_t Iq,q31_t * pIalpha,q31_t * pIbeta,q31_t sinVal,q31_t cosVal)5279   static __INLINE void arm_inv_park_q31(
5280 					q31_t Id,
5281 					q31_t Iq,
5282 					q31_t * pIalpha,
5283 					q31_t * pIbeta,
5284 					q31_t sinVal,
5285 					q31_t cosVal)
5286   {
5287     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5288     q31_t product3, product4;                    /* Temporary variables used to store intermediate results */
5289 
5290     /* Intermediate product is calculated by (Id * cosVal) */
5291     product1 = (q31_t) (((q63_t) (Id) * (cosVal)) >> 31);
5292 
5293     /* Intermediate product is calculated by (Iq * sinVal) */
5294     product2 = (q31_t) (((q63_t) (Iq) * (sinVal)) >> 31);
5295 
5296 
5297     /* Intermediate product is calculated by (Id * sinVal) */
5298     product3 = (q31_t) (((q63_t) (Id) * (sinVal)) >> 31);
5299 
5300     /* Intermediate product is calculated by (Iq * cosVal) */
5301     product4 = (q31_t) (((q63_t) (Iq) * (cosVal)) >> 31);
5302 
5303     /* Calculate pIalpha by using the two intermediate products 1 and 2 */
5304     *pIalpha = __QSUB(product1, product2);
5305 
5306     /* Calculate pIbeta by using the two intermediate products 3 and 4 */
5307     *pIbeta = __QADD(product4, product3);
5308 
5309   }
5310 
5311   /**
5312    * @} end of Inverse park group
5313    */
5314 
5315 
5316   /**
5317    * @brief  Converts the elements of the Q31 vector to floating-point vector.
5318    * @param[in]  *pSrc is input pointer
5319    * @param[out]  *pDst is output pointer
5320    * @param[in]  blockSize is the number of samples to process
5321    * @return none.
5322    */
5323   void arm_q31_to_float(
5324 			 q31_t * pSrc,
5325 			float32_t * pDst,
5326 			uint32_t blockSize);
5327 
5328   /**
5329    * @ingroup groupInterpolation
5330    */
5331 
5332   /**
5333    * @defgroup LinearInterpolate Linear Interpolation
5334    *
5335    * Linear interpolation is a method of curve fitting using linear polynomials.
5336    * Linear interpolation works by effectively drawing a straight line between two neighboring samples and returning the appropriate point along that line
5337    *
5338    * \par
5339    * \image html LinearInterp.gif "Linear interpolation"
5340    *
5341    * \par
5342    * A  Linear Interpolate function calculates an output value(y), for the input(x)
5343    * using linear interpolation of the input values x0, x1( nearest input values) and the output values y0 and y1(nearest output values)
5344    *
5345    * \par Algorithm:
5346    * <pre>
5347    *       y = y0 + (x - x0) * ((y1 - y0)/(x1-x0))
5348    *       where x0, x1 are nearest values of input x
5349    *             y0, y1 are nearest values to output y
5350    * </pre>
5351    *
5352    * \par
5353    * This set of functions implements Linear interpolation process
5354    * for Q7, Q15, Q31, and floating-point data types.  The functions operate on a single
5355    * sample of data and each call to the function returns a single processed value.
5356    * <code>S</code> points to an instance of the Linear Interpolate function data structure.
5357    * <code>x</code> is the input sample value. The functions returns the output value.
5358    *
5359    * \par
5360    * If x is outside of the table boundary, linear interpolation returns the first value of the table
5361    * when x is below the input range and the last value of the table when x is above the range.
5362    */
5363 
5364   /**
5365    * @addtogroup LinearInterpolate
5366    * @{
5367    */
5368 
5369   /**
5370    * @brief  Process function for the floating-point Linear Interpolation Function.
5371    * @param[in,out] *S is an instance of the floating-point Linear Interpolation structure
5372    * @param[in] x input sample to process
5373    * @return y processed output sample.
5374    *
5375    */
5376 
arm_linear_interp_f32(arm_linear_interp_instance_f32 * S,float32_t x)5377   static __INLINE float32_t arm_linear_interp_f32(
5378 						  arm_linear_interp_instance_f32 * S,
5379 						  float32_t x)
5380   {
5381 
5382 	  float32_t y;
5383 	  float32_t x0, x1;						/* Nearest input values */
5384 	  float32_t y0, y1;	  					/* Nearest output values */
5385 	  float32_t xSpacing = S->xSpacing;		/* spacing between input values */
5386 	  int32_t i;  							/* Index variable */
5387 	  float32_t *pYData = S->pYData;	    /* pointer to output table */
5388 
5389 	  /* Calculation of index */
5390 	  i =   (x - S->x1) / xSpacing;
5391 
5392 	  if(i < 0)
5393 	  {
5394 	     /* Iniatilize output for below specified range as least output value of table */
5395 		 y = pYData[0];
5396 	  }
5397 	  else if(i >= S->nValues)
5398 	  {
5399 	  	  /* Iniatilize output for above specified range as last output value of table */
5400 	  	  y = pYData[S->nValues-1];
5401 	  }
5402 	  else
5403 	  {
5404 	  	  /* Calculation of nearest input values */
5405 		  x0 = S->x1 + i * xSpacing;
5406 		  x1 = S->x1 + (i +1) * xSpacing;
5407 
5408 		 /* Read of nearest output values */
5409 		  y0 = pYData[i];
5410 		  y1 = pYData[i + 1];
5411 
5412 		  /* Calculation of output */
5413 		  y = y0 + (x - x0) * ((y1 - y0)/(x1-x0));
5414 
5415 	  }
5416 
5417       /* returns output value */
5418 	  return (y);
5419   }
5420 
5421    /**
5422    *
5423    * @brief  Process function for the Q31 Linear Interpolation Function.
5424    * @param[in] *pYData  pointer to Q31 Linear Interpolation table
5425    * @param[in] x input sample to process
5426    * @param[in] nValues number of table values
5427    * @return y processed output sample.
5428    *
5429    * \par
5430    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
5431    * This function can support maximum of table size 2^12.
5432    *
5433    */
5434 
5435 
arm_linear_interp_q31(q31_t * pYData,q31_t x,uint32_t nValues)5436   static __INLINE q31_t arm_linear_interp_q31(q31_t *pYData,
5437 					      q31_t x, uint32_t nValues)
5438   {
5439     q31_t y;                                   /* output */
5440     q31_t y0, y1;                                /* Nearest output values */
5441     q31_t fract;                                 /* fractional part */
5442     int32_t index;                              /* Index to read nearest output values */
5443 
5444     /* Input is in 12.20 format */
5445     /* 12 bits for the table index */
5446     /* Index value calculation */
5447     index = ((x & 0xFFF00000) >> 20);
5448 
5449 	if(index >= (nValues - 1))
5450 	{
5451 		return(pYData[nValues - 1]);
5452 	}
5453 	else if(index < 0)
5454 	{
5455 		return(pYData[0]);
5456 	}
5457 	else
5458 	{
5459 
5460 	    /* 20 bits for the fractional part */
5461 	    /* shift left by 11 to keep fract in 1.31 format */
5462 	    fract = (x & 0x000FFFFF) << 11;
5463 
5464 	    /* Read two nearest output values from the index in 1.31(q31) format */
5465 	    y0 = pYData[index];
5466 	    y1 = pYData[index + 1u];
5467 
5468 	    /* Calculation of y0 * (1-fract) and y is in 2.30 format */
5469 	    y = ((q31_t) ((q63_t) y0 * (0x7FFFFFFF - fract) >> 32));
5470 
5471 	    /* Calculation of y0 * (1-fract) + y1 *fract and y is in 2.30 format */
5472 	    y += ((q31_t) (((q63_t) y1 * fract) >> 32));
5473 
5474 	    /* Convert y to 1.31 format */
5475 	    return (y << 1u);
5476 
5477 	}
5478 
5479   }
5480 
5481   /**
5482    *
5483    * @brief  Process function for the Q15 Linear Interpolation Function.
5484    * @param[in] *pYData  pointer to Q15 Linear Interpolation table
5485    * @param[in] x input sample to process
5486    * @param[in] nValues number of table values
5487    * @return y processed output sample.
5488    *
5489    * \par
5490    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
5491    * This function can support maximum of table size 2^12.
5492    *
5493    */
5494 
5495 
arm_linear_interp_q15(q15_t * pYData,q31_t x,uint32_t nValues)5496   static __INLINE q15_t arm_linear_interp_q15(q15_t *pYData, q31_t x, uint32_t nValues)
5497   {
5498     q63_t y;                                   /* output */
5499     q15_t y0, y1;                              /* Nearest output values */
5500     q31_t fract;                               /* fractional part */
5501     int32_t index;                            /* Index to read nearest output values */
5502 
5503     /* Input is in 12.20 format */
5504     /* 12 bits for the table index */
5505     /* Index value calculation */
5506     index = ((x & 0xFFF00000) >> 20u);
5507 
5508 	if(index >= (nValues - 1))
5509 	{
5510 		return(pYData[nValues - 1]);
5511 	}
5512 	else if(index < 0)
5513 	{
5514 		return(pYData[0]);
5515 	}
5516 	else
5517 	{
5518 	    /* 20 bits for the fractional part */
5519 	    /* fract is in 12.20 format */
5520 	    fract = (x & 0x000FFFFF);
5521 
5522 	    /* Read two nearest output values from the index */
5523 	    y0 = pYData[index];
5524 	    y1 = pYData[index + 1u];
5525 
5526 	    /* Calculation of y0 * (1-fract) and y is in 13.35 format */
5527 	    y = ((q63_t) y0 * (0xFFFFF - fract));
5528 
5529 	    /* Calculation of (y0 * (1-fract) + y1 * fract) and y is in 13.35 format */
5530 	    y += ((q63_t) y1 * (fract));
5531 
5532 	    /* convert y to 1.15 format */
5533 	    return (y >> 20);
5534 	}
5535 
5536 
5537   }
5538 
5539   /**
5540    *
5541    * @brief  Process function for the Q7 Linear Interpolation Function.
5542    * @param[in] *pYData  pointer to Q7 Linear Interpolation table
5543    * @param[in] x input sample to process
5544    * @param[in] nValues number of table values
5545    * @return y processed output sample.
5546    *
5547    * \par
5548    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
5549    * This function can support maximum of table size 2^12.
5550    */
5551 
5552 
arm_linear_interp_q7(q7_t * pYData,q31_t x,uint32_t nValues)5553   static __INLINE q7_t arm_linear_interp_q7(q7_t *pYData, q31_t x,  uint32_t nValues)
5554   {
5555     q31_t y;                                   /* output */
5556     q7_t y0, y1;                                 /* Nearest output values */
5557     q31_t fract;                                 /* fractional part */
5558     int32_t index;                              /* Index to read nearest output values */
5559 
5560     /* Input is in 12.20 format */
5561     /* 12 bits for the table index */
5562     /* Index value calculation */
5563     index = ((x & 0xFFF00000) >> 20u);
5564 
5565 
5566     if(index >= (nValues - 1))
5567 	{
5568 		return(pYData[nValues - 1]);
5569 	}
5570 	else if(index < 0)
5571 	{
5572 		return(pYData[0]);
5573 	}
5574 	else
5575 	{
5576 
5577 	    /* 20 bits for the fractional part */
5578 	    /* fract is in 12.20 format */
5579 	    fract = (x & 0x000FFFFF);
5580 
5581 	    /* Read two nearest output values from the index and are in 1.7(q7) format */
5582 	    y0 = pYData[index];
5583 	    y1 = pYData[index + 1u];
5584 
5585 	    /* Calculation of y0 * (1-fract ) and y is in 13.27(q27) format */
5586 	    y = ((y0 * (0xFFFFF - fract)));
5587 
5588 	    /* Calculation of y1 * fract + y0 * (1-fract) and y is in 13.27(q27) format */
5589 	    y += (y1 * fract);
5590 
5591 	    /* convert y to 1.7(q7) format */
5592 	    return (y >> 20u);
5593 
5594 	}
5595 
5596   }
5597   /**
5598    * @} end of LinearInterpolate group
5599    */
5600 
5601   /**
5602    * @brief  Fast approximation to the trigonometric sine function for floating-point data.
5603    * @param[in] x input value in radians.
5604    * @return  sin(x).
5605    */
5606 
5607   float32_t arm_sin_f32(
5608 			 float32_t x);
5609 
5610   /**
5611    * @brief  Fast approximation to the trigonometric sine function for Q31 data.
5612    * @param[in] x Scaled input value in radians.
5613    * @return  sin(x).
5614    */
5615 
5616   q31_t arm_sin_q31(
5617 		     q31_t x);
5618 
5619   /**
5620    * @brief  Fast approximation to the trigonometric sine function for Q15 data.
5621    * @param[in] x Scaled input value in radians.
5622    * @return  sin(x).
5623    */
5624 
5625   q15_t arm_sin_q15(
5626 		     q15_t x);
5627 
5628   /**
5629    * @brief  Fast approximation to the trigonometric cosine function for floating-point data.
5630    * @param[in] x input value in radians.
5631    * @return  cos(x).
5632    */
5633 
5634   float32_t arm_cos_f32(
5635 			 float32_t x);
5636 
5637   /**
5638    * @brief Fast approximation to the trigonometric cosine function for Q31 data.
5639    * @param[in] x Scaled input value in radians.
5640    * @return  cos(x).
5641    */
5642 
5643   q31_t arm_cos_q31(
5644 		     q31_t x);
5645 
5646   /**
5647    * @brief  Fast approximation to the trigonometric cosine function for Q15 data.
5648    * @param[in] x Scaled input value in radians.
5649    * @return  cos(x).
5650    */
5651 
5652   q15_t arm_cos_q15(
5653 		     q15_t x);
5654 
5655 
5656   /**
5657    * @ingroup groupFastMath
5658    */
5659 
5660 
5661   /**
5662    * @defgroup SQRT Square Root
5663    *
5664    * Computes the square root of a number.
5665    * There are separate functions for Q15, Q31, and floating-point data types.
5666    * The square root function is computed using the Newton-Raphson algorithm.
5667    * This is an iterative algorithm of the form:
5668    * <pre>
5669    *      x1 = x0 - f(x0)/f'(x0)
5670    * </pre>
5671    * where <code>x1</code> is the current estimate,
5672    * <code>x0</code> is the previous estimate and
5673    * <code>f'(x0)</code> is the derivative of <code>f()</code> evaluated at <code>x0</code>.
5674    * For the square root function, the algorithm reduces to:
5675    * <pre>
5676    *     x0 = in/2                         [initial guess]
5677    *     x1 = 1/2 * ( x0 + in / x0)        [each iteration]
5678    * </pre>
5679    */
5680 
5681 
5682   /**
5683    * @addtogroup SQRT
5684    * @{
5685    */
5686 
5687   /**
5688    * @brief  Floating-point square root function.
5689    * @param[in]  in     input value.
5690    * @param[out] *pOut  square root of input value.
5691    * @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
5692    * <code>in</code> is negative value and returns zero output for negative values.
5693    */
5694 
arm_sqrt_f32(float32_t in,float32_t * pOut)5695   static __INLINE arm_status  arm_sqrt_f32(
5696 					  float32_t in, float32_t *pOut)
5697   {
5698   	if(in > 0)
5699 	{
5700 
5701 //	#if __FPU_USED
5702     #if (__FPU_USED == 1) && defined ( __CC_ARM   )
5703 		*pOut = __sqrtf(in);
5704 	#else
5705 		*pOut = sqrtf(in);
5706 	#endif
5707 
5708 		return (ARM_MATH_SUCCESS);
5709 	}
5710   	else
5711 	{
5712 		*pOut = 0.0f;
5713 		return (ARM_MATH_ARGUMENT_ERROR);
5714 	}
5715 
5716   }
5717 
5718 
5719   /**
5720    * @brief Q31 square root function.
5721    * @param[in]   in    input value.  The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF.
5722    * @param[out]  *pOut square root of input value.
5723    * @return The function returns ARM_MATH_SUCCESS if the input value is positive, or ARM_MATH_ARGUMENT_ERROR if
5724    * <code>in</code> is negative; for negative inputs the output is set to zero.
5725    */
5726   arm_status arm_sqrt_q31(
5727 		      q31_t in, q31_t *pOut);
5728 
5729   /**
5730    * @brief  Q15 square root function.
5731    * @param[in]   in     input value.  The range of the input value is [0 +1) or 0x0000 to 0x7FFF.
5732    * @param[out]  *pOut  square root of input value.
5733    * @return The function returns ARM_MATH_SUCCESS if the input value is positive, or ARM_MATH_ARGUMENT_ERROR if
5734    * <code>in</code> is negative; for negative inputs the output is set to zero.
5735    */
5736   arm_status arm_sqrt_q15(
5737 		      q15_t in, q15_t *pOut);
5738 
5739   /**
5740    * @} end of SQRT group
5741    */
5742 
5743 
5744 
5745 
5746 
5747 
5748   /**
5749    * @brief floating-point Circular write function.
5750    */
5751 
arm_circularWrite_f32(int32_t * circBuffer,int32_t L,uint16_t * writeOffset,int32_t bufferInc,const int32_t * src,int32_t srcInc,uint32_t blockSize)5752   static __INLINE void arm_circularWrite_f32(
5753 					     int32_t * circBuffer,
5754 					     int32_t L,
5755 					     uint16_t * writeOffset,
5756 					     int32_t bufferInc,
5757 					     const int32_t * src,
5758 					     int32_t srcInc,
5759 					     uint32_t blockSize)
5760   {
5761     uint32_t i = 0u;
5762     int32_t wOffset;
5763 
5764     /* Copy the value of Index pointer that points
5765      * to the current location where the input samples to be copied */
5766     wOffset = *writeOffset;
5767 
5768     /* Loop over the blockSize */
5769     i = blockSize;
5770 
5771     while(i > 0u)
5772       {
5773 	/* copy the input sample to the circular buffer */
5774 	circBuffer[wOffset] = *src;
5775 
5776 	/* Update the input pointer */
5777 	src += srcInc;
5778 
5779 	/* Circularly update wOffset.  Watch out for positive and negative value */
5780 	wOffset += bufferInc;
5781 	if(wOffset >= L)
5782 	  wOffset -= L;
5783 
5784 	/* Decrement the loop counter */
5785 	i--;
5786       }
5787 
5788     /* Update the index pointer */
5789     *writeOffset = wOffset;
5790   }
5791 
5792 
5793 
5794   /**
5795    * @brief floating-point Circular Read function.
5796    */
arm_circularRead_f32(int32_t * circBuffer,int32_t L,int32_t * readOffset,int32_t bufferInc,int32_t * dst,int32_t * dst_base,int32_t dst_length,int32_t dstInc,uint32_t blockSize)5797   static __INLINE void arm_circularRead_f32(
5798 					    int32_t * circBuffer,
5799 					    int32_t L,
5800 					    int32_t * readOffset,
5801 					    int32_t bufferInc,
5802 					    int32_t * dst,
5803 					    int32_t * dst_base,
5804 					    int32_t dst_length,
5805 					    int32_t dstInc,
5806 					    uint32_t blockSize)
5807   {
5808     uint32_t i = 0u;
5809     int32_t rOffset, dst_end;
5810 
5811     /* Copy the value of Index pointer that points
5812      * to the current location from where the input samples to be read */
5813     rOffset = *readOffset;
5814     dst_end = (int32_t) (dst_base + dst_length);
5815 
5816     /* Loop over the blockSize */
5817     i = blockSize;
5818 
5819     while(i > 0u)
5820       {
5821 	/* copy the sample from the circular buffer to the destination buffer */
5822 	*dst = circBuffer[rOffset];
5823 
5824 	/* Update the input pointer */
5825 	dst += dstInc;
5826 
5827 	if(dst == (int32_t *) dst_end)
5828 	  {
5829 	    dst = dst_base;
5830 	  }
5831 
5832 	/* Circularly update rOffset.  Watch out for positive and negative value  */
5833 	rOffset += bufferInc;
5834 
5835 	if(rOffset >= L)
5836 	  {
5837 	    rOffset -= L;
5838 	  }
5839 
5840 	/* Decrement the loop counter */
5841 	i--;
5842       }
5843 
5844     /* Update the index pointer */
5845     *readOffset = rOffset;
5846   }
5847 
5848   /**
5849    * @brief Q15 Circular write function.
5850    */
5851 
arm_circularWrite_q15(q15_t * circBuffer,int32_t L,uint16_t * writeOffset,int32_t bufferInc,const q15_t * src,int32_t srcInc,uint32_t blockSize)5852   static __INLINE void arm_circularWrite_q15(
5853 					     q15_t * circBuffer,
5854 					     int32_t L,
5855 					     uint16_t * writeOffset,
5856 					     int32_t bufferInc,
5857 					     const q15_t * src,
5858 					     int32_t srcInc,
5859 					     uint32_t blockSize)
5860   {
5861     uint32_t i = 0u;
5862     int32_t wOffset;
5863 
5864     /* Copy the value of Index pointer that points
5865      * to the current location where the input samples to be copied */
5866     wOffset = *writeOffset;
5867 
5868     /* Loop over the blockSize */
5869     i = blockSize;
5870 
5871     while(i > 0u)
5872       {
5873 	/* copy the input sample to the circular buffer */
5874 	circBuffer[wOffset] = *src;
5875 
5876 	/* Update the input pointer */
5877 	src += srcInc;
5878 
5879 	/* Circularly update wOffset.  Watch out for positive and negative value */
5880 	wOffset += bufferInc;
5881 	if(wOffset >= L)
5882 	  wOffset -= L;
5883 
5884 	/* Decrement the loop counter */
5885 	i--;
5886       }
5887 
5888     /* Update the index pointer */
5889     *writeOffset = wOffset;
5890   }
5891 
5892 
5893 
5894   /**
5895    * @brief Q15 Circular Read function.
5896    */
arm_circularRead_q15(q15_t * circBuffer,int32_t L,int32_t * readOffset,int32_t bufferInc,q15_t * dst,q15_t * dst_base,int32_t dst_length,int32_t dstInc,uint32_t blockSize)5897   static __INLINE void arm_circularRead_q15(
5898 					    q15_t * circBuffer,
5899 					    int32_t L,
5900 					    int32_t * readOffset,
5901 					    int32_t bufferInc,
5902 					    q15_t * dst,
5903 					    q15_t * dst_base,
5904 					    int32_t dst_length,
5905 					    int32_t dstInc,
5906 					    uint32_t blockSize)
5907   {
5908     uint32_t i = 0;
5909     int32_t rOffset, dst_end;
5910 
5911     /* Copy the value of Index pointer that points
5912      * to the current location from where the input samples to be read */
5913     rOffset = *readOffset;
5914 
5915     dst_end = (int32_t) (dst_base + dst_length);
5916 
5917     /* Loop over the blockSize */
5918     i = blockSize;
5919 
5920     while(i > 0u)
5921       {
5922 	/* copy the sample from the circular buffer to the destination buffer */
5923 	*dst = circBuffer[rOffset];
5924 
5925 	/* Update the input pointer */
5926 	dst += dstInc;
5927 
5928 	if(dst == (q15_t *) dst_end)
5929 	  {
5930 	    dst = dst_base;
5931 	  }
5932 
5933 	/* Circularly update wOffset.  Watch out for positive and negative value */
5934 	rOffset += bufferInc;
5935 
5936 	if(rOffset >= L)
5937 	  {
5938 	    rOffset -= L;
5939 	  }
5940 
5941 	/* Decrement the loop counter */
5942 	i--;
5943       }
5944 
5945     /* Update the index pointer */
5946     *readOffset = rOffset;
5947   }
5948 
5949 
5950   /**
5951    * @brief Q7 Circular write function.
5952    */
5953 
arm_circularWrite_q7(q7_t * circBuffer,int32_t L,uint16_t * writeOffset,int32_t bufferInc,const q7_t * src,int32_t srcInc,uint32_t blockSize)5954   static __INLINE void arm_circularWrite_q7(
5955 					    q7_t * circBuffer,
5956 					    int32_t L,
5957 					    uint16_t * writeOffset,
5958 					    int32_t bufferInc,
5959 					    const q7_t * src,
5960 					    int32_t srcInc,
5961 					    uint32_t blockSize)
5962   {
5963     uint32_t i = 0u;
5964     int32_t wOffset;
5965 
5966     /* Copy the value of Index pointer that points
5967      * to the current location where the input samples to be copied */
5968     wOffset = *writeOffset;
5969 
5970     /* Loop over the blockSize */
5971     i = blockSize;
5972 
5973     while(i > 0u)
5974       {
5975 	/* copy the input sample to the circular buffer */
5976 	circBuffer[wOffset] = *src;
5977 
5978 	/* Update the input pointer */
5979 	src += srcInc;
5980 
5981 	/* Circularly update wOffset.  Watch out for positive and negative value */
5982 	wOffset += bufferInc;
5983 	if(wOffset >= L)
5984 	  wOffset -= L;
5985 
5986 	/* Decrement the loop counter */
5987 	i--;
5988       }
5989 
5990     /* Update the index pointer */
5991     *writeOffset = wOffset;
5992   }
5993 
5994 
5995 
5996   /**
5997    * @brief Q7 Circular Read function.
5998    */
arm_circularRead_q7(q7_t * circBuffer,int32_t L,int32_t * readOffset,int32_t bufferInc,q7_t * dst,q7_t * dst_base,int32_t dst_length,int32_t dstInc,uint32_t blockSize)5999   static __INLINE void arm_circularRead_q7(
6000 					   q7_t * circBuffer,
6001 					   int32_t L,
6002 					   int32_t * readOffset,
6003 					   int32_t bufferInc,
6004 					   q7_t * dst,
6005 					   q7_t * dst_base,
6006 					   int32_t dst_length,
6007 					   int32_t dstInc,
6008 					   uint32_t blockSize)
6009   {
6010     uint32_t i = 0;
6011     int32_t rOffset, dst_end;
6012 
6013     /* Copy the value of Index pointer that points
6014      * to the current location from where the input samples to be read */
6015     rOffset = *readOffset;
6016 
6017     dst_end = (int32_t) (dst_base + dst_length);
6018 
6019     /* Loop over the blockSize */
6020     i = blockSize;
6021 
6022     while(i > 0u)
6023       {
6024 	/* copy the sample from the circular buffer to the destination buffer */
6025 	*dst = circBuffer[rOffset];
6026 
6027 	/* Update the input pointer */
6028 	dst += dstInc;
6029 
6030 	if(dst == (q7_t *) dst_end)
6031 	  {
6032 	    dst = dst_base;
6033 	  }
6034 
6035 	/* Circularly update rOffset.  Watch out for positive and negative value */
6036 	rOffset += bufferInc;
6037 
6038 	if(rOffset >= L)
6039 	  {
6040 	    rOffset -= L;
6041 	  }
6042 
6043 	/* Decrement the loop counter */
6044 	i--;
6045       }
6046 
6047     /* Update the index pointer */
6048     *readOffset = rOffset;
6049   }
6050 
6051 
6052   /**
6053    * @brief  Sum of the squares of the elements of a Q31 vector.
6054    * @param[in]  *pSrc is input pointer
6055    * @param[in]  blockSize is the number of samples to process
6056    * @param[out]  *pResult is output value.
6057    * @return none.
6058    */
6059 
6060   void arm_power_q31(
6061 		      q31_t * pSrc,
6062 		     uint32_t blockSize,
6063 		     q63_t * pResult);
6064 
6065   /**
6066    * @brief  Sum of the squares of the elements of a floating-point vector.
6067    * @param[in]  *pSrc is input pointer
6068    * @param[in]  blockSize is the number of samples to process
6069    * @param[out]  *pResult is output value.
6070    * @return none.
6071    */
6072 
6073   void arm_power_f32(
6074 		      float32_t * pSrc,
6075 		     uint32_t blockSize,
6076 		     float32_t * pResult);
6077 
6078   /**
6079    * @brief  Sum of the squares of the elements of a Q15 vector.
6080    * @param[in]  *pSrc is input pointer
6081    * @param[in]  blockSize is the number of samples to process
6082    * @param[out]  *pResult is output value.
6083    * @return none.
6084    */
6085 
6086   void arm_power_q15(
6087 		      q15_t * pSrc,
6088 		     uint32_t blockSize,
6089 		     q63_t * pResult);
6090 
6091   /**
6092    * @brief  Sum of the squares of the elements of a Q7 vector.
6093    * @param[in]  *pSrc is input pointer
6094    * @param[in]  blockSize is the number of samples to process
6095    * @param[out]  *pResult is output value.
6096    * @return none.
6097    */
6098 
6099   void arm_power_q7(
6100 		     q7_t * pSrc,
6101 		    uint32_t blockSize,
6102 		    q31_t * pResult);
6103 
6104   /**
6105    * @brief  Mean value of a Q7 vector.
6106    * @param[in]  *pSrc is input pointer
6107    * @param[in]  blockSize is the number of samples to process
6108    * @param[out]  *pResult is output value.
6109    * @return none.
6110    */
6111 
6112   void arm_mean_q7(
6113 		    q7_t * pSrc,
6114 		   uint32_t blockSize,
6115 		   q7_t * pResult);
6116 
6117   /**
6118    * @brief  Mean value of a Q15 vector.
6119    * @param[in]  *pSrc is input pointer
6120    * @param[in]  blockSize is the number of samples to process
6121    * @param[out]  *pResult is output value.
6122    * @return none.
6123    */
6124   void arm_mean_q15(
6125 		     q15_t * pSrc,
6126 		    uint32_t blockSize,
6127 		    q15_t * pResult);
6128 
6129   /**
6130    * @brief  Mean value of a Q31 vector.
6131    * @param[in]  *pSrc is input pointer
6132    * @param[in]  blockSize is the number of samples to process
6133    * @param[out]  *pResult is output value.
6134    * @return none.
6135    */
6136   void arm_mean_q31(
6137 		     q31_t * pSrc,
6138 		    uint32_t blockSize,
6139 		    q31_t * pResult);
6140 
6141   /**
6142    * @brief  Mean value of a floating-point vector.
6143    * @param[in]  *pSrc is input pointer
6144    * @param[in]  blockSize is the number of samples to process
6145    * @param[out]  *pResult is output value.
6146    * @return none.
6147    */
6148   void arm_mean_f32(
6149 		     float32_t * pSrc,
6150 		    uint32_t blockSize,
6151 		    float32_t * pResult);
6152 
6153   /**
6154    * @brief  Variance of the elements of a floating-point vector.
6155    * @param[in]  *pSrc is input pointer
6156    * @param[in]  blockSize is the number of samples to process
6157    * @param[out]  *pResult is output value.
6158    * @return none.
6159    */
6160 
6161   void arm_var_f32(
6162 		    float32_t * pSrc,
6163 		   uint32_t blockSize,
6164 		   float32_t * pResult);
6165 
6166   /**
6167    * @brief  Variance of the elements of a Q31 vector.
6168    * @param[in]  *pSrc is input pointer
6169    * @param[in]  blockSize is the number of samples to process
6170    * @param[out]  *pResult is output value.
6171    * @return none.
6172    */
6173 
6174   void arm_var_q31(
6175 		    q31_t * pSrc,
6176 		   uint32_t blockSize,
6177 		   q63_t * pResult);
6178 
6179   /**
6180    * @brief  Variance of the elements of a Q15 vector.
6181    * @param[in]  *pSrc is input pointer
6182    * @param[in]  blockSize is the number of samples to process
6183    * @param[out]  *pResult is output value.
6184    * @return none.
6185    */
6186 
6187   void arm_var_q15(
6188 		    q15_t * pSrc,
6189 		   uint32_t blockSize,
6190 		   q31_t * pResult);
6191 
6192   /**
6193    * @brief  Root Mean Square of the elements of a floating-point vector.
6194    * @param[in]  *pSrc is input pointer
6195    * @param[in]  blockSize is the number of samples to process
6196    * @param[out]  *pResult is output value.
6197    * @return none.
6198    */
6199 
6200   void arm_rms_f32(
6201 		    float32_t * pSrc,
6202 		   uint32_t blockSize,
6203 		   float32_t * pResult);
6204 
6205   /**
6206    * @brief  Root Mean Square of the elements of a Q31 vector.
6207    * @param[in]  *pSrc is input pointer
6208    * @param[in]  blockSize is the number of samples to process
6209    * @param[out]  *pResult is output value.
6210    * @return none.
6211    */
6212 
6213   void arm_rms_q31(
6214 		    q31_t * pSrc,
6215 		   uint32_t blockSize,
6216 		   q31_t * pResult);
6217 
6218   /**
6219    * @brief  Root Mean Square of the elements of a Q15 vector.
6220    * @param[in]  *pSrc is input pointer
6221    * @param[in]  blockSize is the number of samples to process
6222    * @param[out]  *pResult is output value.
6223    * @return none.
6224    */
6225 
6226   void arm_rms_q15(
6227 		    q15_t * pSrc,
6228 		   uint32_t blockSize,
6229 		   q15_t * pResult);
6230 
6231   /**
6232    * @brief  Standard deviation of the elements of a floating-point vector.
6233    * @param[in]  *pSrc is input pointer
6234    * @param[in]  blockSize is the number of samples to process
6235    * @param[out]  *pResult is output value.
6236    * @return none.
6237    */
6238 
6239   void arm_std_f32(
6240 		    float32_t * pSrc,
6241 		   uint32_t blockSize,
6242 		   float32_t * pResult);
6243 
6244   /**
6245    * @brief  Standard deviation of the elements of a Q31 vector.
6246    * @param[in]  *pSrc is input pointer
6247    * @param[in]  blockSize is the number of samples to process
6248    * @param[out]  *pResult is output value.
6249    * @return none.
6250    */
6251 
6252   void arm_std_q31(
6253 		    q31_t * pSrc,
6254 		   uint32_t blockSize,
6255 		   q31_t * pResult);
6256 
6257   /**
6258    * @brief  Standard deviation of the elements of a Q15 vector.
6259    * @param[in]  *pSrc is input pointer
6260    * @param[in]  blockSize is the number of samples to process
6261    * @param[out]  *pResult is output value.
6262    * @return none.
6263    */
6264 
6265   void arm_std_q15(
6266 		    q15_t * pSrc,
6267 		   uint32_t blockSize,
6268 		   q15_t * pResult);
6269 
6270   /**
6271    * @brief  Floating-point complex magnitude
6272    * @param[in]  *pSrc points to the complex input vector
6273    * @param[out]  *pDst points to the real output vector
6274    * @param[in]  numSamples number of complex samples in the input vector
6275    * @return none.
6276    */
6277 
6278   void arm_cmplx_mag_f32(
6279 			  float32_t * pSrc,
6280 			 float32_t * pDst,
6281 			 uint32_t numSamples);
6282 
6283   /**
6284    * @brief  Q31 complex magnitude
6285    * @param[in]  *pSrc points to the complex input vector
6286    * @param[out]  *pDst points to the real output vector
6287    * @param[in]  numSamples number of complex samples in the input vector
6288    * @return none.
6289    */
6290 
6291   void arm_cmplx_mag_q31(
6292 			  q31_t * pSrc,
6293 			 q31_t * pDst,
6294 			 uint32_t numSamples);
6295 
6296   /**
6297    * @brief  Q15 complex magnitude
6298    * @param[in]  *pSrc points to the complex input vector
6299    * @param[out]  *pDst points to the real output vector
6300    * @param[in]  numSamples number of complex samples in the input vector
6301    * @return none.
6302    */
6303 
6304   void arm_cmplx_mag_q15(
6305 			  q15_t * pSrc,
6306 			 q15_t * pDst,
6307 			 uint32_t numSamples);
6308 
6309   /**
6310    * @brief  Q15 complex dot product
6311    * @param[in]  *pSrcA points to the first input vector
6312    * @param[in]  *pSrcB points to the second input vector
6313    * @param[in]  numSamples number of complex samples in each vector
6314    * @param[out]  *realResult real part of the result returned here
6315    * @param[out]  *imagResult imaginary part of the result returned here
6316    * @return none.
6317    */
6318 
6319   void arm_cmplx_dot_prod_q15(
6320 			       q15_t * pSrcA,
6321 			       q15_t * pSrcB,
6322 			      uint32_t numSamples,
6323 			      q31_t * realResult,
6324 			      q31_t * imagResult);
6325 
6326   /**
6327    * @brief  Q31 complex dot product
6328    * @param[in]  *pSrcA points to the first input vector
6329    * @param[in]  *pSrcB points to the second input vector
6330    * @param[in]  numSamples number of complex samples in each vector
6331    * @param[out]  *realResult real part of the result returned here
6332    * @param[out]  *imagResult imaginary part of the result returned here
6333    * @return none.
6334    */
6335 
6336   void arm_cmplx_dot_prod_q31(
6337 			       q31_t * pSrcA,
6338 			       q31_t * pSrcB,
6339 			      uint32_t numSamples,
6340 			      q63_t * realResult,
6341 			      q63_t * imagResult);
6342 
6343   /**
6344    * @brief  Floating-point complex dot product
6345    * @param[in]  *pSrcA points to the first input vector
6346    * @param[in]  *pSrcB points to the second input vector
6347    * @param[in]  numSamples number of complex samples in each vector
6348    * @param[out]  *realResult real part of the result returned here
6349    * @param[out]  *imagResult imaginary part of the result returned here
6350    * @return none.
6351    */
6352 
6353   void arm_cmplx_dot_prod_f32(
6354 			       float32_t * pSrcA,
6355 			       float32_t * pSrcB,
6356 			      uint32_t numSamples,
6357 			      float32_t * realResult,
6358 			      float32_t * imagResult);
6359 
6360   /**
6361    * @brief  Q15 complex-by-real multiplication
6362    * @param[in]  *pSrcCmplx points to the complex input vector
6363    * @param[in]  *pSrcReal points to the real input vector
6364    * @param[out]  *pCmplxDst points to the complex output vector
6365    * @param[in]  numSamples number of samples in each vector
6366    * @return none.
6367    */
6368 
6369   void arm_cmplx_mult_real_q15(
6370 			        q15_t * pSrcCmplx,
6371 			        q15_t * pSrcReal,
6372 			       q15_t * pCmplxDst,
6373 			       uint32_t numSamples);
6374 
6375   /**
6376    * @brief  Q31 complex-by-real multiplication
6377    * @param[in]  *pSrcCmplx points to the complex input vector
6378    * @param[in]  *pSrcReal points to the real input vector
6379    * @param[out]  *pCmplxDst points to the complex output vector
6380    * @param[in]  numSamples number of samples in each vector
6381    * @return none.
6382    */
6383 
6384   void arm_cmplx_mult_real_q31(
6385 			        q31_t * pSrcCmplx,
6386 			        q31_t * pSrcReal,
6387 			       q31_t * pCmplxDst,
6388 			       uint32_t numSamples);
6389 
6390   /**
6391    * @brief  Floating-point complex-by-real multiplication
6392    * @param[in]  *pSrcCmplx points to the complex input vector
6393    * @param[in]  *pSrcReal points to the real input vector
6394    * @param[out]  *pCmplxDst points to the complex output vector
6395    * @param[in]  numSamples number of samples in each vector
6396    * @return none.
6397    */
6398 
6399   void arm_cmplx_mult_real_f32(
6400 			        float32_t * pSrcCmplx,
6401 			        float32_t * pSrcReal,
6402 			       float32_t * pCmplxDst,
6403 			       uint32_t numSamples);
6404 
6405   /**
6406    * @brief  Minimum value of a Q7 vector.
6407    * @param[in]  *pSrc is input pointer
6408    * @param[in]  blockSize is the number of samples to process
6409    * @param[out]  *result is output pointer
6410    * @param[out]  *index is the array index of the minimum value in the input buffer.
6411    * @return none.
6412    */
6413 
6414   void arm_min_q7(
6415 		   q7_t * pSrc,
6416 		  uint32_t blockSize,
6417 		  q7_t * result,
6418 		  uint32_t * index);
6419 
6420   /**
6421    * @brief  Minimum value of a Q15 vector.
6422    * @param[in]  *pSrc is input pointer
6423    * @param[in]  blockSize is the number of samples to process
6424    * @param[out]  *pResult is output pointer
6425    * @param[out]  *pIndex is the array index of the minimum value in the input buffer.
6426    * @return none.
6427    */
6428 
6429   void arm_min_q15(
6430 		    q15_t * pSrc,
6431 		   uint32_t blockSize,
6432 		   q15_t * pResult,
6433 		   uint32_t * pIndex);
6434 
6435   /**
6436    * @brief  Minimum value of a Q31 vector.
6437    * @param[in]  *pSrc is input pointer
6438    * @param[in]  blockSize is the number of samples to process
6439    * @param[out]  *pResult is output pointer
6440    * @param[out]  *pIndex is the array index of the minimum value in the input buffer.
6441    * @return none.
6442    */
6443   void arm_min_q31(
6444 		    q31_t * pSrc,
6445 		   uint32_t blockSize,
6446 		   q31_t * pResult,
6447 		   uint32_t * pIndex);
6448 
6449   /**
6450    * @brief  Minimum value of a floating-point vector.
6451    * @param[in]  *pSrc is input pointer
6452    * @param[in]  blockSize is the number of samples to process
6453    * @param[out]  *pResult is output pointer
6454    * @param[out]  *pIndex is the array index of the minimum value in the input buffer.
6455    * @return none.
6456    */
6457 
6458   void arm_min_f32(
6459 		    float32_t * pSrc,
6460 		   uint32_t blockSize,
6461 		   float32_t * pResult,
6462 		   uint32_t * pIndex);
6463 
6464 /**
6465  * @brief Maximum value of a Q7 vector.
6466  * @param[in]       *pSrc points to the input buffer
6467  * @param[in]       blockSize length of the input vector
6468  * @param[out]      *pResult maximum value returned here
6469  * @param[out]      *pIndex index of maximum value returned here
6470  * @return none.
6471  */
6472 
6473   void arm_max_q7(
6474 		   q7_t * pSrc,
6475 		  uint32_t blockSize,
6476 		  q7_t * pResult,
6477 		  uint32_t * pIndex);
6478 
6479 /**
6480  * @brief Maximum value of a Q15 vector.
6481  * @param[in]       *pSrc points to the input buffer
6482  * @param[in]       blockSize length of the input vector
6483  * @param[out]      *pResult maximum value returned here
6484  * @param[out]      *pIndex index of maximum value returned here
6485  * @return none.
6486  */
6487 
6488   void arm_max_q15(
6489 		    q15_t * pSrc,
6490 		   uint32_t blockSize,
6491 		   q15_t * pResult,
6492 		   uint32_t * pIndex);
6493 
6494 /**
6495  * @brief Maximum value of a Q31 vector.
6496  * @param[in]       *pSrc points to the input buffer
6497  * @param[in]       blockSize length of the input vector
6498  * @param[out]      *pResult maximum value returned here
6499  * @param[out]      *pIndex index of maximum value returned here
6500  * @return none.
6501  */
6502 
6503   void arm_max_q31(
6504 		    q31_t * pSrc,
6505 		   uint32_t blockSize,
6506 		   q31_t * pResult,
6507 		   uint32_t * pIndex);
6508 
6509 /**
6510  * @brief Maximum value of a floating-point vector.
6511  * @param[in]       *pSrc points to the input buffer
6512  * @param[in]       blockSize length of the input vector
6513  * @param[out]      *pResult maximum value returned here
6514  * @param[out]      *pIndex index of maximum value returned here
6515  * @return none.
6516  */
6517 
6518   void arm_max_f32(
6519 		    float32_t * pSrc,
6520 		   uint32_t blockSize,
6521 		   float32_t * pResult,
6522 		   uint32_t * pIndex);
6523 
6524   /**
6525    * @brief  Q15 complex-by-complex multiplication
6526    * @param[in]  *pSrcA points to the first input vector
6527    * @param[in]  *pSrcB points to the second input vector
6528    * @param[out]  *pDst  points to the output vector
6529    * @param[in]  numSamples number of complex samples in each vector
6530    * @return none.
6531    */
6532 
6533   void arm_cmplx_mult_cmplx_q15(
6534 			        q15_t * pSrcA,
6535 			        q15_t * pSrcB,
6536 			       q15_t * pDst,
6537 			       uint32_t numSamples);
6538 
6539   /**
6540    * @brief  Q31 complex-by-complex multiplication
6541    * @param[in]  *pSrcA points to the first input vector
6542    * @param[in]  *pSrcB points to the second input vector
6543    * @param[out]  *pDst  points to the output vector
6544    * @param[in]  numSamples number of complex samples in each vector
6545    * @return none.
6546    */
6547 
6548   void arm_cmplx_mult_cmplx_q31(
6549 			        q31_t * pSrcA,
6550 			        q31_t * pSrcB,
6551 			       q31_t * pDst,
6552 			       uint32_t numSamples);
6553 
6554   /**
6555    * @brief  Floating-point complex-by-complex multiplication
6556    * @param[in]  *pSrcA points to the first input vector
6557    * @param[in]  *pSrcB points to the second input vector
6558    * @param[out]  *pDst  points to the output vector
6559    * @param[in]  numSamples number of complex samples in each vector
6560    * @return none.
6561    */
6562 
6563   void arm_cmplx_mult_cmplx_f32(
6564 			        float32_t * pSrcA,
6565 			        float32_t * pSrcB,
6566 			       float32_t * pDst,
6567 			       uint32_t numSamples);
6568 
6569   /**
6570    * @brief Converts the elements of the floating-point vector to Q31 vector.
6571    * @param[in]       *pSrc points to the floating-point input vector
6572    * @param[out]      *pDst points to the Q31 output vector
6573    * @param[in]       blockSize length of the input vector
6574    * @return none.
6575    */
6576   void arm_float_to_q31(
6577 			       float32_t * pSrc,
6578 			      q31_t * pDst,
6579 			      uint32_t blockSize);
6580 
6581   /**
6582    * @brief Converts the elements of the floating-point vector to Q15 vector.
6583    * @param[in]       *pSrc points to the floating-point input vector
6584    * @param[out]      *pDst points to the Q15 output vector
6585    * @param[in]       blockSize length of the input vector
6586    * @return          none
6587    */
6588   void arm_float_to_q15(
6589 			       float32_t * pSrc,
6590 			      q15_t * pDst,
6591 			      uint32_t blockSize);
6592 
6593   /**
6594    * @brief Converts the elements of the floating-point vector to Q7 vector.
6595    * @param[in]       *pSrc points to the floating-point input vector
6596    * @param[out]      *pDst points to the Q7 output vector
6597    * @param[in]       blockSize length of the input vector
6598    * @return          none
6599    */
6600   void arm_float_to_q7(
6601 			      float32_t * pSrc,
6602 			     q7_t * pDst,
6603 			     uint32_t blockSize);
6604 
6605 
6606   /**
6607    * @brief  Converts the elements of the Q31 vector to Q15 vector.
6608    * @param[in]  *pSrc is input pointer
6609    * @param[out]  *pDst is output pointer
6610    * @param[in]  blockSize is the number of samples to process
6611    * @return none.
6612    */
6613   void arm_q31_to_q15(
6614 		       q31_t * pSrc,
6615 		      q15_t * pDst,
6616 		      uint32_t blockSize);
6617 
6618   /**
6619    * @brief  Converts the elements of the Q31 vector to Q7 vector.
6620    * @param[in]  *pSrc is input pointer
6621    * @param[out]  *pDst is output pointer
6622    * @param[in]  blockSize is the number of samples to process
6623    * @return none.
6624    */
6625   void arm_q31_to_q7(
6626 		      q31_t * pSrc,
6627 		     q7_t * pDst,
6628 		     uint32_t blockSize);
6629 
6630   /**
6631    * @brief  Converts the elements of the Q15 vector to floating-point vector.
6632    * @param[in]  *pSrc is input pointer
6633    * @param[out]  *pDst is output pointer
6634    * @param[in]  blockSize is the number of samples to process
6635    * @return none.
6636    */
6637   void arm_q15_to_float(
6638 			 q15_t * pSrc,
6639 			float32_t * pDst,
6640 			uint32_t blockSize);
6641 
6642 
6643   /**
6644    * @brief  Converts the elements of the Q15 vector to Q31 vector.
6645    * @param[in]  *pSrc is input pointer
6646    * @param[out]  *pDst is output pointer
6647    * @param[in]  blockSize is the number of samples to process
6648    * @return none.
6649    */
6650   void arm_q15_to_q31(
6651 		       q15_t * pSrc,
6652 		      q31_t * pDst,
6653 		      uint32_t blockSize);
6654 
6655 
6656   /**
6657    * @brief  Converts the elements of the Q15 vector to Q7 vector.
6658    * @param[in]  *pSrc is input pointer
6659    * @param[out]  *pDst is output pointer
6660    * @param[in]  blockSize is the number of samples to process
6661    * @return none.
6662    */
6663   void arm_q15_to_q7(
6664 		      q15_t * pSrc,
6665 		     q7_t * pDst,
6666 		     uint32_t blockSize);
6667 
6668 
6669   /**
6670    * @ingroup groupInterpolation
6671    */
6672 
6673   /**
6674    * @defgroup BilinearInterpolate Bilinear Interpolation
6675    *
6676    * Bilinear interpolation is an extension of linear interpolation applied to a two dimensional grid.
6677    * The underlying function <code>f(x, y)</code> is sampled on a regular grid and the interpolation process
6678    * determines values between the grid points.
6679    * Bilinear interpolation is equivalent to two step linear interpolation, first in the x-dimension and then in the y-dimension.
6680    * Bilinear interpolation is often used in image processing to rescale images.
6681    * The CMSIS DSP library provides bilinear interpolation functions for Q7, Q15, Q31, and floating-point data types.
6682    *
6683    * <b>Algorithm</b>
6684    * \par
6685    * The instance structure used by the bilinear interpolation functions describes a two dimensional data table.
6686    * For floating-point, the instance structure is defined as:
6687    * <pre>
6688    *   typedef struct
6689    *   {
6690    *     uint16_t numRows;
6691    *     uint16_t numCols;
6692    *     float32_t *pData;
6693    * } arm_bilinear_interp_instance_f32;
6694    * </pre>
6695    *
6696    * \par
6697    * where <code>numRows</code> specifies the number of rows in the table;
6698    * <code>numCols</code> specifies the number of columns in the table;
6699    * and <code>pData</code> points to an array of size <code>numRows*numCols</code> values.
6700    * The data table <code>pData</code> is organized in row order and the supplied data values fall on integer indexes.
6701    * That is, table element (x,y) is located at <code>pData[x + y*numCols]</code> where x and y are integers.
6702    *
6703    * \par
6704    * Let <code>(x, y)</code> specify the desired interpolation point.  Then define:
6705    * <pre>
6706    *     XF = floor(x)
6707    *     YF = floor(y)
6708    * </pre>
6709    * \par
6710    * The interpolated output point is computed as:
6711    * <pre>
6712    *  f(x, y) = f(XF, YF) * (1-(x-XF)) * (1-(y-YF))
6713    *           + f(XF+1, YF) * (x-XF)*(1-(y-YF))
6714    *           + f(XF, YF+1) * (1-(x-XF))*(y-YF)
6715    *           + f(XF+1, YF+1) * (x-XF)*(y-YF)
6716    * </pre>
6717    * Note that the coordinates (x, y) contain integer and fractional components.
6718    * The integer components specify which portion of the table to use while the
6719    * fractional components control the interpolation process.
6720    *
6721    * \par
6722    * If (x,y) lies outside of the table boundary, bilinear interpolation returns zero output.
6723    */
6724 
6725   /**
6726    * @addtogroup BilinearInterpolate
6727    * @{
6728    */
6729 
6730   /**
6731   *
6732   * @brief  Floating-point bilinear interpolation.
6733   * @param[in,out] *S points to an instance of the interpolation structure.
6734   * @param[in] X interpolation coordinate.
6735   * @param[in] Y interpolation coordinate.
6736   * @return out interpolated value.
6737   */
6738 
6739 
arm_bilinear_interp_f32(const arm_bilinear_interp_instance_f32 * S,float32_t X,float32_t Y)6740   static __INLINE float32_t arm_bilinear_interp_f32(
6741 						    const arm_bilinear_interp_instance_f32 * S,
6742 						    float32_t X,
6743 						    float32_t Y)
6744   {
6745     float32_t out;
6746     float32_t f00, f01, f10, f11;
6747     float32_t *pData = S->pData;
6748     int32_t xIndex, yIndex, index;
6749     float32_t xdiff, ydiff;
6750     float32_t b1, b2, b3, b4;
6751 
6752     xIndex = (int32_t) X;
6753     yIndex = (int32_t) Y;
6754 
6755 	/* Care taken for table outside boundary */
6756 	/* Returns zero output when values are outside table boundary */
6757 	if(xIndex < 0 || xIndex > (S->numRows-1) || yIndex < 0  || yIndex > ( S->numCols-1))
6758 	{
6759 		return(0);
6760 	}
6761 
6762     /* Calculation of index for two nearest points in X-direction */
6763     index = (xIndex - 1) + (yIndex-1) *  S->numCols ;
6764 
6765 
6766     /* Read two nearest points in X-direction */
6767     f00 = pData[index];
6768     f01 = pData[index + 1];
6769 
6770     /* Calculation of index for two nearest points in Y-direction */
6771     index = (xIndex-1) + (yIndex) * S->numCols;
6772 
6773 
6774     /* Read two nearest points in Y-direction */
6775     f10 = pData[index];
6776     f11 = pData[index + 1];
6777 
6778     /* Calculation of intermediate values */
6779     b1 = f00;
6780     b2 = f01 - f00;
6781     b3 = f10 - f00;
6782     b4 = f00 - f01 - f10 + f11;
6783 
6784     /* Calculation of fractional part in X */
6785     xdiff = X - xIndex;
6786 
6787     /* Calculation of fractional part in Y */
6788     ydiff = Y - yIndex;
6789 
6790     /* Calculation of bi-linear interpolated output */
6791      out = b1 + b2 * xdiff + b3 * ydiff + b4 * xdiff * ydiff;
6792 
6793    /* return to application */
6794     return (out);
6795 
6796   }
6797 
6798   /**
6799   *
6800   * @brief  Q31 bilinear interpolation.
6801   * @param[in,out] *S points to an instance of the interpolation structure.
6802   * @param[in] X interpolation coordinate in 12.20 format.
6803   * @param[in] Y interpolation coordinate in 12.20 format.
6804   * @return out interpolated value.
6805   */
6806 
arm_bilinear_interp_q31(arm_bilinear_interp_instance_q31 * S,q31_t X,q31_t Y)6807   static __INLINE q31_t arm_bilinear_interp_q31(
6808 						arm_bilinear_interp_instance_q31 * S,
6809 						q31_t X,
6810 						q31_t Y)
6811   {
6812     q31_t out;                                   /* Temporary output */
6813     q31_t acc = 0;                               /* output */
6814     q31_t xfract, yfract;                        /* X, Y fractional parts */
6815     q31_t x1, x2, y1, y2;                        /* Nearest output values */
6816     int32_t rI, cI;                             /* Row and column indices */
6817     q31_t *pYData = S->pData;                    /* pointer to output table values */
6818     uint32_t nCols = S->numCols;                 /* num of rows */
6819 
6820 
6821     /* Input is in 12.20 format */
6822     /* 12 bits for the table index */
6823     /* Index value calculation */
6824     rI = ((X & 0xFFF00000) >> 20u);
6825 
6826     /* Input is in 12.20 format */
6827     /* 12 bits for the table index */
6828     /* Index value calculation */
6829     cI = ((Y & 0xFFF00000) >> 20u);
6830 
6831 	/* Care taken for table outside boundary */
6832 	/* Returns zero output when values are outside table boundary */
6833 	if(rI < 0 || rI > (S->numRows-1) || cI < 0  || cI > ( S->numCols-1))
6834 	{
6835 		return(0);
6836 	}
6837 
6838     /* 20 bits for the fractional part */
6839     /* shift left xfract by 11 to keep 1.31 format */
6840     xfract = (X & 0x000FFFFF) << 11u;
6841 
6842     /* Read two nearest output values from the index */
6843     x1 = pYData[(rI) + nCols * (cI)];
6844     x2 = pYData[(rI) + nCols * (cI) + 1u];
6845 
6846     /* 20 bits for the fractional part */
6847     /* shift left yfract by 11 to keep 1.31 format */
6848     yfract = (Y & 0x000FFFFF) << 11u;
6849 
6850     /* Read two nearest output values from the index */
6851     y1 = pYData[(rI) + nCols * (cI + 1)];
6852     y2 = pYData[(rI) + nCols * (cI + 1) + 1u];
6853 
6854     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 3.29(q29) format */
6855     out = ((q31_t) (((q63_t) x1 * (0x7FFFFFFF - xfract)) >> 32));
6856     acc = ((q31_t) (((q63_t) out * (0x7FFFFFFF - yfract)) >> 32));
6857 
6858     /* x2 * (xfract) * (1-yfract)  in 3.29(q29) and adding to acc */
6859     out = ((q31_t) ((q63_t) x2 * (0x7FFFFFFF - yfract) >> 32));
6860     acc += ((q31_t) ((q63_t) out * (xfract) >> 32));
6861 
6862     /* y1 * (1 - xfract) * (yfract)  in 3.29(q29) and adding to acc */
6863     out = ((q31_t) ((q63_t) y1 * (0x7FFFFFFF - xfract) >> 32));
6864     acc += ((q31_t) ((q63_t) out * (yfract) >> 32));
6865 
6866     /* y2 * (xfract) * (yfract)  in 3.29(q29) and adding to acc */
6867     out = ((q31_t) ((q63_t) y2 * (xfract) >> 32));
6868     acc += ((q31_t) ((q63_t) out * (yfract) >> 32));
6869 
6870     /* Convert acc to 1.31(q31) format */
6871     return (acc << 2u);
6872 
6873   }
6874 
6875   /**
6876   * @brief  Q15 bilinear interpolation.
6877   * @param[in,out] *S points to an instance of the interpolation structure.
6878   * @param[in] X interpolation coordinate in 12.20 format.
6879   * @param[in] Y interpolation coordinate in 12.20 format.
6880   * @return out interpolated value.
6881   */
6882 
arm_bilinear_interp_q15(arm_bilinear_interp_instance_q15 * S,q31_t X,q31_t Y)6883   static __INLINE q15_t arm_bilinear_interp_q15(
6884 						arm_bilinear_interp_instance_q15 * S,
6885 						q31_t X,
6886 						q31_t Y)
6887   {
6888     q63_t acc = 0;                               /* output */
6889     q31_t out;                                   /* Temporary output */
6890     q15_t x1, x2, y1, y2;                        /* Nearest output values */
6891     q31_t xfract, yfract;                        /* X, Y fractional parts */
6892     int32_t rI, cI;                             /* Row and column indices */
6893     q15_t *pYData = S->pData;                    /* pointer to output table values */
6894     uint32_t nCols = S->numCols;                 /* num of rows */
6895 
6896     /* Input is in 12.20 format */
6897     /* 12 bits for the table index */
6898     /* Index value calculation */
6899     rI = ((X & 0xFFF00000) >> 20);
6900 
6901     /* Input is in 12.20 format */
6902     /* 12 bits for the table index */
6903     /* Index value calculation */
6904     cI = ((Y & 0xFFF00000) >> 20);
6905 
6906 	/* Care taken for table outside boundary */
6907 	/* Returns zero output when values are outside table boundary */
6908 	if(rI < 0 || rI > (S->numRows-1) || cI < 0  || cI > ( S->numCols-1))
6909 	{
6910 		return(0);
6911 	}
6912 
6913     /* 20 bits for the fractional part */
6914     /* xfract should be in 12.20 format */
6915     xfract = (X & 0x000FFFFF);
6916 
6917     /* Read two nearest output values from the index */
6918     x1 = pYData[(rI) + nCols * (cI)];
6919     x2 = pYData[(rI) + nCols * (cI) + 1u];
6920 
6921 
6922     /* 20 bits for the fractional part */
6923     /* yfract should be in 12.20 format */
6924     yfract = (Y & 0x000FFFFF);
6925 
6926     /* Read two nearest output values from the index */
6927     y1 = pYData[(rI) + nCols * (cI + 1)];
6928     y2 = pYData[(rI) + nCols * (cI + 1) + 1u];
6929 
6930     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 13.51 format */
6931 
6932     /* x1 is in 1.15(q15), xfract in 12.20 format and out is in 13.35 format */
6933     /* convert 13.35 to 13.31 by right shifting  and out is in 1.31 */
6934     out = (q31_t) (((q63_t) x1 * (0xFFFFF - xfract)) >> 4u);
6935     acc = ((q63_t) out * (0xFFFFF - yfract));
6936 
6937     /* x2 * (xfract) * (1-yfract)  in 1.51 and adding to acc */
6938     out = (q31_t) (((q63_t) x2 * (0xFFFFF - yfract)) >> 4u);
6939     acc += ((q63_t) out * (xfract));
6940 
6941     /* y1 * (1 - xfract) * (yfract)  in 1.51 and adding to acc */
6942     out = (q31_t) (((q63_t) y1 * (0xFFFFF - xfract)) >> 4u);
6943     acc += ((q63_t) out * (yfract));
6944 
6945     /* y2 * (xfract) * (yfract)  in 1.51 and adding to acc */
6946     out = (q31_t) (((q63_t) y2 * (xfract)) >> 4u);
6947     acc += ((q63_t) out * (yfract));
6948 
6949     /* acc is in 13.51 format and down shift acc by 36 times */
6950     /* Convert out to 1.15 format */
6951     return (acc >> 36);
6952 
6953   }
6954 
6955   /**
6956   * @brief  Q7 bilinear interpolation.
6957   * @param[in,out] *S points to an instance of the interpolation structure.
6958   * @param[in] X interpolation coordinate in 12.20 format.
6959   * @param[in] Y interpolation coordinate in 12.20 format.
6960   * @return out interpolated value.
6961   */
6962 
arm_bilinear_interp_q7(arm_bilinear_interp_instance_q7 * S,q31_t X,q31_t Y)6963   static __INLINE q7_t arm_bilinear_interp_q7(
6964 					      arm_bilinear_interp_instance_q7 * S,
6965 					      q31_t X,
6966 					      q31_t Y)
6967   {
6968     q63_t acc = 0;                               /* output */
6969     q31_t out;                                   /* Temporary output */
6970     q31_t xfract, yfract;                        /* X, Y fractional parts */
6971     q7_t x1, x2, y1, y2;                         /* Nearest output values */
6972     int32_t rI, cI;                             /* Row and column indices */
6973     q7_t *pYData = S->pData;                     /* pointer to output table values */
6974     uint32_t nCols = S->numCols;                 /* num of rows */
6975 
6976     /* Input is in 12.20 format */
6977     /* 12 bits for the table index */
6978     /* Index value calculation */
6979     rI = ((X & 0xFFF00000) >> 20);
6980 
6981     /* Input is in 12.20 format */
6982     /* 12 bits for the table index */
6983     /* Index value calculation */
6984     cI = ((Y & 0xFFF00000) >> 20);
6985 
6986 	/* Care taken for table outside boundary */
6987 	/* Returns zero output when values are outside table boundary */
6988 	if(rI < 0 || rI > (S->numRows-1) || cI < 0  || cI > ( S->numCols-1))
6989 	{
6990 		return(0);
6991 	}
6992 
6993     /* 20 bits for the fractional part */
6994     /* xfract should be in 12.20 format */
6995     xfract = (X & 0x000FFFFF);
6996 
6997     /* Read two nearest output values from the index */
6998     x1 = pYData[(rI) + nCols * (cI)];
6999     x2 = pYData[(rI) + nCols * (cI) + 1u];
7000 
7001 
7002     /* 20 bits for the fractional part */
7003     /* yfract should be in 12.20 format */
7004     yfract = (Y & 0x000FFFFF);
7005 
7006     /* Read two nearest output values from the index */
7007     y1 = pYData[(rI) + nCols * (cI + 1)];
7008     y2 = pYData[(rI) + nCols * (cI + 1) + 1u];
7009 
7010     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 16.47 format */
7011     out = ((x1 * (0xFFFFF - xfract)));
7012     acc = (((q63_t) out * (0xFFFFF - yfract)));
7013 
7014     /* x2 * (xfract) * (1-yfract)  in 2.22 and adding to acc */
7015     out = ((x2 * (0xFFFFF - yfract)));
7016     acc += (((q63_t) out * (xfract)));
7017 
7018     /* y1 * (1 - xfract) * (yfract)  in 2.22 and adding to acc */
7019     out = ((y1 * (0xFFFFF - xfract)));
7020     acc += (((q63_t) out * (yfract)));
7021 
7022     /* y2 * (xfract) * (yfract)  in 2.22 and adding to acc */
7023     out = ((y2 * (yfract)));
7024     acc += (((q63_t) out * (xfract)));
7025 
7026     /* acc in 16.47 format and down shift by 40 to convert to 1.7 format */
7027     return (acc >> 40);
7028 
7029   }
7030 
7031   /**
7032    * @} end of BilinearInterpolate group
7033    */
7034 
7035 
7036 
7037 
7038 
7039 
7040 #ifdef	__cplusplus
7041 }
7042 #endif
7043 
7044 
7045 #endif /* _ARM_MATH_H */
7046 
7047 
7048 /**
7049  *
7050  * End of file.
7051  */
7052