tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/arm_math.h

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        17. January 2013
   5 * $Revision:    V1.4.1
   6 *
   7 * Project:      CMSIS DSP Library
   8 * Title:        arm_math.h
   9 *
  10 * Description:  Public header file for CMSIS DSP Library
  11 *
  12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13 *
  14 * Redistribution and use in source and binary forms, with or without
  15 * modification, are permitted provided that the following conditions
  16 * are met:
  17 *   - Redistributions of source code must retain the above copyright
  18 *     notice, this list of conditions and the following disclaimer.
  19 *   - Redistributions in binary form must reproduce the above copyright
  20 *     notice, this list of conditions and the following disclaimer in
  21 *     the documentation and/or other materials provided with the
  22 *     distribution.
  23 *   - Neither the name of ARM LIMITED nor the names of its contributors
  24 *     may be used to endorse or promote products derived from this
  25 *     software without specific prior written permission.
  26 *
  27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38 * POSSIBILITY OF SUCH DAMAGE.
  39  * -------------------------------------------------------------------- */
  40
  41 /**
  42    \mainpage CMSIS DSP Software Library
  43    *
  44    * <b>Introduction</b>
  45    *
  46    * This user manual describes the CMSIS DSP software library,
  47    * a suite of common signal processing functions for use on Cortex-M processor based devices.
  48    *
  49    * The library is divided into a number of functions each covering a specific category:
  50    * - Basic math functions
  51    * - Fast math functions
  52    * - Complex math functions
  53    * - Filters
  54    * - Matrix functions
  55    * - Transforms
  56    * - Motor control functions
  57    * - Statistical functions
  58    * - Support functions
  59    * - Interpolation functions
  60    *
  61    * The library has separate functions for operating on 8-bit integers, 16-bit integers,
   62    * 32-bit integers and 32-bit floating-point values.
  63    *
  64    * <b>Using the Library</b>
  65    *
  66    * The library installer contains prebuilt versions of the libraries in the <code>Lib</code> folder.
  67    * - arm_cortexM4lf_math.lib (Little endian and Floating Point Unit on Cortex-M4)
  68    * - arm_cortexM4bf_math.lib (Big endian and Floating Point Unit on Cortex-M4)
  69    * - arm_cortexM4l_math.lib (Little endian on Cortex-M4)
  70    * - arm_cortexM4b_math.lib (Big endian on Cortex-M4)
  71    * - arm_cortexM3l_math.lib (Little endian on Cortex-M3)
  72    * - arm_cortexM3b_math.lib (Big endian on Cortex-M3)
  73    * - arm_cortexM0l_math.lib (Little endian on Cortex-M0)
   74    * - arm_cortexM0b_math.lib (Big endian on Cortex-M0)
  75    *
  76    * The library functions are declared in the public file <code>arm_math.h</code> which is placed in the <code>Include</code> folder.
   77    * Simply include this file, link the appropriate library in the application and begin calling the library functions. The library provides a single
   78    * public header file <code>arm_math.h</code> for Cortex-M4/M3/M0 in both little endian and big endian configurations. The same header file is used for the floating point unit (FPU) variants.
  79    * Define the appropriate pre processor MACRO ARM_MATH_CM4 or  ARM_MATH_CM3 or
  80    * ARM_MATH_CM0 or ARM_MATH_CM0PLUS depending on the target processor in the application.
  81    *
  82    * <b>Examples</b>
  83    *
  84    * The library ships with a number of examples which demonstrate how to use the library functions.
  85    *
  86    * <b>Toolchain Support</b>
  87    *
  88    * The library has been developed and tested with MDK-ARM version 4.60.
  89    * The library is being tested in GCC and IAR toolchains and updates on this activity will be made available shortly.
  90    *
  91    * <b>Building the Library</b>
  92    *
  93    * The library installer contains project files to re build libraries on MDK Tool chain in the <code>CMSIS\\DSP_Lib\\Source\\ARM</code> folder.
  94    * - arm_cortexM0b_math.uvproj
  95    * - arm_cortexM0l_math.uvproj
  96    * - arm_cortexM3b_math.uvproj
  97    * - arm_cortexM3l_math.uvproj
  98    * - arm_cortexM4b_math.uvproj
  99    * - arm_cortexM4l_math.uvproj
 100    * - arm_cortexM4bf_math.uvproj
 101    * - arm_cortexM4lf_math.uvproj
 102    *
 103    *
 104    * The project can be built by opening the appropriate project in MDK-ARM 4.60 chain and defining the optional pre processor MACROs detailed above.
 105    *
 106    * <b>Pre-processor Macros</b>
 107    *
  108    * Each library project has different pre-processor macros.
 109    *
 110    * - UNALIGNED_SUPPORT_DISABLE:
 111    *
  112    * Define macro UNALIGNED_SUPPORT_DISABLE if the silicon does not support unaligned memory access.
 113    *
 114    * - ARM_MATH_BIG_ENDIAN:
 115    *
 116    * Define macro ARM_MATH_BIG_ENDIAN to build the library for big endian targets. By default library builds for little endian targets.
 117    *
 118    * - ARM_MATH_MATRIX_CHECK:
 119    *
 120    * Define macro ARM_MATH_MATRIX_CHECK for checking on the input and output sizes of matrices
 121    *
 122    * - ARM_MATH_ROUNDING:
 123    *
 124    * Define macro ARM_MATH_ROUNDING for rounding on support functions
 125    *
 126    * - ARM_MATH_CMx:
 127    *
 128    * Define macro ARM_MATH_CM4 for building the library on Cortex-M4 target, ARM_MATH_CM3 for building library on Cortex-M3 target
 129    * and ARM_MATH_CM0 for building library on cortex-M0 target, ARM_MATH_CM0PLUS for building library on cortex-M0+ target.
 130    *
 131    * - __FPU_PRESENT:
 132    *
 133    * Initialize macro __FPU_PRESENT = 1 when building on FPU supported Targets. Enable this macro for M4bf and M4lf libraries
 134    *
 135    * <b>Copyright Notice</b>
 136    *
 137    * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
 138    */
 139
 140
 141 /**
 142  * @defgroup groupMath Basic Math Functions
 143  */
 144
 145 /**
 146  * @defgroup groupFastMath Fast Math Functions
 147  * This set of functions provides a fast approximation to sine, cosine, and square root.
 148  * As compared to most of the other functions in the CMSIS math library, the fast math functions
 149  * operate on individual values and not arrays.
 150  * There are separate functions for Q15, Q31, and floating-point data.
 151  *
 152  */
 153
 154 /**
 155  * @defgroup groupCmplxMath Complex Math Functions
 156  * This set of functions operates on complex data vectors.
 157  * The data in the complex arrays is stored in an interleaved fashion
 158  * (real, imag, real, imag, ...).
 159  * In the API functions, the number of samples in a complex array refers
 160  * to the number of complex values; the array contains twice this number of
 161  * real values.
 162  */
 163
 164 /**
 165  * @defgroup groupFilters Filtering Functions
 166  */
 167
 168 /**
 169  * @defgroup groupMatrix Matrix Functions
 170  *
 171  * This set of functions provides basic matrix math operations.
 172  * The functions operate on matrix data structures.  For example,
 173  * the type
 174  * definition for the floating-point matrix structure is shown
 175  * below:
 176  * <pre>
 177  *     typedef struct
 178  *     {
 179  *       uint16_t numRows;     // number of rows of the matrix.
 180  *       uint16_t numCols;     // number of columns of the matrix.
 181  *       float32_t *pData;     // points to the data of the matrix.
 182  *     } arm_matrix_instance_f32;
 183  * </pre>
 184  * There are similar definitions for Q15 and Q31 data types.
 185  *
 186  * The structure specifies the size of the matrix and then points to
 187  * an array of data.  The array is of size <code>numRows X numCols</code>
 188  * and the values are arranged in row order.  That is, the
 189  * matrix element (i, j) is stored at:
 190  * <pre>
 191  *     pData[i*numCols + j]
 192  * </pre>
 193  *
 194  * \par Init Functions
 195  * There is an associated initialization function for each type of matrix
 196  * data structure.
 197  * The initialization function sets the values of the internal structure fields.
 198  * Refer to the function <code>arm_mat_init_f32()</code>, <code>arm_mat_init_q31()</code>
 199  * and <code>arm_mat_init_q15()</code> for floating-point, Q31 and Q15 types,  respectively.
 200  *
 201  * \par
 202  * Use of the initialization function is optional. However, if initialization function is used
 203  * then the instance structure cannot be placed into a const data section.
 204  * To place the instance structure in a const data
 205  * section, manually initialize the data structure.  For example:
 206  * <pre>
 207  * <code>arm_matrix_instance_f32 S = {nRows, nColumns, pData};</code>
 208  * <code>arm_matrix_instance_q31 S = {nRows, nColumns, pData};</code>
 209  * <code>arm_matrix_instance_q15 S = {nRows, nColumns, pData};</code>
 210  * </pre>
 211  * where <code>nRows</code> specifies the number of rows, <code>nColumns</code>
 212  * specifies the number of columns, and <code>pData</code> points to the
 213  * data array.
 214  *
 215  * \par Size Checking
 216  * By default all of the matrix functions perform size checking on the input and
 217  * output matrices.  For example, the matrix addition function verifies that the
 218  * two input matrices and the output matrix all have the same number of rows and
 219  * columns.  If the size check fails the functions return:
 220  * <pre>
 221  *     ARM_MATH_SIZE_MISMATCH
 222  * </pre>
 223  * Otherwise the functions return
 224  * <pre>
 225  *     ARM_MATH_SUCCESS
 226  * </pre>
 227  * There is some overhead associated with this matrix size checking.
 228  * The matrix size checking is enabled via the \#define
 229  * <pre>
 230  *     ARM_MATH_MATRIX_CHECK
 231  * </pre>
 232  * within the library project settings.  By default this macro is defined
 233  * and size checking is enabled.  By changing the project settings and
 234  * undefining this macro size checking is eliminated and the functions
 235  * run a bit faster.  With size checking disabled the functions always
 236  * return <code>ARM_MATH_SUCCESS</code>.
 237  */
 238
 239 /**
 240  * @defgroup groupTransforms Transform Functions
 241  */
 242
 243 /**
 244  * @defgroup groupController Controller Functions
 245  */
 246
 247 /**
 248  * @defgroup groupStats Statistics Functions
 249  */
 250 /**
 251  * @defgroup groupSupport Support Functions
 252  */
 253
 254 /**
 255  * @defgroup groupInterpolation Interpolation Functions
 256  * These functions perform 1- and 2-dimensional interpolation of data.
 257  * Linear interpolation is used for 1-dimensional data and
 258  * bilinear interpolation is used for 2-dimensional data.
 259  */
 260
 261 /**
 262  * @defgroup groupExamples Examples
 263  */
 264 #ifndef _ARM_MATH_H
 265 #define _ARM_MATH_H
 266
 267 #define __CMSIS_GENERIC         /* disable NVIC and Systick functions */
 268
 269 #if defined (ARM_MATH_CM4)
 270 #include "core_cm4.h"
 271 #elif defined (ARM_MATH_CM3)
 272 #include "core_cm3.h"
 273 #elif defined (ARM_MATH_CM0)
 274 #include "core_cm0.h"
 275 #define ARM_MATH_CM0_FAMILY
 276 #elif defined (ARM_MATH_CM0PLUS)
 277 #include "core_cm0plus.h"
 278 #define ARM_MATH_CM0_FAMILY
 279 #else
 280 #include "ARMCM4.h"
 281 #warning "Define either ARM_MATH_CM4 OR ARM_MATH_CM3...By Default building on ARM_MATH_CM4....."
 282 #endif
 283
 284 #undef  __CMSIS_GENERIC         /* enable NVIC and Systick functions */
 285 #include "string.h"
 286 #include "math.h"
 287 #ifdef  __cplusplus
 288 extern "C"
 289 {
 290 #endif
 291
 292
 293   /**
 294    * @brief Macros required for reciprocal calculation in Normalized LMS
 295    */
 296
 297 #define DELTA_Q31                       (0x100)
 298 #define DELTA_Q15                       0x5
 299 #define INDEX_MASK                      0x0000003F
 300 #ifndef PI
 301 #define PI                                      3.14159265358979f
 302 #endif
 303
 304   /**
 305    * @brief Macros required for SINE and COSINE Fast math approximations
 306    */
 307
 308 #define TABLE_SIZE                      256
 309 #define TABLE_SPACING_Q31       0x800000
 310 #define TABLE_SPACING_Q15       0x80
 311
 312   /**
 313    * @brief Macros required for SINE and COSINE Controller functions
 314    */
 315   /* 1.31(q31) Fixed value of 2/360 */
 316   /* -1 to +1 is divided into 360 values so total spacing is (2/360) */
 317 #define INPUT_SPACING                   0xB60B61
 318
 319   /**
 320    * @brief Macro for Unaligned Support
 321    */
 322 #ifndef UNALIGNED_SUPPORT_DISABLE
 323     #define ALIGN4
 324 #else
 325   #if defined  (__GNUC__)
 326     #define ALIGN4 __attribute__((aligned(4)))
 327   #else
 328     #define ALIGN4 __align(4)
 329   #endif
 330 #endif  /*      #ifndef UNALIGNED_SUPPORT_DISABLE       */
 331
 332   /**
 333    * @brief Error status returned by some functions in the library.
 334    */
 335
 336   typedef enum
 337   {
 338     ARM_MATH_SUCCESS = 0,                /**< No error */
 339     ARM_MATH_ARGUMENT_ERROR = -1,        /**< One or more arguments are incorrect */
 340     ARM_MATH_LENGTH_ERROR = -2,          /**< Length of data buffer is incorrect */
 341     ARM_MATH_SIZE_MISMATCH = -3,         /**< Size of matrices is not compatible with the operation. */
 342     ARM_MATH_NANINF = -4,                /**< Not-a-number (NaN) or infinity is generated */
 343     ARM_MATH_SINGULAR = -5,              /**< Generated by matrix inversion if the input matrix is singular and cannot be inverted. */
 344     ARM_MATH_TEST_FAILURE = -6           /**< Test Failed  */
 345   } arm_status;
 346
 347   /**
 348    * @brief 8-bit fractional data type in 1.7 format.
 349    */
 350   typedef int8_t q7_t;
 351
 352   /**
 353    * @brief 16-bit fractional data type in 1.15 format.
 354    */
 355   typedef int16_t q15_t;
 356
 357   /**
 358    * @brief 32-bit fractional data type in 1.31 format.
 359    */
 360   typedef int32_t q31_t;
 361
 362   /**
 363    * @brief 64-bit fractional data type in 1.63 format.
 364    */
 365   typedef int64_t q63_t;
 366
 367   /**
 368    * @brief 32-bit floating-point type definition.
 369    */
 370   typedef float float32_t;
 371
 372   /**
 373    * @brief 64-bit floating-point type definition.
 374    */
 375   typedef double float64_t;
 376
 377   /**
 378    * @brief definition to read/write two 16 bit values.
 379    */
 380 #if defined __CC_ARM
 381 #define __SIMD32_TYPE int32_t __packed
 382 #define CMSIS_UNUSED __attribute__((unused))
 383 #elif defined __ICCARM__
 384 #define CMSIS_UNUSED
 385 #define __SIMD32_TYPE int32_t __packed
 386 #elif defined __GNUC__
 387 #define __SIMD32_TYPE int32_t
 388 #define CMSIS_UNUSED __attribute__((unused))
 389 #else
 390 #error Unknown compiler
 391 #endif
 392
 393 #define __SIMD32(addr)  (*(__SIMD32_TYPE **) & (addr))
 394 #define __SIMD32_CONST(addr)  ((__SIMD32_TYPE *)(addr))
 395
 396 #define _SIMD32_OFFSET(addr)  (*(__SIMD32_TYPE *)  (addr))
 397
 398 #define __SIMD64(addr)  (*(int64_t **) & (addr))
 399
 400 #if defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0_FAMILY)
 401   /**
 402    * @brief definition to pack two 16 bit values.
 403    */
 404 #define __PKHBT(ARG1, ARG2, ARG3)      ( (((int32_t)(ARG1) <<  0) & (int32_t)0x0000FFFF) | \
 405                                          (((int32_t)(ARG2) << ARG3) & (int32_t)0xFFFF0000)  )
 406 #define __PKHTB(ARG1, ARG2, ARG3)      ( (((int32_t)(ARG1) <<  0) & (int32_t)0xFFFF0000) | \
 407                                          (((int32_t)(ARG2) >> ARG3) & (int32_t)0x0000FFFF)  )
 408
 409 #endif
 410
 411
 412    /**
 413    * @brief definition to pack four 8 bit values.
 414    */
 415 #ifndef ARM_MATH_BIG_ENDIAN
 416
 417 #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) <<  0) & (int32_t)0x000000FF) | \
 418                                 (((int32_t)(v1) <<  8) & (int32_t)0x0000FF00) | \
 419                                                             (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) |     \
 420                                                             (((int32_t)(v3) << 24) & (int32_t)0xFF000000)  )
 421 #else
 422
 423 #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v3) <<  0) & (int32_t)0x000000FF) | \
 424                                 (((int32_t)(v2) <<  8) & (int32_t)0x0000FF00) | \
 425                                                             (((int32_t)(v1) << 16) & (int32_t)0x00FF0000) |     \
 426                                                             (((int32_t)(v0) << 24) & (int32_t)0xFF000000)  )
 427
 428 #endif
 429
 430
 431   /**
 432    * @brief Clips Q63 to Q31 values.
 433    */
 434   static __INLINE q31_t clip_q63_to_q31(
 435   q63_t x)
 436   {
 437     return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
 438       ((0x7FFFFFFF ^ ((q31_t) (x >> 63)))) : (q31_t) x;
 439   }
 440
 441   /**
 442    * @brief Clips Q63 to Q15 values.
 443    */
 444   static __INLINE q15_t clip_q63_to_q15(
 445   q63_t x)
 446   {
 447     return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
 448       ((0x7FFF ^ ((q15_t) (x >> 63)))) : (q15_t) (x >> 15);
 449   }
 450
 451   /**
 452    * @brief Clips Q31 to Q7 values.
 453    */
 454   static __INLINE q7_t clip_q31_to_q7(
 455   q31_t x)
 456   {
 457     return ((q31_t) (x >> 24) != ((q31_t) x >> 23)) ?
 458       ((0x7F ^ ((q7_t) (x >> 31)))) : (q7_t) x;
 459   }
 460
 461   /**
 462    * @brief Clips Q31 to Q15 values.
 463    */
 464   static __INLINE q15_t clip_q31_to_q15(
 465   q31_t x)
 466   {
 467     return ((q31_t) (x >> 16) != ((q31_t) x >> 15)) ?
 468       ((0x7FFF ^ ((q15_t) (x >> 31)))) : (q15_t) x;
 469   }
 470
 471   /**
 472    * @brief Multiplies 32 X 64 and returns 32 bit result in 2.30 format.
 473    */
 474
 475   static __INLINE q63_t mult32x64(
 476   q63_t x,
 477   q31_t y)
 478   {
 479     return ((((q63_t) (x & 0x00000000FFFFFFFF) * y) >> 32) +
 480             (((q63_t) (x >> 32) * y)));
 481   }
 482
 483
 484 #if defined (ARM_MATH_CM0_FAMILY) && defined ( __CC_ARM   )
 485 #define __CLZ __clz
 486 #elif defined (ARM_MATH_CM0_FAMILY) && ((defined (__ICCARM__)) ||(defined (__GNUC__)) || defined (__TASKING__) )
 487
 488   static __INLINE uint32_t __CLZ(
 489   q31_t data);
 490
 491
 492   static __INLINE uint32_t __CLZ(
 493   q31_t data)
 494   {
 495     uint32_t count = 0;
 496     uint32_t mask = 0x80000000;
 497
 498     while((data & mask) == 0)
 499     {
 500       count += 1u;
 501       mask = mask >> 1u;
 502     }
 503
 504     return (count);
 505
 506   }
 507
 508 #endif
 509
 510   /**
 511    * @brief Function to Calculates 1/in (reciprocal) value of Q31 Data type.
 512    */
 513
 514   static __INLINE uint32_t arm_recip_q31(
 515   q31_t in,
 516   q31_t * dst,
 517   q31_t * pRecipTable)
 518   {
 519
 520     uint32_t out, tempVal;
 521     uint32_t index, i;
 522     uint32_t signBits;
 523
 524     if(in > 0)
 525     {
 526       signBits = __CLZ(in) - 1;
 527     }
 528     else
 529     {
 530       signBits = __CLZ(-in) - 1;
 531     }
 532
 533     /* Convert input sample to 1.31 format */
 534     in = in << signBits;
 535
 536     /* calculation of index for initial approximated Val */
 537     index = (uint32_t) (in >> 24u);
 538     index = (index & INDEX_MASK);
 539
 540     /* 1.31 with exp 1 */
 541     out = pRecipTable[index];
 542
 543     /* calculation of reciprocal value */
 544     /* running approximation for two iterations */
 545     for (i = 0u; i < 2u; i++)
 546     {
 547       tempVal = (q31_t) (((q63_t) in * out) >> 31u);
 548       tempVal = 0x7FFFFFFF - tempVal;
 549       /*      1.31 with exp 1 */
 550       //out = (q31_t) (((q63_t) out * tempVal) >> 30u);
 551       out = (q31_t) clip_q63_to_q31(((q63_t) out * tempVal) >> 30u);
 552     }
 553
 554     /* write output */
 555     *dst = out;
 556
 557     /* return num of signbits of out = 1/in value */
 558     return (signBits + 1u);
 559
 560   }
 561
 562   /**
 563    * @brief Function to Calculates 1/in (reciprocal) value of Q15 Data type.
 564    */
 565   static __INLINE uint32_t arm_recip_q15(
 566   q15_t in,
 567   q15_t * dst,
 568   q15_t * pRecipTable)
 569   {
 570
 571     uint32_t out = 0, tempVal = 0;
 572     uint32_t index = 0, i = 0;
 573     uint32_t signBits = 0;
 574
 575     if(in > 0)
 576     {
 577       signBits = __CLZ(in) - 17;
 578     }
 579     else
 580     {
 581       signBits = __CLZ(-in) - 17;
 582     }
 583
 584     /* Convert input sample to 1.15 format */
 585     in = in << signBits;
 586
 587     /* calculation of index for initial approximated Val */
 588     index = in >> 8;
 589     index = (index & INDEX_MASK);
 590
 591     /*      1.15 with exp 1  */
 592     out = pRecipTable[index];
 593
 594     /* calculation of reciprocal value */
 595     /* running approximation for two iterations */
 596     for (i = 0; i < 2; i++)
 597     {
 598       tempVal = (q15_t) (((q31_t) in * out) >> 15);
 599       tempVal = 0x7FFF - tempVal;
 600       /*      1.15 with exp 1 */
 601       out = (q15_t) (((q31_t) out * tempVal) >> 14);
 602     }
 603
 604     /* write output */
 605     *dst = out;
 606
 607     /* return num of signbits of out = 1/in value */
 608     return (signBits + 1);
 609
 610   }
 611
 612
 613   /*
 614    * @brief C custom defined intrinisic function for only M0 processors
 615    */
 616 #if defined(ARM_MATH_CM0_FAMILY)
 617
 618   static __INLINE q31_t __SSAT(
 619   q31_t x,
 620   uint32_t y)
 621   {
 622     int32_t posMax, negMin;
 623     uint32_t i;
 624
 625     posMax = 1;
 626     for (i = 0; i < (y - 1); i++)
 627     {
 628       posMax = posMax * 2;
 629     }
 630
 631     if(x > 0)
 632     {
 633       posMax = (posMax - 1);
 634
 635       if(x > posMax)
 636       {
 637         x = posMax;
 638       }
 639     }
 640     else
 641     {
 642       negMin = -posMax;
 643
 644       if(x < negMin)
 645       {
 646         x = negMin;
 647       }
 648     }
 649     return (x);
 650
 651
 652   }
 653
 654 #endif /* end of ARM_MATH_CM0_FAMILY */
 655
 656
 657
 658   /*
 659    * @brief C custom defined intrinsic function for M3 and M0 processors
 660    */
 661 #if defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0_FAMILY)
 662
 663   /*
 664    * @brief C custom defined QADD8 for M3 and M0 processors
 665    */
 666   static __INLINE q31_t __QADD8(
 667   q31_t x,
 668   q31_t y)
 669   {
 670
 671     q31_t sum;
 672     q7_t r, s, t, u;
 673
 674     r = (q7_t) x;
 675     s = (q7_t) y;
 676
 677     r = __SSAT((q31_t) (r + s), 8);
 678     s = __SSAT(((q31_t) (((x << 16) >> 24) + ((y << 16) >> 24))), 8);
 679     t = __SSAT(((q31_t) (((x << 8) >> 24) + ((y << 8) >> 24))), 8);
 680     u = __SSAT(((q31_t) ((x >> 24) + (y >> 24))), 8);
 681
 682     sum =
 683       (((q31_t) u << 24) & 0xFF000000) | (((q31_t) t << 16) & 0x00FF0000) |
 684       (((q31_t) s << 8) & 0x0000FF00) | (r & 0x000000FF);
 685
 686     return sum;
 687
 688   }
 689
 690   /*
 691    * @brief C custom defined QSUB8 for M3 and M0 processors
 692    */
 693   static __INLINE q31_t __QSUB8(
 694   q31_t x,
 695   q31_t y)
 696   {
 697
 698     q31_t sum;
 699     q31_t r, s, t, u;
 700
 701     r = (q7_t) x;
 702     s = (q7_t) y;
 703
 704     r = __SSAT((r - s), 8);
 705     s = __SSAT(((q31_t) (((x << 16) >> 24) - ((y << 16) >> 24))), 8) << 8;
 706     t = __SSAT(((q31_t) (((x << 8) >> 24) - ((y << 8) >> 24))), 8) << 16;
 707     u = __SSAT(((q31_t) ((x >> 24) - (y >> 24))), 8) << 24;
 708
 709     sum =
 710       (u & 0xFF000000) | (t & 0x00FF0000) | (s & 0x0000FF00) | (r &
 711                                                                 0x000000FF);
 712
 713     return sum;
 714   }
 715
 716   /*
 717    * @brief C custom defined QADD16 for M3 and M0 processors
 718    */
 719
 720   /*
 721    * @brief C custom defined QADD16 for M3 and M0 processors
 722    */
 723   static __INLINE q31_t __QADD16(
 724   q31_t x,
 725   q31_t y)
 726   {
 727
 728     q31_t sum;
 729     q31_t r, s;
 730
 731     r = (short) x;
 732     s = (short) y;
 733
 734     r = __SSAT(r + s, 16);
 735     s = __SSAT(((q31_t) ((x >> 16) + (y >> 16))), 16) << 16;
 736
 737     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
 738
 739     return sum;
 740
 741   }
 742
 743   /*
 744    * @brief C custom defined SHADD16 for M3 and M0 processors
 745    */
 746   static __INLINE q31_t __SHADD16(
 747   q31_t x,
 748   q31_t y)
 749   {
 750
 751     q31_t sum;
 752     q31_t r, s;
 753
 754     r = (short) x;
 755     s = (short) y;
 756
 757     r = ((r >> 1) + (s >> 1));
 758     s = ((q31_t) ((x >> 17) + (y >> 17))) << 16;
 759
 760     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
 761
 762     return sum;
 763
 764   }
 765
 766   /*
 767    * @brief C custom defined QSUB16 for M3 and M0 processors
 768    */
 769   static __INLINE q31_t __QSUB16(
 770   q31_t x,
 771   q31_t y)
 772   {
 773
 774     q31_t sum;
 775     q31_t r, s;
 776
 777     r = (short) x;
 778     s = (short) y;
 779
 780     r = __SSAT(r - s, 16);
 781     s = __SSAT(((q31_t) ((x >> 16) - (y >> 16))), 16) << 16;
 782
 783     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
 784
 785     return sum;
 786   }
 787
 788   /*
 789    * @brief C custom defined SHSUB16 for M3 and M0 processors
 790    */
 791   static __INLINE q31_t __SHSUB16(
 792   q31_t x,
 793   q31_t y)
 794   {
 795
 796     q31_t diff;
 797     q31_t r, s;
 798
 799     r = (short) x;
 800     s = (short) y;
 801
 802     r = ((r >> 1) - (s >> 1));
 803     s = (((x >> 17) - (y >> 17)) << 16);
 804
 805     diff = (s & 0xFFFF0000) | (r & 0x0000FFFF);
 806
 807     return diff;
 808   }
 809
 810   /*
 811    * @brief C custom defined QASX for M3 and M0 processors
 812    */
 813   static __INLINE q31_t __QASX(
 814   q31_t x,
 815   q31_t y)
 816   {
 817
 818     q31_t sum = 0;
 819
 820     sum =
 821       ((sum +
 822         clip_q31_to_q15((q31_t) ((short) (x >> 16) + (short) y))) << 16) +
 823       clip_q31_to_q15((q31_t) ((short) x - (short) (y >> 16)));
 824
 825     return sum;
 826   }
 827
 828   /*
 829    * @brief C custom defined SHASX for M3 and M0 processors
 830    */
 831   static __INLINE q31_t __SHASX(
 832   q31_t x,
 833   q31_t y)
 834   {
 835
 836     q31_t sum;
 837     q31_t r, s;
 838
 839     r = (short) x;
 840     s = (short) y;
 841
 842     r = ((r >> 1) - (y >> 17));
 843     s = (((x >> 17) + (s >> 1)) << 16);
 844
 845     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
 846
 847     return sum;
 848   }
 849
 850
 851   /*
 852    * @brief C custom defined QSAX for M3 and M0 processors
 853    */
 854   static __INLINE q31_t __QSAX(
 855   q31_t x,
 856   q31_t y)
 857   {
 858
 859     q31_t sum = 0;
 860
 861     sum =
 862       ((sum +
 863         clip_q31_to_q15((q31_t) ((short) (x >> 16) - (short) y))) << 16) +
 864       clip_q31_to_q15((q31_t) ((short) x + (short) (y >> 16)));
 865
 866     return sum;
 867   }
 868
 869   /*
 870    * @brief C custom defined SHSAX for M3 and M0 processors
 871    */
 872   static __INLINE q31_t __SHSAX(
 873   q31_t x,
 874   q31_t y)
 875   {
 876
 877     q31_t sum;
 878     q31_t r, s;
 879
 880     r = (short) x;
 881     s = (short) y;
 882
 883     r = ((r >> 1) + (y >> 17));
 884     s = (((x >> 17) - (s >> 1)) << 16);
 885
 886     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
 887
 888     return sum;
 889   }
 890
 891   /*
 892    * @brief C custom defined SMUSDX for M3 and M0 processors
 893    */
 894   static __INLINE q31_t __SMUSDX(
 895   q31_t x,
 896   q31_t y)
 897   {
 898
 899     return ((q31_t) (((short) x * (short) (y >> 16)) -
 900                      ((short) (x >> 16) * (short) y)));
 901   }
 902
 903   /*
 904    * @brief C custom defined SMUADX for M3 and M0 processors
 905    */
 906   static __INLINE q31_t __SMUADX(
 907   q31_t x,
 908   q31_t y)
 909   {
 910
 911     return ((q31_t) (((short) x * (short) (y >> 16)) +
 912                      ((short) (x >> 16) * (short) y)));
 913   }
 914
 915   /*
 916    * @brief C custom defined QADD for M3 and M0 processors
 917    */
 918   static __INLINE q31_t __QADD(
 919   q31_t x,
 920   q31_t y)
 921   {
 922     return clip_q63_to_q31((q63_t) x + y);
 923   }
 924
 925   /*
 926    * @brief C custom defined QSUB for M3 and M0 processors
 927    */
 928   static __INLINE q31_t __QSUB(
 929   q31_t x,
 930   q31_t y)
 931   {
 932     return clip_q63_to_q31((q63_t) x - y);
 933   }
 934
 935   /*
 936    * @brief C custom defined SMLAD for M3 and M0 processors
 937    */
 938   static __INLINE q31_t __SMLAD(
 939   q31_t x,
 940   q31_t y,
 941   q31_t sum)
 942   {
 943
 944     return (sum + ((short) (x >> 16) * (short) (y >> 16)) +
 945             ((short) x * (short) y));
 946   }
 947
 948   /*
 949    * @brief C custom defined SMLADX for M3 and M0 processors
 950    */
 951   static __INLINE q31_t __SMLADX(
 952   q31_t x,
 953   q31_t y,
 954   q31_t sum)
 955   {
 956
 957     return (sum + ((short) (x >> 16) * (short) (y)) +
 958             ((short) x * (short) (y >> 16)));
 959   }
 960
 961   /*
 962    * @brief C custom defined SMLSDX for M3 and M0 processors
 963    */
 964   static __INLINE q31_t __SMLSDX(
 965   q31_t x,
 966   q31_t y,
 967   q31_t sum)
 968   {
 969
 970     return (sum - ((short) (x >> 16) * (short) (y)) +
 971             ((short) x * (short) (y >> 16)));
 972   }
 973
 974   /*
 975    * @brief C custom defined SMLALD for M3 and M0 processors
 976    */
 977   static __INLINE q63_t __SMLALD(
 978   q31_t x,
 979   q31_t y,
 980   q63_t sum)
 981   {
 982
 983     return (sum + ((short) (x >> 16) * (short) (y >> 16)) +
 984             ((short) x * (short) y));
 985   }
 986
 987   /*
 988    * @brief C custom defined SMLALDX for M3 and M0 processors
 989    */
 990   static __INLINE q63_t __SMLALDX(
 991   q31_t x,
 992   q31_t y,
 993   q63_t sum)
 994   {
 995
 996     return (sum + ((short) (x >> 16) * (short) y)) +
 997       ((short) x * (short) (y >> 16));
 998   }
 999
1000   /*
1001    * @brief C custom defined SMUAD for M3 and M0 processors
1002    */
1003   static __INLINE q31_t __SMUAD(
1004   q31_t x,
1005   q31_t y)
1006   {
1007
1008     return (((x >> 16) * (y >> 16)) +
1009             (((x << 16) >> 16) * ((y << 16) >> 16)));
1010   }
1011
1012   /*
1013    * @brief C custom defined SMUSD for M3 and M0 processors
1014    */
1015   static __INLINE q31_t __SMUSD(
1016   q31_t x,
1017   q31_t y)
1018   {
1019
1020     return (-((x >> 16) * (y >> 16)) +
1021             (((x << 16) >> 16) * ((y << 16) >> 16)));
1022   }
1023
1024
1025   /*
1026    * @brief C custom defined SXTB16 for M3 and M0 processors
1027    */
1028   static __INLINE q31_t __SXTB16(
1029   q31_t x)
1030   {
1031
1032     return ((((x << 24) >> 24) & 0x0000FFFF) |
1033             (((x << 8) >> 8) & 0xFFFF0000));
1034   }
1035
1036
1037 #endif /* defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0_FAMILY) */
1038
1039
1040   /**
1041    * @brief Instance structure for the Q7 FIR filter: the tap count plus pointers to the state and coefficient arrays.
1042    */
1043   typedef struct
1044   {
1045     uint16_t numTaps;        /**< Number of filter coefficients (taps) in the filter. */
1046     q7_t *pState;            /**< Points to the state variable array, which has numTaps+blockSize-1 elements. */
1047     q7_t *pCoeffs;           /**< Points to the coefficient array, which has numTaps elements. */
1048   } arm_fir_instance_q7;
1049
1050   /**
1051    * @brief Instance structure for the Q15 FIR filter: the tap count plus pointers to the state and coefficient arrays.
1052    */
1053   typedef struct
1054   {
1055     uint16_t numTaps;         /**< Number of filter coefficients (taps) in the filter. */
1056     q15_t *pState;            /**< Points to the state variable array, which has numTaps+blockSize-1 elements. */
1057     q15_t *pCoeffs;           /**< Points to the coefficient array, which has numTaps elements. */
1058   } arm_fir_instance_q15;
1059
1060   /**
1061    * @brief Instance structure for the Q31 FIR filter: the tap count plus pointers to the state and coefficient arrays.
1062    */
1063   typedef struct
1064   {
1065     uint16_t numTaps;         /**< Number of filter coefficients (taps) in the filter. */
1066     q31_t *pState;            /**< Points to the state variable array, which has numTaps+blockSize-1 elements. */
1067     q31_t *pCoeffs;           /**< Points to the coefficient array, which has numTaps elements. */
1068   } arm_fir_instance_q31;
1069
1070   /**
1071    * @brief Instance structure for the floating-point FIR filter: the tap count plus pointers to the state and coefficient arrays.
1072    */
1073   typedef struct
1074   {
1075     uint16_t numTaps;     /**< Number of filter coefficients (taps) in the filter. */
1076     float32_t *pState;    /**< Points to the state variable array, which has numTaps+blockSize-1 elements. */
1077     float32_t *pCoeffs;   /**< Points to the coefficient array, which has numTaps elements. */
1078   } arm_fir_instance_f32;
1079
1080
1081   /**
1082    * @brief Processing function for the Q7 FIR filter.
1083    * @param[in]  *S         points to an instance of the Q7 FIR filter structure.
1084    * @param[in]  *pSrc      points to the block of input data.
1085    * @param[out] *pDst      points to the block of output data.
1086    * @param[in]  blockSize  number of samples to process.
1087    * @return     none.
1088    */
1089   void arm_fir_q7(
1090   const arm_fir_instance_q7 * S,
1091   q7_t * pSrc,
1092   q7_t * pDst,
1093   uint32_t blockSize);
1094
1095
1096   /**
1097    * @brief  Initialization function for the Q7 FIR filter.
1098    * @param[in,out] *S         points to an instance of the Q7 FIR filter structure.
1099    * @param[in]     numTaps    number of filter coefficients in the filter.
1100    * @param[in]     *pCoeffs   points to the filter coefficients (see arm_fir_instance_q7: array of length numTaps).
1101    * @param[in]     *pState    points to the state buffer (see arm_fir_instance_q7: array of length numTaps+blockSize-1).
1102    * @param[in]     blockSize  number of samples that are processed.
1103    * @return        none.
1104    */
1105   void arm_fir_init_q7(
1106   arm_fir_instance_q7 * S,
1107   uint16_t numTaps,
1108   q7_t * pCoeffs,
1109   q7_t * pState,
1110   uint32_t blockSize);
1111
1112
1113   /**
1114    * @brief Processing function for the Q15 FIR filter.
1115    * @param[in]  *S         points to an instance of the Q15 FIR filter structure.
1116    * @param[in]  *pSrc      points to the block of input data.
1117    * @param[out] *pDst      points to the block of output data.
1118    * @param[in]  blockSize  number of samples to process.
1119    * @return     none.
1120    */
1121   void arm_fir_q15(
1122   const arm_fir_instance_q15 * S,
1123   q15_t * pSrc,
1124   q15_t * pDst,
1125   uint32_t blockSize);
1126
1127   /**
1128    * @brief Processing function for the fast Q15 FIR filter for Cortex-M3 and Cortex-M4.
1129    * @param[in]  *S         points to an instance of the Q15 FIR filter structure.
1130    * @param[in]  *pSrc      points to the block of input data.
1131    * @param[out] *pDst      points to the block of output data.
1132    * @param[in]  blockSize  number of samples to process.
1133    * @return     none.
1134    */
1135   void arm_fir_fast_q15(
1136   const arm_fir_instance_q15 * S,
1137   q15_t * pSrc,
1138   q15_t * pDst,
1139   uint32_t blockSize);
1140
1141   /**
1142    * @brief  Initialization function for the Q15 FIR filter.
1143    * @param[in,out] *S         points to an instance of the Q15 FIR filter structure.
1144    * @param[in]     numTaps    number of filter coefficients in the filter. Must be even and greater than or equal to 4.
1145    * @param[in]     *pCoeffs   points to the filter coefficients (see arm_fir_instance_q15: array of length numTaps).
1146    * @param[in]     *pState    points to the state buffer (see arm_fir_instance_q15: array of length numTaps+blockSize-1).
1147    * @param[in]     blockSize  number of samples that are processed at a time.
1148    * @return        The function returns ARM_MATH_SUCCESS if initialization was successful, or ARM_MATH_ARGUMENT_ERROR if
1149    * <code>numTaps</code> is not a supported value.
1150    */
1151
1152   arm_status arm_fir_init_q15(
1153   arm_fir_instance_q15 * S,
1154   uint16_t numTaps,
1155   q15_t * pCoeffs,
1156   q15_t * pState,
1157   uint32_t blockSize);
1158
1159   /**
1160    * @brief Processing function for the Q31 FIR filter.
1161    * @param[in]  *S         points to an instance of the Q31 FIR filter structure.
1162    * @param[in]  *pSrc      points to the block of input data.
1163    * @param[out] *pDst      points to the block of output data.
1164    * @param[in]  blockSize  number of samples to process.
1165    * @return     none.
1166    */
1167   void arm_fir_q31(
1168   const arm_fir_instance_q31 * S,
1169   q31_t * pSrc,
1170   q31_t * pDst,
1171   uint32_t blockSize);
1172
1173   /**
1174    * @brief Processing function for the fast Q31 FIR filter for Cortex-M3 and Cortex-M4.
1175    * @param[in]  *S         points to an instance of the Q31 FIR filter structure.
1176    * @param[in]  *pSrc      points to the block of input data.
1177    * @param[out] *pDst      points to the block of output data.
1178    * @param[in]  blockSize  number of samples to process.
1179    * @return     none.
1180    */
1181   void arm_fir_fast_q31(
1182   const arm_fir_instance_q31 * S,
1183   q31_t * pSrc,
1184   q31_t * pDst,
1185   uint32_t blockSize);
1186
1187   /**
1188    * @brief  Initialization function for the Q31 FIR filter.
1189    * @param[in,out] *S         points to an instance of the Q31 FIR filter structure.
1190    * @param[in]     numTaps    number of filter coefficients in the filter.
1191    * @param[in]     *pCoeffs   points to the filter coefficients (see arm_fir_instance_q31: array of length numTaps).
1192    * @param[in]     *pState    points to the state buffer (see arm_fir_instance_q31: array of length numTaps+blockSize-1).
1193    * @param[in]     blockSize  number of samples that are processed at a time.
1194    * @return        none.
1195    */
1196   void arm_fir_init_q31(
1197   arm_fir_instance_q31 * S,
1198   uint16_t numTaps,
1199   q31_t * pCoeffs,
1200   q31_t * pState,
1201   uint32_t blockSize);
1202
1203   /**
1204    * @brief Processing function for the floating-point FIR filter.
1205    * @param[in]  *S         points to an instance of the floating-point FIR filter structure.
1206    * @param[in]  *pSrc      points to the block of input data.
1207    * @param[out] *pDst      points to the block of output data.
1208    * @param[in]  blockSize  number of samples to process.
1209    * @return     none.
1210    */
1211   void arm_fir_f32(
1212   const arm_fir_instance_f32 * S,
1213   float32_t * pSrc,
1214   float32_t * pDst,
1215   uint32_t blockSize);
1216
1217   /**
1218    * @brief  Initialization function for the floating-point FIR filter.
1219    * @param[in,out] *S         points to an instance of the floating-point FIR filter structure.
1220    * @param[in]     numTaps    number of filter coefficients in the filter.
1221    * @param[in]     *pCoeffs   points to the filter coefficients (see arm_fir_instance_f32: array of length numTaps).
1222    * @param[in]     *pState    points to the state buffer (see arm_fir_instance_f32: array of length numTaps+blockSize-1).
1223    * @param[in]     blockSize  number of samples that are processed at a time.
1224    * @return        none.
1225    */
1226   void arm_fir_init_f32(
1227   arm_fir_instance_f32 * S,
1228   uint16_t numTaps,
1229   float32_t * pCoeffs,
1230   float32_t * pState,
1231   uint32_t blockSize);
1232
1233
1234   /**
1235    * @brief Instance structure for the Q15 Biquad cascade filter: stage count, state and coefficient array pointers, and output shift.
1236    */
1237   typedef struct
1238   {
1239     int8_t numStages;         /**< Number of 2nd order stages in the filter.  Overall order is 2*numStages. NOTE(review): signed here, but arm_biquad_cascade_df1_init_q15 takes uint8_t numStages. */
1240     q15_t *pState;            /**< Points to the state array.  The array is of length 4*numStages. */
1241     q15_t *pCoeffs;           /**< Points to the array of coefficients.  The array is of length 5*numStages. */
1242     int8_t postShift;         /**< Additional shift, in bits, applied to each output sample. */
1243
1244   } arm_biquad_casd_df1_inst_q15;
1245
1246
1247   /**
1248    * @brief Instance structure for the Q31 Biquad cascade filter: stage count, state and coefficient array pointers, and output shift.
1249    */
1250   typedef struct
1251   {
1252     uint32_t numStages;      /**< Number of 2nd order stages in the filter.  Overall order is 2*numStages. */
1253     q31_t *pState;           /**< Points to the state array.  The array is of length 4*numStages. */
1254     q31_t *pCoeffs;          /**< Points to the array of coefficients.  The array is of length 5*numStages. */
1255     uint8_t postShift;       /**< Additional shift, in bits, applied to each output sample. */
1256
1257   } arm_biquad_casd_df1_inst_q31;
1258
1259   /**
1260    * @brief Instance structure for the floating-point Biquad cascade filter: stage count plus state and coefficient array pointers.
1261    */
1262   typedef struct
1263   {
1264     uint32_t numStages;         /**< Number of 2nd order stages in the filter.  Overall order is 2*numStages. */
1265     float32_t *pState;          /**< Points to the state array.  The array is of length 4*numStages. */
1266     float32_t *pCoeffs;         /**< Points to the array of coefficients.  The array is of length 5*numStages. */
1267
1268
1269   } arm_biquad_casd_df1_inst_f32;
1270
1271
1272
1273   /**
1274    * @brief Processing function for the Q15 Biquad cascade filter.
1275    * @param[in]  *S         points to an instance of the Q15 Biquad cascade structure.
1276    * @param[in]  *pSrc      points to the block of input data.
1277    * @param[out] *pDst      points to the block of output data.
1278    * @param[in]  blockSize  number of samples to process.
1279    * @return     none.
1280    */
1281
1282   void arm_biquad_cascade_df1_q15(
1283   const arm_biquad_casd_df1_inst_q15 * S,
1284   q15_t * pSrc,
1285   q15_t * pDst,
1286   uint32_t blockSize);
1287
1288   /**
1289    * @brief  Initialization function for the Q15 Biquad cascade filter.
1290    * @param[in,out] *S           points to an instance of the Q15 Biquad cascade structure.
1291    * @param[in]     numStages    number of 2nd order stages in the filter.
1292    * @param[in]     *pCoeffs     points to the filter coefficients (see arm_biquad_casd_df1_inst_q15: array of length 5*numStages).
1293    * @param[in]     *pState      points to the state buffer (see arm_biquad_casd_df1_inst_q15: array of length 4*numStages).
1294    * @param[in]     postShift    shift to be applied to the output. Varies according to the coefficients format.
1295    * @return        none.
1296    */
1297
1298   void arm_biquad_cascade_df1_init_q15(
1299   arm_biquad_casd_df1_inst_q15 * S,
1300   uint8_t numStages,
1301   q15_t * pCoeffs,
1302   q15_t * pState,
1303   int8_t postShift);
1304
1305
1306   /**
1307    * @brief Fast but less precise processing function for the Q15 Biquad cascade filter for Cortex-M3 and Cortex-M4.
1308    * @param[in]  *S         points to an instance of the Q15 Biquad cascade structure.
1309    * @param[in]  *pSrc      points to the block of input data.
1310    * @param[out] *pDst      points to the block of output data.
1311    * @param[in]  blockSize  number of samples to process.
1312    * @return     none.
1313    */
1314
1315   void arm_biquad_cascade_df1_fast_q15(
1316   const arm_biquad_casd_df1_inst_q15 * S,
1317   q15_t * pSrc,
1318   q15_t * pDst,
1319   uint32_t blockSize);
1320
1321
1322   /**
1323    * @brief Processing function for the Q31 Biquad cascade filter.
1324    * @param[in]  *S         points to an instance of the Q31 Biquad cascade structure.
1325    * @param[in]  *pSrc      points to the block of input data.
1326    * @param[out] *pDst      points to the block of output data.
1327    * @param[in]  blockSize  number of samples to process.
1328    * @return     none.
1329    */
1330
1331   void arm_biquad_cascade_df1_q31(
1332   const arm_biquad_casd_df1_inst_q31 * S,
1333   q31_t * pSrc,
1334   q31_t * pDst,
1335   uint32_t blockSize);
1336
1337   /**
1338    * @brief Fast but less precise processing function for the Q31 Biquad cascade filter for Cortex-M3 and Cortex-M4.
1339    * @param[in]  *S         points to an instance of the Q31 Biquad cascade structure.
1340    * @param[in]  *pSrc      points to the block of input data.
1341    * @param[out] *pDst      points to the block of output data.
1342    * @param[in]  blockSize  number of samples to process.
1343    * @return     none.
1344    */
1345
1346   void arm_biquad_cascade_df1_fast_q31(
1347   const arm_biquad_casd_df1_inst_q31 * S,
1348   q31_t * pSrc,
1349   q31_t * pDst,
1350   uint32_t blockSize);
1351
1352   /**
1353    * @brief  Initialization function for the Q31 Biquad cascade filter.
1354    * @param[in,out] *S           points to an instance of the Q31 Biquad cascade structure.
1355    * @param[in]     numStages    number of 2nd order stages in the filter.
1356    * @param[in]     *pCoeffs     points to the filter coefficients (see arm_biquad_casd_df1_inst_q31: array of length 5*numStages).
1357    * @param[in]     *pState      points to the state buffer (see arm_biquad_casd_df1_inst_q31: array of length 4*numStages).
1358    * @param[in]     postShift    shift to be applied to the output. Varies according to the coefficients format.
1359    * @return        none.
1360    */
1361
1362   void arm_biquad_cascade_df1_init_q31(
1363   arm_biquad_casd_df1_inst_q31 * S,
1364   uint8_t numStages,
1365   q31_t * pCoeffs,
1366   q31_t * pState,
1367   int8_t postShift);
1368
1369   /**
1370    * @brief Processing function for the floating-point Biquad cascade filter.
1371    * @param[in]  *S         points to an instance of the floating-point Biquad cascade structure.
1372    * @param[in]  *pSrc      points to the block of input data.
1373    * @param[out] *pDst      points to the block of output data.
1374    * @param[in]  blockSize  number of samples to process.
1375    * @return     none.
1376    */
1377
1378   void arm_biquad_cascade_df1_f32(
1379   const arm_biquad_casd_df1_inst_f32 * S,
1380   float32_t * pSrc,
1381   float32_t * pDst,
1382   uint32_t blockSize);
1383
1384   /**
1385    * @brief  Initialization function for the floating-point Biquad cascade filter.
1386    * @param[in,out] *S           points to an instance of the floating-point Biquad cascade structure.
1387    * @param[in]     numStages    number of 2nd order stages in the filter.
1388    * @param[in]     *pCoeffs     points to the filter coefficients (see arm_biquad_casd_df1_inst_f32: array of length 5*numStages).
1389    * @param[in]     *pState      points to the state buffer (see arm_biquad_casd_df1_inst_f32: array of length 4*numStages).
1390    * @return        none.
1391    */
1392
1393   void arm_biquad_cascade_df1_init_f32(
1394   arm_biquad_casd_df1_inst_f32 * S,
1395   uint8_t numStages,
1396   float32_t * pCoeffs,
1397   float32_t * pState);
1398
1399
1400   /**
1401    * @brief Instance structure for the floating-point matrix: dimensions plus a pointer to the element data.
1402    */
1403
1404   typedef struct
1405   {
1406     uint16_t numRows;     /**< Number of rows of the matrix.     */
1407     uint16_t numCols;     /**< Number of columns of the matrix.  */
1408     float32_t *pData;     /**< Points to the data of the matrix. */
1409   } arm_matrix_instance_f32;
1410
1411   /**
1412    * @brief Instance structure for the Q15 matrix: dimensions plus a pointer to the element data.
1413    */
1414
1415   typedef struct
1416   {
1417     uint16_t numRows;     /**< Number of rows of the matrix.     */
1418     uint16_t numCols;     /**< Number of columns of the matrix.  */
1419     q15_t *pData;         /**< Points to the data of the matrix. */
1420
1421   } arm_matrix_instance_q15;
1422
1423   /**
1424    * @brief Instance structure for the Q31 matrix: dimensions plus a pointer to the element data.
1425    */
1426
1427   typedef struct
1428   {
1429     uint16_t numRows;     /**< Number of rows of the matrix.     */
1430     uint16_t numCols;     /**< Number of columns of the matrix.  */
1431     q31_t *pData;         /**< Points to the data of the matrix. */
1432
1433   } arm_matrix_instance_q31;
1434
1435
1436
1437   /**
1438    * @brief Floating-point matrix addition.
1439    * @param[in]       *pSrcA points to the first input matrix structure.
1440    * @param[in]       *pSrcB points to the second input matrix structure.
1441    * @param[out]      *pDst  points to the output matrix structure.
1442    * @return          The function returns either
1443    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1444    */
1445
1446   arm_status arm_mat_add_f32(
1447   const arm_matrix_instance_f32 * pSrcA,
1448   const arm_matrix_instance_f32 * pSrcB,
1449   arm_matrix_instance_f32 * pDst);
1450
1451   /**
1452    * @brief Q15 matrix addition.
1453    * @param[in]       *pSrcA points to the first input matrix structure.
1454    * @param[in]       *pSrcB points to the second input matrix structure.
1455    * @param[out]      *pDst  points to the output matrix structure.
1456    * @return          The function returns either
1457    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1458    */
1459
1460   arm_status arm_mat_add_q15(
1461   const arm_matrix_instance_q15 * pSrcA,
1462   const arm_matrix_instance_q15 * pSrcB,
1463   arm_matrix_instance_q15 * pDst);
1464
1465   /**
1466    * @brief Q31 matrix addition.
1467    * @param[in]       *pSrcA points to the first input matrix structure.
1468    * @param[in]       *pSrcB points to the second input matrix structure.
1469    * @param[out]      *pDst  points to the output matrix structure.
1470    * @return          The function returns either
1471    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1472    */
1473
1474   arm_status arm_mat_add_q31(
1475   const arm_matrix_instance_q31 * pSrcA,
1476   const arm_matrix_instance_q31 * pSrcB,
1477   arm_matrix_instance_q31 * pDst);
1478
1479
1480   /**
1481    * @brief Floating-point matrix transpose.
1482    * @param[in]  *pSrc points to the input matrix structure.
1483    * @param[out] *pDst points to the output matrix structure.
1484    * @return     The function returns either <code>ARM_MATH_SIZE_MISMATCH</code>
1485    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1486    */
1487
1488   arm_status arm_mat_trans_f32(
1489   const arm_matrix_instance_f32 * pSrc,
1490   arm_matrix_instance_f32 * pDst);
1491
1492
1493   /**
1494    * @brief Q15 matrix transpose.
1495    * @param[in]  *pSrc points to the input matrix structure.
1496    * @param[out] *pDst points to the output matrix structure.
1497    * @return     The function returns either <code>ARM_MATH_SIZE_MISMATCH</code>
1498    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1499    */
1500
1501   arm_status arm_mat_trans_q15(
1502   const arm_matrix_instance_q15 * pSrc,
1503   arm_matrix_instance_q15 * pDst);
1504
1505   /**
1506    * @brief Q31 matrix transpose.
1507    * @param[in]  *pSrc points to the input matrix structure.
1508    * @param[out] *pDst points to the output matrix structure.
1509    * @return     The function returns either <code>ARM_MATH_SIZE_MISMATCH</code>
1510    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1511    */
1512
1513   arm_status arm_mat_trans_q31(
1514   const arm_matrix_instance_q31 * pSrc,
1515   arm_matrix_instance_q31 * pDst);
1516
1517
1518   /**
1519    * @brief Floating-point matrix multiplication.
1520    * @param[in]       *pSrcA points to the first input matrix structure.
1521    * @param[in]       *pSrcB points to the second input matrix structure.
1522    * @param[out]      *pDst  points to the output matrix structure.
1523    * @return          The function returns either
1524    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1525    */
1526
1527   arm_status arm_mat_mult_f32(
1528   const arm_matrix_instance_f32 * pSrcA,
1529   const arm_matrix_instance_f32 * pSrcB,
1530   arm_matrix_instance_f32 * pDst);
1531
1532   /**
1533    * @brief Q15 matrix multiplication.
1534    * @param[in]       *pSrcA  points to the first input matrix structure.
1535    * @param[in]       *pSrcB  points to the second input matrix structure.
1536    * @param[out]      *pDst   points to the output matrix structure.
1537    * @param[in]       *pState points to the array for storing intermediate results.
1538    * @return          The function returns either
1539    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1540    */
1541
1542   arm_status arm_mat_mult_q15(
1543   const arm_matrix_instance_q15 * pSrcA,
1544   const arm_matrix_instance_q15 * pSrcB,
1545   arm_matrix_instance_q15 * pDst,
1546   q15_t * pState);
1547
1548   /**
1549    * @brief Q15 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4.
1550    * @param[in]       *pSrcA  points to the first input matrix structure.
1551    * @param[in]       *pSrcB  points to the second input matrix structure.
1552    * @param[out]      *pDst   points to the output matrix structure.
1553    * @param[in]       *pState points to the array for storing intermediate results.
1554    * @return          The function returns either
1555    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1556    */
1557
1558   arm_status arm_mat_mult_fast_q15(
1559   const arm_matrix_instance_q15 * pSrcA,
1560   const arm_matrix_instance_q15 * pSrcB,
1561   arm_matrix_instance_q15 * pDst,
1562   q15_t * pState);
1563
1564   /**
1565    * @brief Q31 matrix multiplication.
1566    * @param[in]       *pSrcA points to the first input matrix structure.
1567    * @param[in]       *pSrcB points to the second input matrix structure.
1568    * @param[out]      *pDst  points to the output matrix structure.
1569    * @return          The function returns either
1570    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1571    */
1572
1573   arm_status arm_mat_mult_q31(
1574   const arm_matrix_instance_q31 * pSrcA,
1575   const arm_matrix_instance_q31 * pSrcB,
1576   arm_matrix_instance_q31 * pDst);
1577
1578   /**
1579    * @brief Q31 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4.
1580    * @param[in]       *pSrcA points to the first input matrix structure.
1581    * @param[in]       *pSrcB points to the second input matrix structure.
1582    * @param[out]      *pDst  points to the output matrix structure.
1583    * @return          The function returns either
1584    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1585    */
1586
1587   arm_status arm_mat_mult_fast_q31(
1588   const arm_matrix_instance_q31 * pSrcA,
1589   const arm_matrix_instance_q31 * pSrcB,
1590   arm_matrix_instance_q31 * pDst);
1591
1592
1593   /**
1594    * @brief Floating-point matrix subtraction.
1595    * @param[in]       *pSrcA points to the first input matrix structure.
1596    * @param[in]       *pSrcB points to the second input matrix structure.
1597    * @param[out]      *pDst  points to the output matrix structure.
1598    * @return          The function returns either
1599    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1600    */
1601
1602   arm_status arm_mat_sub_f32(
1603   const arm_matrix_instance_f32 * pSrcA,
1604   const arm_matrix_instance_f32 * pSrcB,
1605   arm_matrix_instance_f32 * pDst);
1606
1607   /**
1608    * @brief Q15 matrix subtraction.
1609    * @param[in]       *pSrcA points to the first input matrix structure.
1610    * @param[in]       *pSrcB points to the second input matrix structure.
1611    * @param[out]      *pDst  points to the output matrix structure.
1612    * @return          The function returns either
1613    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1614    */
1615
1616   arm_status arm_mat_sub_q15(
1617   const arm_matrix_instance_q15 * pSrcA,
1618   const arm_matrix_instance_q15 * pSrcB,
1619   arm_matrix_instance_q15 * pDst);
1620
1621   /**
1622    * @brief Q31 matrix subtraction.
1623    * @param[in]       *pSrcA points to the first input matrix structure.
1624    * @param[in]       *pSrcB points to the second input matrix structure.
1625    * @param[out]      *pDst  points to the output matrix structure.
1626    * @return          The function returns either
1627    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1628    */
1629
1630   arm_status arm_mat_sub_q31(
1631   const arm_matrix_instance_q31 * pSrcA,
1632   const arm_matrix_instance_q31 * pSrcB,
1633   arm_matrix_instance_q31 * pDst);
1634
1635   /**
1636    * @brief Floating-point matrix scaling.
1637    * @param[in]  *pSrc points to the input matrix structure.
1638    * @param[in]  scale scale factor.
1639    * @param[out] *pDst points to the output matrix structure.
1640    * @return     The function returns either
1641    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1642    */
1643
1644   arm_status arm_mat_scale_f32(
1645   const arm_matrix_instance_f32 * pSrc,
1646   float32_t scale,
1647   arm_matrix_instance_f32 * pDst);
1648
1649   /**
1650    * @brief Q15 matrix scaling.
1651    * @param[in]       *pSrc      points to the input matrix structure.
1652    * @param[in]       scaleFract fractional portion of the scale factor.
1653    * @param[in]       shift      number of bits to shift the result by.
1654    * @param[out]      *pDst      points to the output matrix structure.
1655    * @return          The function returns either
1656    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1657    */
1658
1659   arm_status arm_mat_scale_q15(
1660   const arm_matrix_instance_q15 * pSrc,
1661   q15_t scaleFract,
1662   int32_t shift,
1663   arm_matrix_instance_q15 * pDst);
1664
1665   /**
1666    * @brief Q31 matrix scaling.
1667    * @param[in]       *pSrc      points to the input matrix structure.
1668    * @param[in]       scaleFract fractional portion of the scale factor.
1669    * @param[in]       shift      number of bits to shift the result by.
1670    * @param[out]      *pDst      points to the output matrix structure.
1671    * @return          The function returns either
1672    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1673    */
1674
1675   arm_status arm_mat_scale_q31(
1676   const arm_matrix_instance_q31 * pSrc,
1677   q31_t scaleFract,
1678   int32_t shift,
1679   arm_matrix_instance_q31 * pDst);
1680
1681
1682   /**
1683    * @brief  Q31 matrix initialization.
1684    * @param[in,out] *S             points to an instance of the Q31 matrix structure.
1685    * @param[in]     nRows          number of rows in the matrix.
1686    * @param[in]     nColumns       number of columns in the matrix.
1687    * @param[in]     *pData         points to the matrix data array.
1688    * @return        none.
1689    */
1690
1691   void arm_mat_init_q31(
1692   arm_matrix_instance_q31 * S,
1693   uint16_t nRows,
1694   uint16_t nColumns,
1695   q31_t * pData);
1696
1697   /**
1698    * @brief  Q15 matrix initialization.
1699    * @param[in,out] *S             points to an instance of the Q15 matrix structure.
1700    * @param[in]     nRows          number of rows in the matrix.
1701    * @param[in]     nColumns       number of columns in the matrix.
1702    * @param[in]     *pData         points to the matrix data array.
1703    * @return        none.
1704    */
1705
1706   void arm_mat_init_q15(
1707   arm_matrix_instance_q15 * S,
1708   uint16_t nRows,
1709   uint16_t nColumns,
1710   q15_t * pData);
1711
1712   /**
1713    * @brief  Floating-point matrix initialization.
1714    * @param[in,out] *S             points to an instance of the floating-point matrix structure.
1715    * @param[in]     nRows          number of rows in the matrix.
1716    * @param[in]     nColumns       number of columns in the matrix.
1717    * @param[in]     *pData         points to the matrix data array.
1718    * @return        none.
1719    */
1720
1721   void arm_mat_init_f32(
1722   arm_matrix_instance_f32 * S,
1723   uint16_t nRows,
1724   uint16_t nColumns,
1725   float32_t * pData);
1726
1727
1728
1729   /**
1730    * @brief Instance structure for the Q15 PID Control. The layout of the derived gains after A0 depends on ARM_MATH_CM0_FAMILY.
1731    */
1732   typedef struct
1733   {
1734     q15_t A0;    /**< The derived gain, A0 = Kp + Ki + Kd. */
1735 #ifdef ARM_MATH_CM0_FAMILY
1736     q15_t A1;    /**< Presumably the derived gain A1 = -Kp - 2Kd, as documented for the Q31 and floating-point instances; confirm. */
1737     q15_t A2;    /**< Presumably the derived gain A2 = Kd, as documented for the Q31 and floating-point instances; confirm. */
1738 #else
1739     q31_t A1;           /**< The derived gains -Kp - 2Kd and Kd; the "|" presumably means both are packed into this 32-bit word; confirm. */
1740 #endif
1741     q15_t state[3];       /**< The state array of length 3. */
1742     q15_t Kp;           /**< The proportional gain. */
1743     q15_t Ki;           /**< The integral gain. */
1744     q15_t Kd;           /**< The derivative gain. */
1745   } arm_pid_instance_q15;
1746
1747   /**
1748    * @brief Instance structure for the Q31 PID Control: derived gains, controller state and the three user gains.
1749    */
1750   typedef struct
1751   {
1752     q31_t A0;            /**< The derived gain, A0 = Kp + Ki + Kd. */
1753     q31_t A1;            /**< The derived gain, A1 = -Kp - 2Kd. */
1754     q31_t A2;            /**< The derived gain, A2 = Kd. */
1755     q31_t state[3];      /**< The state array of length 3. */
1756     q31_t Kp;            /**< The proportional gain. */
1757     q31_t Ki;            /**< The integral gain. */
1758     q31_t Kd;            /**< The derivative gain. */
1759
1760   } arm_pid_instance_q31;
1761
1762   /**
1763    * @brief Instance structure for the floating-point PID Control: derived gains, controller state and the three user gains.
1764    */
1765   typedef struct
1766   {
1767     float32_t A0;          /**< The derived gain, A0 = Kp + Ki + Kd. */
1768     float32_t A1;          /**< The derived gain, A1 = -Kp - 2Kd. */
1769     float32_t A2;          /**< The derived gain, A2 = Kd. */
1770     float32_t state[3];    /**< The state array of length 3. */
1771     float32_t Kp;               /**< The proportional gain. */
1772     float32_t Ki;               /**< The integral gain. */
1773     float32_t Kd;               /**< The derivative gain. */
1774   } arm_pid_instance_f32;
1775
1776
1777
1778   /**
1779    * @brief  Initialization function for the floating-point PID Control.
1780    * @param[in,out] *S              points to an instance of the floating-point PID structure.
1781    * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state, 1 = reset the state.
1782    * @return        none.
1783    */
1784   void arm_pid_init_f32(
1785   arm_pid_instance_f32 * S,
1786   int32_t resetStateFlag);
1787
1788   /**
1789    * @brief  Reset function for the floating-point PID Control.
1790    * @param[in,out] *S points to an instance of the floating-point PID Control structure.
1791    * @return        none.
1792    */
1793   void arm_pid_reset_f32(
1794   arm_pid_instance_f32 * S);
1795
1796
1797   /**
1798    * @brief  Initialization function for the Q31 PID Control.
1799    * @param[in,out] *S              points to an instance of the Q31 PID structure.
1800    * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state, 1 = reset the state.
1801    * @return        none.
1802    */
1803   void arm_pid_init_q31(
1804   arm_pid_instance_q31 * S,
1805   int32_t resetStateFlag);
1806
1807
1808   /**
1809    * @brief  Reset function for the Q31 PID Control.
1810    * @param[in,out] *S points to an instance of the Q31 PID Control structure.
1811    * @return        none.
1812    */
1813
1814   void arm_pid_reset_q31(
1815   arm_pid_instance_q31 * S);
1816
1817   /**
1818    * @brief  Initialization function for the Q15 PID Control.
1819    * @param[in,out] *S              points to an instance of the Q15 PID structure.
1820    * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state, 1 = reset the state.
1821    * @return        none.
1822    */
1823   void arm_pid_init_q15(
1824   arm_pid_instance_q15 * S,
1825   int32_t resetStateFlag);
1826
1827   /**
1828    * @brief  Reset function for the Q15 PID Control.
1829    * @param[in,out] S  points to an instance of the Q15 PID Control structure.
1830    * @return none.
1831    */
1832   void arm_pid_reset_q15(
1833   arm_pid_instance_q15 * S);
1834
1835
1836   /**
1837    * @brief Instance structure for the floating-point Linear Interpolate function.
1838    */
1839   typedef struct
1840   {
1841     uint32_t nValues;           /**< nValues (presumably the number of entries in the Y table; TODO confirm) */
1842     float32_t x1;               /**< x1 (presumably the x value of the first table entry; TODO confirm) */
1843     float32_t xSpacing;         /**< xSpacing (presumably the x distance between consecutive table entries; TODO confirm) */
1844     float32_t *pYData;          /**< pointer to the table of Y values */
1845   } arm_linear_interp_instance_f32;
1846
1847   /**
1848    * @brief Instance structure for the floating-point bilinear interpolation function.
1849    */
1850
1851   typedef struct
1852   {
1853     uint16_t numRows;   /**< number of rows in the data table. */
1854     uint16_t numCols;   /**< number of columns in the data table. */
1855     float32_t *pData;   /**< points to the data table (float32_t elements). */
1856   } arm_bilinear_interp_instance_f32;
1857
1858   /**
1859    * @brief Instance structure for the Q31 bilinear interpolation function.
1860    */
1861
1862   typedef struct
1863   {
1864     uint16_t numRows;   /**< number of rows in the data table. */
1865     uint16_t numCols;   /**< number of columns in the data table. */
1866     q31_t *pData;       /**< points to the data table (q31_t elements). */
1867   } arm_bilinear_interp_instance_q31;
1868
1869   /**
1870    * @brief Instance structure for the Q15 bilinear interpolation function.
1871    */
1872
1873   typedef struct
1874   {
1875     uint16_t numRows;   /**< number of rows in the data table. */
1876     uint16_t numCols;   /**< number of columns in the data table. */
1877     q15_t *pData;       /**< points to the data table (q15_t elements). */
1878   } arm_bilinear_interp_instance_q15;
1879
1880   /**
1881    * @brief Instance structure for the Q7 bilinear interpolation function.
1882    */
1883
1884   typedef struct
1885   {
1886     uint16_t numRows;   /**< number of rows in the data table. */
1887     uint16_t numCols;   /**< number of columns in the data table. */
1888     q7_t *pData;                /**< points to the data table (q7_t elements). */
1889   } arm_bilinear_interp_instance_q7;
1890
1891
1892   /**
1893    * @brief Q7 vector multiplication.
1894    * @param[in]  pSrcA      points to the first input vector
1895    * @param[in]  pSrcB      points to the second input vector
1896    * @param[out] pDst       points to the output vector
1897    * @param[in]  blockSize  number of samples in each vector
1898    * @return none.
1899    */
1900
1901   void arm_mult_q7(
1902   q7_t * pSrcA,
1903   q7_t * pSrcB,
1904   q7_t * pDst,
1905   uint32_t blockSize);
1906
1907   /**
1908    * @brief Q15 vector multiplication.
1909    * @param[in]  pSrcA      points to the first input vector
1910    * @param[in]  pSrcB      points to the second input vector
1911    * @param[out] pDst       points to the output vector
1912    * @param[in]  blockSize  number of samples in each vector
1913    * @return none.
1914    */
1915
1916   void arm_mult_q15(
1917   q15_t * pSrcA,
1918   q15_t * pSrcB,
1919   q15_t * pDst,
1920   uint32_t blockSize);
1921
1922   /**
1923    * @brief Q31 vector multiplication.
1924    * @param[in]  pSrcA      points to the first input vector
1925    * @param[in]  pSrcB      points to the second input vector
1926    * @param[out] pDst       points to the output vector
1927    * @param[in]  blockSize  number of samples in each vector
1928    * @return none.
1929    */
1930
1931   void arm_mult_q31(
1932   q31_t * pSrcA,
1933   q31_t * pSrcB,
1934   q31_t * pDst,
1935   uint32_t blockSize);
1936
1937   /**
1938    * @brief Floating-point vector multiplication.
1939    * @param[in]  pSrcA      points to the first input vector
1940    * @param[in]  pSrcB      points to the second input vector
1941    * @param[out] pDst       points to the output vector
1942    * @param[in]  blockSize  number of samples in each vector
1943    * @return none.
1944    */
1945
1946   void arm_mult_f32(
1947   float32_t * pSrcA,
1948   float32_t * pSrcB,
1949   float32_t * pDst,
1950   uint32_t blockSize);
1951
1952
1953
1954
1955
1956
1957   /**
1958    * @brief Instance structure for the Radix-2 Q15 CFFT/CIFFT function.
1959    */
1960
1961   typedef struct
1962   {
1963     uint16_t fftLen;                 /**< length of the FFT. */
1964     uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
1965     uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
1966     q15_t *pTwiddle;                     /**< points to the Sin twiddle factor table. */
1967     uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
1968     uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1969     uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
1970   } arm_cfft_radix2_instance_q15;
1971   /** @brief Initialization function for the Radix-2 Q15 CFFT/CIFFT. Returns an arm_status code (specific values are not documented here). */
1972   arm_status arm_cfft_radix2_init_q15(
1973   arm_cfft_radix2_instance_q15 * S,   /* [in,out] points to an instance of the Radix-2 Q15 CFFT/CIFFT structure */
1974   uint16_t fftLen,                    /* [in] length of the FFT (cf. instance field fftLen) */
1975   uint8_t ifftFlag,                   /* [in] forward (0) or inverse (1) transform (cf. instance field ifftFlag) */
1976   uint8_t bitReverseFlag);            /* [in] bit reversal of output enabled (1) or disabled (0) (cf. instance field bitReverseFlag) */
1977   /** @brief Processing function for the Radix-2 Q15 CFFT/CIFFT. */
1978   void arm_cfft_radix2_q15(
1979   const arm_cfft_radix2_instance_q15 * S,   /* [in] points to an instance of the Radix-2 Q15 CFFT/CIFFT structure (const: not modified) */
1980   q15_t * pSrc);                            /* [in,out] data buffer; non-const, presumably transformed in place -- TODO confirm */
1981
1982
1983
1984   /**
1985    * @brief Instance structure for the Radix-4 Q15 CFFT/CIFFT function.
1986    */
1987
1988   typedef struct
1989   {
1990     uint16_t fftLen;                 /**< length of the FFT. */
1991     uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
1992     uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
1993     q15_t *pTwiddle;                 /**< points to the twiddle factor table. */
1994     uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
1995     uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1996     uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
1997   } arm_cfft_radix4_instance_q15;
1998   /** @brief Initialization function for the Radix-4 Q15 CFFT/CIFFT. Returns an arm_status code (specific values are not documented here). */
1999   arm_status arm_cfft_radix4_init_q15(
2000   arm_cfft_radix4_instance_q15 * S,   /* [in,out] points to an instance of the Radix-4 Q15 CFFT/CIFFT structure */
2001   uint16_t fftLen,                    /* [in] length of the FFT (cf. instance field fftLen) */
2002   uint8_t ifftFlag,                   /* [in] forward (0) or inverse (1) transform (cf. instance field ifftFlag) */
2003   uint8_t bitReverseFlag);            /* [in] bit reversal of output enabled (1) or disabled (0) (cf. instance field bitReverseFlag) */
2004   /** @brief Processing function for the Radix-4 Q15 CFFT/CIFFT. */
2005   void arm_cfft_radix4_q15(
2006   const arm_cfft_radix4_instance_q15 * S,   /* [in] points to an instance of the Radix-4 Q15 CFFT/CIFFT structure (const: not modified) */
2007   q15_t * pSrc);                            /* [in,out] data buffer; non-const, presumably transformed in place -- TODO confirm */
2008
2009   /**
2010    * @brief Instance structure for the Radix-2 Q31 CFFT/CIFFT function.
2011    */
2012
2013   typedef struct
2014   {
2015     uint16_t fftLen;                 /**< length of the FFT. */
2016     uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2017     uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2018     q31_t *pTwiddle;                     /**< points to the twiddle factor table. */
2019     uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
2020     uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2021     uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2022   } arm_cfft_radix2_instance_q31;
2023   /** @brief Initialization function for the Radix-2 Q31 CFFT/CIFFT. Returns an arm_status code (specific values are not documented here). */
2024   arm_status arm_cfft_radix2_init_q31(
2025   arm_cfft_radix2_instance_q31 * S,   /* [in,out] points to an instance of the Radix-2 Q31 CFFT/CIFFT structure */
2026   uint16_t fftLen,                    /* [in] length of the FFT (cf. instance field fftLen) */
2027   uint8_t ifftFlag,                   /* [in] forward (0) or inverse (1) transform (cf. instance field ifftFlag) */
2028   uint8_t bitReverseFlag);            /* [in] bit reversal of output enabled (1) or disabled (0) (cf. instance field bitReverseFlag) */
2029   /** @brief Processing function for the Radix-2 Q31 CFFT/CIFFT. */
2030   void arm_cfft_radix2_q31(
2031   const arm_cfft_radix2_instance_q31 * S,   /* [in] points to an instance of the Radix-2 Q31 CFFT/CIFFT structure (const: not modified) */
2032   q31_t * pSrc);                            /* [in,out] data buffer; non-const, presumably transformed in place -- TODO confirm */
2033
2034   /**
2035    * @brief Instance structure for the Radix-4 Q31 CFFT/CIFFT function.
2036    */
2037
2038   typedef struct
2039   {
2040     uint16_t fftLen;                 /**< length of the FFT. */
2041     uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2042     uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2043     q31_t *pTwiddle;                 /**< points to the twiddle factor table. */
2044     uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
2045     uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2046     uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2047   } arm_cfft_radix4_instance_q31;
2048
2049   /** @brief Processing function for the Radix-4 Q31 CFFT/CIFFT. */
2050   void arm_cfft_radix4_q31(
2051   const arm_cfft_radix4_instance_q31 * S,   /* [in] points to an instance of the Radix-4 Q31 CFFT/CIFFT structure (const: not modified) */
2052   q31_t * pSrc);                            /* [in,out] data buffer; non-const, presumably transformed in place -- TODO confirm */
2053   /** @brief Initialization function for the Radix-4 Q31 CFFT/CIFFT. Returns an arm_status code (specific values are not documented here). */
2054   arm_status arm_cfft_radix4_init_q31(
2055   arm_cfft_radix4_instance_q31 * S,   /* [in,out] points to an instance of the Radix-4 Q31 CFFT/CIFFT structure */
2056   uint16_t fftLen,                    /* [in] length of the FFT (cf. instance field fftLen) */
2057   uint8_t ifftFlag,                   /* [in] forward (0) or inverse (1) transform (cf. instance field ifftFlag) */
2058   uint8_t bitReverseFlag);            /* [in] bit reversal of output enabled (1) or disabled (0) (cf. instance field bitReverseFlag) */
2059
2060   /**
2061    * @brief Instance structure for the Radix-2 floating-point CFFT/CIFFT function.
2062    */
2063
2064   typedef struct
2065   {
2066     uint16_t fftLen;                   /**< length of the FFT. */
2067     uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2068     uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2069     float32_t *pTwiddle;               /**< points to the twiddle factor table. */
2070     uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
2071     uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2072     uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2073     float32_t onebyfftLen;                 /**< value of 1/fftLen. */
2074   } arm_cfft_radix2_instance_f32;
2075
2076 /* Deprecated. Initialization function for the Radix-2 floating-point CFFT/CIFFT; returns an arm_status code (specific values are not documented here). */
2077   arm_status arm_cfft_radix2_init_f32(
2078   arm_cfft_radix2_instance_f32 * S,   /* [in,out] points to an instance of the Radix-2 floating-point CFFT/CIFFT structure */
2079   uint16_t fftLen,                    /* [in] length of the FFT (cf. instance field fftLen) */
2080   uint8_t ifftFlag,                   /* [in] forward (0) or inverse (1) transform (cf. instance field ifftFlag) */
2081   uint8_t bitReverseFlag);            /* [in] bit reversal of output enabled (1) or disabled (0) (cf. instance field bitReverseFlag) */
2082
2083 /* Deprecated. Processing function for the Radix-2 floating-point CFFT/CIFFT. NOTE(review): arm_cfft_f32() looks like the replacement -- confirm. */
2084   void arm_cfft_radix2_f32(
2085   const arm_cfft_radix2_instance_f32 * S,   /* [in] points to an instance of the Radix-2 floating-point CFFT/CIFFT structure (const: not modified) */
2086   float32_t * pSrc);                        /* [in,out] data buffer; non-const, presumably transformed in place -- TODO confirm */
2087
2088   /**
2089    * @brief Instance structure for the Radix-4 floating-point CFFT/CIFFT function.
2090    */
2091
2092   typedef struct
2093   {
2094     uint16_t fftLen;                   /**< length of the FFT. */
2095     uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2096     uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2097     float32_t *pTwiddle;               /**< points to the twiddle factor table. */
2098     uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
2099     uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2100     uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2101     float32_t onebyfftLen;                 /**< value of 1/fftLen. */
2102   } arm_cfft_radix4_instance_f32;
2103
2104 /* Deprecated. Initialization function for the Radix-4 floating-point CFFT/CIFFT; returns an arm_status code (specific values are not documented here). */
2105   arm_status arm_cfft_radix4_init_f32(
2106   arm_cfft_radix4_instance_f32 * S,   /* [in,out] points to an instance of the Radix-4 floating-point CFFT/CIFFT structure */
2107   uint16_t fftLen,                    /* [in] length of the FFT (cf. instance field fftLen) */
2108   uint8_t ifftFlag,                   /* [in] forward (0) or inverse (1) transform (cf. instance field ifftFlag) */
2109   uint8_t bitReverseFlag);            /* [in] bit reversal of output enabled (1) or disabled (0) (cf. instance field bitReverseFlag) */
2110
2111 /* Deprecated. Processing function for the Radix-4 floating-point CFFT/CIFFT. NOTE(review): arm_cfft_f32() looks like the replacement -- confirm. */
2112   void arm_cfft_radix4_f32(
2113   const arm_cfft_radix4_instance_f32 * S,   /* [in] points to an instance of the Radix-4 floating-point CFFT/CIFFT structure (const: not modified) */
2114   float32_t * pSrc);                        /* [in,out] data buffer; non-const, presumably transformed in place -- TODO confirm */
2115
2116   /**
2117    * @brief Instance structure for the floating-point CFFT/CIFFT function arm_cfft_f32().
2118    * @note  Holds no ifftFlag/bitReverseFlag fields; arm_cfft_f32() takes both as call arguments. */
2119
2120   typedef struct
2121   {
2122     uint16_t fftLen;                   /**< length of the FFT. */
2123     const float32_t *pTwiddle;         /**< points to the twiddle factor table (read-only). */
2124     const uint16_t *pBitRevTable;      /**< points to the bit reversal table (read-only). */
2125     uint16_t bitRevLength;             /**< bit reversal table length. */
2126   } arm_cfft_instance_f32;
2127   /** @brief Processing function for the floating-point CFFT/CIFFT that uses arm_cfft_instance_f32. */
2128   void arm_cfft_f32(
2129   const arm_cfft_instance_f32 * S,   /* [in] points to an instance of the floating-point CFFT structure (const: not modified) */
2130   float32_t * p1,                    /* [in,out] data buffer; non-const, presumably transformed in place -- TODO confirm */
2131   uint8_t ifftFlag,                  /* [in] presumably forward (0) or inverse (1) transform, as for the radix instance field ifftFlag -- TODO confirm */
2132   uint8_t bitReverseFlag);           /* [in] presumably enables (1) or disables (0) output bit reversal, as for the radix instance field -- TODO confirm */
2133
2134   /**
2135    * @brief Instance structure for the Q15 RFFT/RIFFT function.
2136    */
2137
2138   typedef struct
2139   {
2140     uint32_t fftLenReal;                      /**< length of the real FFT. */
2141     uint32_t fftLenBy2;                       /**< length of the complex FFT. */
2142     uint8_t ifftFlagR;                        /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
2143     uint8_t bitReverseFlagR;                      /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
2144     uint32_t twidCoefRModifier;               /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2145     q15_t *pTwiddleAReal;                     /**< points to the real twiddle factor table. */
2146     q15_t *pTwiddleBReal;                     /**< points to the imaginary twiddle factor table. */
2147     arm_cfft_radix4_instance_q15 *pCfft;          /**< points to the complex FFT instance (Radix-4 Q15 CFFT). */
2148   } arm_rfft_instance_q15;
2149   /** @brief Initialization function for the Q15 RFFT/RIFFT. Returns an arm_status code (specific values are not documented here). */
2150   arm_status arm_rfft_init_q15(
2151   arm_rfft_instance_q15 * S,               /* [in,out] points to an instance of the Q15 RFFT/RIFFT structure */
2152   arm_cfft_radix4_instance_q15 * S_CFFT,   /* [in] points to an instance of the Radix-4 Q15 CFFT/CIFFT structure */
2153   uint32_t fftLenReal,                     /* [in] length of the real FFT (cf. instance field fftLenReal) */
2154   uint32_t ifftFlagR,                      /* [in] forward (0) or inverse (1) transform (cf. instance field ifftFlagR, which is uint8_t) */
2155   uint32_t bitReverseFlag);                /* [in] bit reversal of output enabled (1) or disabled (0) (cf. instance field bitReverseFlagR, which is uint8_t) */
2156   /** @brief Processing function for the Q15 RFFT/RIFFT. */
2157   void arm_rfft_q15(
2158   const arm_rfft_instance_q15 * S,   /* [in] points to an instance of the Q15 RFFT/RIFFT structure (const: not modified) */
2159   q15_t * pSrc,                      /* presumably the input buffer -- TODO confirm; note it is not const-qualified */
2160   q15_t * pDst);                     /* presumably the output buffer -- TODO confirm */
2161
2162   /**
2163    * @brief Instance structure for the Q31 RFFT/RIFFT function.
2164    */
2165
2166   typedef struct
2167   {
2168     uint32_t fftLenReal;                        /**< length of the real FFT. */
2169     uint32_t fftLenBy2;                         /**< length of the complex FFT. */
2170     uint8_t ifftFlagR;                          /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
2171     uint8_t bitReverseFlagR;                        /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
2172     uint32_t twidCoefRModifier;                 /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2173     q31_t *pTwiddleAReal;                       /**< points to the real twiddle factor table. */
2174     q31_t *pTwiddleBReal;                       /**< points to the imaginary twiddle factor table. */
2175     arm_cfft_radix4_instance_q31 *pCfft;        /**< points to the complex FFT instance (Radix-4 Q31 CFFT). */
2176   } arm_rfft_instance_q31;
2177   /** @brief Initialization function for the Q31 RFFT/RIFFT. Returns an arm_status code (specific values are not documented here). */
2178   arm_status arm_rfft_init_q31(
2179   arm_rfft_instance_q31 * S,               /* [in,out] points to an instance of the Q31 RFFT/RIFFT structure */
2180   arm_cfft_radix4_instance_q31 * S_CFFT,   /* [in] points to an instance of the Radix-4 Q31 CFFT/CIFFT structure */
2181   uint32_t fftLenReal,                     /* [in] length of the real FFT (cf. instance field fftLenReal) */
2182   uint32_t ifftFlagR,                      /* [in] forward (0) or inverse (1) transform (cf. instance field ifftFlagR, which is uint8_t) */
2183   uint32_t bitReverseFlag);                /* [in] bit reversal of output enabled (1) or disabled (0) (cf. instance field bitReverseFlagR, which is uint8_t) */
2184   /** @brief Processing function for the Q31 RFFT/RIFFT. */
2185   void arm_rfft_q31(
2186   const arm_rfft_instance_q31 * S,   /* [in] points to an instance of the Q31 RFFT/RIFFT structure (const: not modified) */
2187   q31_t * pSrc,                      /* presumably the input buffer -- TODO confirm; note it is not const-qualified */
2188   q31_t * pDst);                     /* presumably the output buffer -- TODO confirm */
2189
2190   /**
2191    * @brief Instance structure for the floating-point RFFT/RIFFT function.
2192    */
2193
2194   typedef struct
2195   {
2196     uint32_t fftLenReal;                        /**< length of the real FFT. */
2197     uint16_t fftLenBy2;                         /**< length of the complex FFT. NOTE(review): uint16_t here, uint32_t in the Q15/Q31 instances. */
2198     uint8_t ifftFlagR;                          /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
2199     uint8_t bitReverseFlagR;                    /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
2200     uint32_t twidCoefRModifier;                     /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2201     float32_t *pTwiddleAReal;                   /**< points to the real twiddle factor table. */
2202     float32_t *pTwiddleBReal;                   /**< points to the imaginary twiddle factor table. */
2203     arm_cfft_radix4_instance_f32 *pCfft;        /**< points to the complex FFT instance (Radix-4 floating-point CFFT). */
2204   } arm_rfft_instance_f32;
2205   /** @brief Initialization function for the floating-point RFFT/RIFFT. Returns an arm_status code (specific values are not documented here). */
2206   arm_status arm_rfft_init_f32(
2207   arm_rfft_instance_f32 * S,               /* [in,out] points to an instance of the floating-point RFFT/RIFFT structure */
2208   arm_cfft_radix4_instance_f32 * S_CFFT,   /* [in] points to an instance of the Radix-4 floating-point CFFT/CIFFT structure */
2209   uint32_t fftLenReal,                     /* [in] length of the real FFT (cf. instance field fftLenReal) */
2210   uint32_t ifftFlagR,                      /* [in] forward (0) or inverse (1) transform (cf. instance field ifftFlagR, which is uint8_t) */
2211   uint32_t bitReverseFlag);                /* [in] bit reversal of output enabled (1) or disabled (0) (cf. instance field bitReverseFlagR, which is uint8_t) */
2212   /** @brief Processing function for the floating-point RFFT/RIFFT. */
2213   void arm_rfft_f32(
2214   const arm_rfft_instance_f32 * S,   /* [in] points to an instance of the floating-point RFFT/RIFFT structure (const: not modified) */
2215   float32_t * pSrc,                  /* presumably the input buffer -- TODO confirm; note it is not const-qualified */
2216   float32_t * pDst);                 /* presumably the output buffer -- TODO confirm */
2217
2218   /**
2219    * @brief Instance structure for the floating-point fast RFFT/RIFFT function (arm_rfft_fast_f32).
2220    */
2221
2222 typedef struct
2223   {
2224     arm_cfft_instance_f32 Sint;      /**< Internal CFFT structure (embedded by value, not a pointer). */
2225     uint16_t fftLenRFFT;                        /**< length of the real sequence */
2226         float32_t * pTwiddleRFFT;                                       /**< Twiddle factors real stage  */
2227   } arm_rfft_fast_instance_f32 ;
2228   /** @brief Initialization function for the floating-point fast RFFT/RIFFT. Returns an arm_status code (specific values are not documented here). */
2229 arm_status arm_rfft_fast_init_f32 (
2230         arm_rfft_fast_instance_f32 * S,   /* [in,out] points to an instance of the floating-point fast RFFT/RIFFT structure */
2231         uint16_t fftLen);                 /* [in] presumably the length of the real sequence (cf. instance field fftLenRFFT) -- TODO confirm */
2232   /** @brief Processing function for the floating-point fast RFFT/RIFFT. */
2233 void arm_rfft_fast_f32(
2234   arm_rfft_fast_instance_f32 * S,    /* instance; NOTE(review): not const, unlike the other RFFT/CFFT processing functions in this header */
2235   float32_t * p, float32_t * pOut,   /* p: presumably the input buffer, pOut: presumably the output buffer -- TODO confirm */
2236   uint8_t ifftFlag);                 /* presumably forward (0) or inverse (1) transform, as for the ifftFlag instance fields -- TODO confirm */
2237
2238   /**
2239    * @brief Instance structure for the floating-point DCT4/IDCT4 function.
2240    */
2241
2242   typedef struct
2243   {
2244     uint16_t N;                         /**< length of the DCT4. */
2245     uint16_t Nby2;                      /**< half of the length of the DCT4. */
2246     float32_t normalize;                /**< normalizing factor. */
2247     float32_t *pTwiddle;                /**< points to the twiddle factor table. */
2248     float32_t *pCosFactor;              /**< points to the cosFactor table. */
2249     arm_rfft_instance_f32 *pRfft;        /**< points to the real FFT instance (floating-point RFFT). */
2250     arm_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance (Radix-4 floating-point CFFT). */
2251   } arm_dct4_instance_f32;
2252
2253   /**
2254    * @brief  Initialization function for the floating-point DCT4/IDCT4.
2255    * @param[in,out] S          points to an instance of floating-point DCT4/IDCT4 structure.
2256    * @param[in]     S_RFFT     points to an instance of floating-point RFFT/RIFFT structure.
2257    * @param[in]     S_CFFT     points to an instance of floating-point CFFT/CIFFT structure.
2258    * @param[in]     N          length of the DCT4.
2259    * @param[in]     Nby2       half of the length of the DCT4.
2260    * @param[in]     normalize  normalizing factor.
2261    * @return        arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
2262    */
2263
2264   arm_status arm_dct4_init_f32(
2265   arm_dct4_instance_f32 * S,
2266   arm_rfft_instance_f32 * S_RFFT,
2267   arm_cfft_radix4_instance_f32 * S_CFFT,
2268   uint16_t N,
2269   uint16_t Nby2,
2270   float32_t normalize);
2271
2272   /**
2273    * @brief Processing function for the floating-point DCT4/IDCT4.
2274    * @param[in]     S              points to an instance of the floating-point DCT4/IDCT4 structure.
2275    * @param[in]     pState         points to state buffer.
2276    * @param[in,out] pInlineBuffer  points to the in-place input and output buffer.
2277    * @return none.
2278    */
2279
2280   void arm_dct4_f32(
2281   const arm_dct4_instance_f32 * S,
2282   float32_t * pState,
2283   float32_t * pInlineBuffer);
2284
2285   /**
2286    * @brief Instance structure for the Q31 DCT4/IDCT4 function.
2287    */
2288
2289   typedef struct
2290   {
2291     uint16_t N;                         /**< length of the DCT4. */
2292     uint16_t Nby2;                      /**< half of the length of the DCT4. */
2293     q31_t normalize;                    /**< normalizing factor. */
2294     q31_t *pTwiddle;                    /**< points to the twiddle factor table. */
2295     q31_t *pCosFactor;                  /**< points to the cosFactor table. */
2296     arm_rfft_instance_q31 *pRfft;        /**< points to the real FFT instance (Q31 RFFT). */
2297     arm_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance (Radix-4 Q31 CFFT). */
2298   } arm_dct4_instance_q31;
2299
2300   /**
2301    * @brief  Initialization function for the Q31 DCT4/IDCT4.
2302    * @param[in,out] S          points to an instance of Q31 DCT4/IDCT4 structure.
2303    * @param[in]     S_RFFT     points to an instance of Q31 RFFT/RIFFT structure.
2304    * @param[in]     S_CFFT     points to an instance of Q31 CFFT/CIFFT structure.
2305    * @param[in]     N          length of the DCT4.
2306    * @param[in]     Nby2       half of the length of the DCT4.
2307    * @param[in]     normalize  normalizing factor.
2308    * @return        arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
2309    */
2310
2311   arm_status arm_dct4_init_q31(
2312   arm_dct4_instance_q31 * S,
2313   arm_rfft_instance_q31 * S_RFFT,
2314   arm_cfft_radix4_instance_q31 * S_CFFT,
2315   uint16_t N,
2316   uint16_t Nby2,
2317   q31_t normalize);
2318
2319   /**
2320    * @brief Processing function for the Q31 DCT4/IDCT4.
2321    * @param[in]     S              points to an instance of the Q31 DCT4 structure.
2322    * @param[in]     pState         points to state buffer.
2323    * @param[in,out] pInlineBuffer  points to the in-place input and output buffer.
2324    * @return none.
2325    */
2326
2327   void arm_dct4_q31(
2328   const arm_dct4_instance_q31 * S,
2329   q31_t * pState,
2330   q31_t * pInlineBuffer);
2331
2332   /**
2333    * @brief Instance structure for the Q15 DCT4/IDCT4 function.
2334    */
2335
2336   typedef struct
2337   {
2338     uint16_t N;                         /**< length of the DCT4. */
2339     uint16_t Nby2;                      /**< half of the length of the DCT4. */
2340     q15_t normalize;                    /**< normalizing factor. */
2341     q15_t *pTwiddle;                    /**< points to the twiddle factor table. */
2342     q15_t *pCosFactor;                  /**< points to the cosFactor table. */
2343     arm_rfft_instance_q15 *pRfft;        /**< points to the real FFT instance (Q15 RFFT). */
2344     arm_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance (Radix-4 Q15 CFFT). */
2345   } arm_dct4_instance_q15;
2346
2347   /**
2348    * @brief  Initialization function for the Q15 DCT4/IDCT4.
2349    * @param[in,out] S          points to an instance of Q15 DCT4/IDCT4 structure.
2350    * @param[in]     S_RFFT     points to an instance of Q15 RFFT/RIFFT structure.
2351    * @param[in]     S_CFFT     points to an instance of Q15 CFFT/CIFFT structure.
2352    * @param[in]     N          length of the DCT4.
2353    * @param[in]     Nby2       half of the length of the DCT4.
2354    * @param[in]     normalize  normalizing factor.
2355    * @return        arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
2356    */
2357
2358   arm_status arm_dct4_init_q15(
2359   arm_dct4_instance_q15 * S,
2360   arm_rfft_instance_q15 * S_RFFT,
2361   arm_cfft_radix4_instance_q15 * S_CFFT,
2362   uint16_t N,
2363   uint16_t Nby2,
2364   q15_t normalize);
2365
2366   /**
2367    * @brief Processing function for the Q15 DCT4/IDCT4.
2368    * @param[in]     S              points to an instance of the Q15 DCT4 structure.
2369    * @param[in]     pState         points to state buffer.
2370    * @param[in,out] pInlineBuffer  points to the in-place input and output buffer.
2371    * @return none.
2372    */
2373
2374   void arm_dct4_q15(
2375   const arm_dct4_instance_q15 * S,
2376   q15_t * pState,
2377   q15_t * pInlineBuffer);
2378
2379   /**
2380    * @brief Floating-point vector addition.
2381    * @param[in]  pSrcA      points to the first input vector
2382    * @param[in]  pSrcB      points to the second input vector
2383    * @param[out] pDst       points to the output vector
2384    * @param[in]  blockSize  number of samples in each vector
2385    * @return none.
2386    */
2387
2388   void arm_add_f32(
2389   float32_t * pSrcA,
2390   float32_t * pSrcB,
2391   float32_t * pDst,
2392   uint32_t blockSize);
2393
2394   /**
2395    * @brief Q7 vector addition.
2396    * @param[in]  pSrcA      points to the first input vector
2397    * @param[in]  pSrcB      points to the second input vector
2398    * @param[out] pDst       points to the output vector
2399    * @param[in]  blockSize  number of samples in each vector
2400    * @return none.
2401    */
2402
2403   void arm_add_q7(
2404   q7_t * pSrcA,
2405   q7_t * pSrcB,
2406   q7_t * pDst,
2407   uint32_t blockSize);
2408
2409   /**
2410    * @brief Q15 vector addition.
2411    * @param[in]  pSrcA      points to the first input vector
2412    * @param[in]  pSrcB      points to the second input vector
2413    * @param[out] pDst       points to the output vector
2414    * @param[in]  blockSize  number of samples in each vector
2415    * @return none.
2416    */
2417
2418   void arm_add_q15(
2419   q15_t * pSrcA,
2420   q15_t * pSrcB,
2421   q15_t * pDst,
2422   uint32_t blockSize);
2423
2424   /**
2425    * @brief Q31 vector addition.
2426    * @param[in]  pSrcA      points to the first input vector
2427    * @param[in]  pSrcB      points to the second input vector
2428    * @param[out] pDst       points to the output vector
2429    * @param[in]  blockSize  number of samples in each vector
2430    * @return none.
2431    */
2432
2433   void arm_add_q31(
2434   q31_t * pSrcA,
2435   q31_t * pSrcB,
2436   q31_t * pDst,
2437   uint32_t blockSize);
2438
2439   /**
2440    * @brief Floating-point vector subtraction.
2441    * @param[in]  pSrcA      points to the first input vector
2442    * @param[in]  pSrcB      points to the second input vector
2443    * @param[out] pDst       points to the output vector
2444    * @param[in]  blockSize  number of samples in each vector
2445    * @return none.
2446    */
2447
2448   void arm_sub_f32(
2449   float32_t * pSrcA,
2450   float32_t * pSrcB,
2451   float32_t * pDst,
2452   uint32_t blockSize);
2453
2454   /**
2455    * @brief Q7 vector subtraction.
2456    * @param[in]  pSrcA      points to the first input vector
2457    * @param[in]  pSrcB      points to the second input vector
2458    * @param[out] pDst       points to the output vector
2459    * @param[in]  blockSize  number of samples in each vector
2460    * @return none.
2461    */
2462
2463   void arm_sub_q7(
2464   q7_t * pSrcA,
2465   q7_t * pSrcB,
2466   q7_t * pDst,
2467   uint32_t blockSize);
2468
2469   /**
2470    * @brief Q15 vector subtraction.
2471    * @param[in]  pSrcA      points to the first input vector
2472    * @param[in]  pSrcB      points to the second input vector
2473    * @param[out] pDst       points to the output vector
2474    * @param[in]  blockSize  number of samples in each vector
2475    * @return none.
2476    */
2477
2478   void arm_sub_q15(
2479   q15_t * pSrcA,
2480   q15_t * pSrcB,
2481   q15_t * pDst,
2482   uint32_t blockSize);
2483
2484   /**
2485    * @brief Q31 vector subtraction.
2486    * @param[in]  pSrcA      points to the first input vector
2487    * @param[in]  pSrcB      points to the second input vector
2488    * @param[out] pDst       points to the output vector
2489    * @param[in]  blockSize  number of samples in each vector
2490    * @return none.
2491    */
2492
2493   void arm_sub_q31(
2494   q31_t * pSrcA,
2495   q31_t * pSrcB,
2496   q31_t * pDst,
2497   uint32_t blockSize);
2498
2499   /**
2500    * @brief Multiplies a floating-point vector by a scalar.
2501    * @param[in]  pSrc       points to the input vector
2502    * @param[in]  scale      scale factor to be applied
2503    * @param[out] pDst       points to the output vector
2504    * @param[in]  blockSize  number of samples in the vector
2505    * @return none.
2506    */
2507
2508   void arm_scale_f32(
2509   float32_t * pSrc,
2510   float32_t scale,
2511   float32_t * pDst,
2512   uint32_t blockSize);
2513
2514   /**
2515    * @brief Multiplies a Q7 vector by a scalar.
2516    * @param[in]  pSrc        points to the input vector
2517    * @param[in]  scaleFract  fractional portion of the scale value
2518    * @param[in]  shift       number of bits to shift the result by
2519    * @param[out] pDst        points to the output vector
2520    * @param[in]  blockSize   number of samples in the vector
2521    * @return none.
2522    */
2523
2524   void arm_scale_q7(
2525   q7_t * pSrc,
2526   q7_t scaleFract,
2527   int8_t shift,
2528   q7_t * pDst,
2529   uint32_t blockSize);
2530
2531   /**
2532    * @brief Multiplies a Q15 vector by a scalar.
2533    * @param[in]  pSrc        points to the input vector
2534    * @param[in]  scaleFract  fractional portion of the scale value
2535    * @param[in]  shift       number of bits to shift the result by
2536    * @param[out] pDst        points to the output vector
2537    * @param[in]  blockSize   number of samples in the vector
2538    * @return none.
2539    */
2540
2541   void arm_scale_q15(
2542   q15_t * pSrc,
2543   q15_t scaleFract,
2544   int8_t shift,
2545   q15_t * pDst,
2546   uint32_t blockSize);
2547
2548   /**
2549    * @brief Multiplies a Q31 vector by a scalar.
2550    * @param[in]  pSrc        points to the input vector
2551    * @param[in]  scaleFract  fractional portion of the scale value
2552    * @param[in]  shift       number of bits to shift the result by
2553    * @param[out] pDst        points to the output vector
2554    * @param[in]  blockSize   number of samples in the vector
2555    * @return none.
2556    */
2557
2558   void arm_scale_q31(
2559   q31_t * pSrc,
2560   q31_t scaleFract,
2561   int8_t shift,
2562   q31_t * pDst,
2563   uint32_t blockSize);
2564
2565   /**
2566    * @brief Q7 vector absolute value.
2567    * @param[in]  pSrc       points to the input buffer
2568    * @param[out] pDst       points to the output buffer
2569    * @param[in]  blockSize  number of samples in each vector
2570    * @return none.
2571    */
2572
2573   void arm_abs_q7(
2574   q7_t * pSrc,
2575   q7_t * pDst,
2576   uint32_t blockSize);
2577
2578   /**
2579    * @brief Floating-point vector absolute value.
2580    * @param[in]  pSrc       points to the input buffer
2581    * @param[out] pDst       points to the output buffer
2582    * @param[in]  blockSize  number of samples in each vector
2583    * @return none.
2584    */
2585
2586   void arm_abs_f32(
2587   float32_t * pSrc,
2588   float32_t * pDst,
2589   uint32_t blockSize);
2590
2591   /**
2592    * @brief Q15 vector absolute value.
2593    * @param[in]  pSrc       points to the input buffer
2594    * @param[out] pDst       points to the output buffer
2595    * @param[in]  blockSize  number of samples in each vector
2596    * @return none.
2597    */
2598
2599   void arm_abs_q15(
2600   q15_t * pSrc,
2601   q15_t * pDst,
2602   uint32_t blockSize);
2603
2604   /**
2605    * @brief Q31 vector absolute value.
2606    * @param[in]  pSrc       points to the input buffer
2607    * @param[out] pDst       points to the output buffer
2608    * @param[in]  blockSize  number of samples in each vector
2609    * @return none.
2610    */
2611
2612   void arm_abs_q31(
2613   q31_t * pSrc,
2614   q31_t * pDst,
2615   uint32_t blockSize);
2616
2617   /**
2618    * @brief Dot product of floating-point vectors.
2619    * @param[in]       *pSrcA points to the first input vector (blockSize float32_t samples)
2620    * @param[in]       *pSrcB points to the second input vector (blockSize float32_t samples)
2621    * @param[in]       blockSize number of samples in each vector
2622    * @param[out]      *result points to the float32_t location where the dot product is written
2623    * @return none.
2624    */
2625
2626   void arm_dot_prod_f32(
2627   float32_t * pSrcA,
2628   float32_t * pSrcB,
2629   uint32_t blockSize,
2630   float32_t * result);
2631
2632   /**
2633    * @brief Dot product of Q7 vectors.
2634    * @param[in]       *pSrcA points to the first input vector (blockSize q7_t samples)
2635    * @param[in]       *pSrcB points to the second input vector (blockSize q7_t samples)
2636    * @param[in]       blockSize number of samples in each vector
2637    * @param[out]      *result points to the q31_t location where the dot product is written
2638    * @return none.
2639    */
2640
2641   void arm_dot_prod_q7(
2642   q7_t * pSrcA,
2643   q7_t * pSrcB,
2644   uint32_t blockSize,
2645   q31_t * result);
2646
2647   /**
2648    * @brief Dot product of Q15 vectors.
2649    * @param[in]       *pSrcA points to the first input vector (blockSize q15_t samples)
2650    * @param[in]       *pSrcB points to the second input vector (blockSize q15_t samples)
2651    * @param[in]       blockSize number of samples in each vector
2652    * @param[out]      *result points to the q63_t location where the dot product is written
2653    * @return none.
2654    */
2655
2656   void arm_dot_prod_q15(
2657   q15_t * pSrcA,
2658   q15_t * pSrcB,
2659   uint32_t blockSize,
2660   q63_t * result);
2661
2662   /**
2663    * @brief Dot product of Q31 vectors.
2664    * @param[in]       *pSrcA points to the first input vector (blockSize q31_t samples)
2665    * @param[in]       *pSrcB points to the second input vector (blockSize q31_t samples)
2666    * @param[in]       blockSize number of samples in each vector
2667    * @param[out]      *result points to the q63_t location where the dot product is written
2668    * @return none.
2669    */
2670
2671   void arm_dot_prod_q31(
2672   q31_t * pSrcA,
2673   q31_t * pSrcB,
2674   uint32_t blockSize,
2675   q63_t * result);
2676
2677   /**
2678    * @brief  Shifts the elements of a Q7 vector by a specified number of bits.
2679    * @param[in]  *pSrc points to the input vector (blockSize q7_t samples)
2680    * @param[in]  shiftBits number of bits to shift.  A positive value shifts left; a negative value shifts right.
2681    * @param[out]  *pDst points to the output vector (blockSize q7_t samples)
2682    * @param[in]  blockSize number of samples in the vector
2683    * @return none.
2684    */
2685
2686   void arm_shift_q7(
2687   q7_t * pSrc,
2688   int8_t shiftBits,
2689   q7_t * pDst,
2690   uint32_t blockSize);
2691
2692   /**
2693    * @brief  Shifts the elements of a Q15 vector by a specified number of bits.
2694    * @param[in]  *pSrc points to the input vector (blockSize q15_t samples)
2695    * @param[in]  shiftBits number of bits to shift.  A positive value shifts left; a negative value shifts right.
2696    * @param[out]  *pDst points to the output vector (blockSize q15_t samples)
2697    * @param[in]  blockSize number of samples in the vector
2698    * @return none.
2699    */
2700
2701   void arm_shift_q15(
2702   q15_t * pSrc,
2703   int8_t shiftBits,
2704   q15_t * pDst,
2705   uint32_t blockSize);
2706
2707   /**
2708    * @brief  Shifts the elements of a Q31 vector by a specified number of bits.
2709    * @param[in]  *pSrc points to the input vector (blockSize q31_t samples)
2710    * @param[in]  shiftBits number of bits to shift.  A positive value shifts left; a negative value shifts right.
2711    * @param[out]  *pDst points to the output vector (blockSize q31_t samples)
2712    * @param[in]  blockSize number of samples in the vector
2713    * @return none.
2714    */
2715
2716   void arm_shift_q31(
2717   q31_t * pSrc,
2718   int8_t shiftBits,
2719   q31_t * pDst,
2720   uint32_t blockSize);
2721
2722   /**
2723    * @brief  Adds a constant offset to each element of a floating-point vector.
2724    * @param[in]  *pSrc points to the input vector (blockSize float32_t samples)
2725    * @param[in]  offset is the constant float32_t value to be added
2726    * @param[out]  *pDst points to the output vector (blockSize float32_t samples)
2727    * @param[in]  blockSize number of samples in the vector
2728    * @return none.
2729    */
2730
2731   void arm_offset_f32(
2732   float32_t * pSrc,
2733   float32_t offset,
2734   float32_t * pDst,
2735   uint32_t blockSize);
2736
2737   /**
2738    * @brief  Adds a constant offset to each element of a Q7 vector.
2739    * @param[in]  *pSrc points to the input vector (blockSize q7_t samples)
2740    * @param[in]  offset is the constant q7_t value to be added
2741    * @param[out]  *pDst points to the output vector (blockSize q7_t samples)
2742    * @param[in]  blockSize number of samples in the vector
2743    * @return none.
2744    */
2745
2746   void arm_offset_q7(
2747   q7_t * pSrc,
2748   q7_t offset,
2749   q7_t * pDst,
2750   uint32_t blockSize);
2751
2752   /**
2753    * @brief  Adds a constant offset to each element of a Q15 vector.
2754    * @param[in]  *pSrc points to the input vector (blockSize q15_t samples)
2755    * @param[in]  offset is the constant q15_t value to be added
2756    * @param[out]  *pDst points to the output vector (blockSize q15_t samples)
2757    * @param[in]  blockSize number of samples in the vector
2758    * @return none.
2759    */
2760
2761   void arm_offset_q15(
2762   q15_t * pSrc,
2763   q15_t offset,
2764   q15_t * pDst,
2765   uint32_t blockSize);
2766
2767   /**
2768    * @brief  Adds a constant offset to each element of a Q31 vector.
2769    * @param[in]  *pSrc points to the input vector (blockSize q31_t samples)
2770    * @param[in]  offset is the constant q31_t value to be added
2771    * @param[out]  *pDst points to the output vector (blockSize q31_t samples)
2772    * @param[in]  blockSize number of samples in the vector
2773    * @return none.
2774    */
2775
2776   void arm_offset_q31(
2777   q31_t * pSrc,
2778   q31_t offset,
2779   q31_t * pDst,
2780   uint32_t blockSize);
2781
2782   /**
2783    * @brief  Negates the elements of a floating-point vector.
2784    * @param[in]  *pSrc points to the input vector (blockSize float32_t samples)
2785    * @param[out]  *pDst points to the output vector (blockSize float32_t samples)
2786    * @param[in]  blockSize number of samples in the vector
2787    * @return none.
2788    */
2789
2790   void arm_negate_f32(
2791   float32_t * pSrc,
2792   float32_t * pDst,
2793   uint32_t blockSize);
2794
2795   /**
2796    * @brief  Negates the elements of a Q7 vector.
2797    * @param[in]  *pSrc points to the input vector (blockSize q7_t samples)
2798    * @param[out]  *pDst points to the output vector (blockSize q7_t samples)
2799    * @param[in]  blockSize number of samples in the vector
2800    * @return none.
2801    */
2802
2803   void arm_negate_q7(
2804   q7_t * pSrc,
2805   q7_t * pDst,
2806   uint32_t blockSize);
2807
2808   /**
2809    * @brief  Negates the elements of a Q15 vector.
2810    * @param[in]  *pSrc points to the input vector (blockSize q15_t samples)
2811    * @param[out]  *pDst points to the output vector (blockSize q15_t samples)
2812    * @param[in]  blockSize number of samples in the vector
2813    * @return none.
2814    */
2815
2816   void arm_negate_q15(
2817   q15_t * pSrc,
2818   q15_t * pDst,
2819   uint32_t blockSize);
2820
2821   /**
2822    * @brief  Negates the elements of a Q31 vector.
2823    * @param[in]  *pSrc points to the input vector (blockSize q31_t samples)
2824    * @param[out]  *pDst points to the output vector (blockSize q31_t samples)
2825    * @param[in]  blockSize number of samples in the vector
2826    * @return none.
2827    */
2828
2829   void arm_negate_q31(
2830   q31_t * pSrc,
2831   q31_t * pDst,
2832   uint32_t blockSize);
2833   /**
2834    * @brief  Copies the elements of a floating-point vector.
2835    * @param[in]  *pSrc points to the source vector (blockSize float32_t samples)
2836    * @param[out]  *pDst points to the destination vector (blockSize float32_t samples)
2837    * @param[in]  blockSize number of samples to copy
2838    * @return none.
2839    */
2840   void arm_copy_f32(
2841   float32_t * pSrc,
2842   float32_t * pDst,
2843   uint32_t blockSize);
2844
2845   /**
2846    * @brief  Copies the elements of a Q7 vector.
2847    * @param[in]  *pSrc points to the source vector (blockSize q7_t samples)
2848    * @param[out]  *pDst points to the destination vector (blockSize q7_t samples)
2849    * @param[in]  blockSize number of samples to copy
2850    * @return none.
2851    */
2852   void arm_copy_q7(
2853   q7_t * pSrc,
2854   q7_t * pDst,
2855   uint32_t blockSize);
2856
2857   /**
2858    * @brief  Copies the elements of a Q15 vector.
2859    * @param[in]  *pSrc points to the source vector (blockSize q15_t samples)
2860    * @param[out]  *pDst points to the destination vector (blockSize q15_t samples)
2861    * @param[in]  blockSize number of samples to copy
2862    * @return none.
2863    */
2864   void arm_copy_q15(
2865   q15_t * pSrc,
2866   q15_t * pDst,
2867   uint32_t blockSize);
2868
2869   /**
2870    * @brief  Copies the elements of a Q31 vector.
2871    * @param[in]  *pSrc points to the source vector (blockSize q31_t samples)
2872    * @param[out]  *pDst points to the destination vector (blockSize q31_t samples)
2873    * @param[in]  blockSize number of samples to copy
2874    * @return none.
2875    */
2876   void arm_copy_q31(
2877   q31_t * pSrc,
2878   q31_t * pDst,
2879   uint32_t blockSize);
2880   /**
2881    * @brief  Fills a constant value into a floating-point vector.
2882    * @param[in]  value is the constant float32_t value to fill with
2883    * @param[out]  *pDst points to the output vector (blockSize float32_t samples)
2884    * @param[in]  blockSize number of samples to fill
2885    * @return none.
2886    */
2887   void arm_fill_f32(
2888   float32_t value,
2889   float32_t * pDst,
2890   uint32_t blockSize);
2891
2892   /**
2893    * @brief  Fills a constant value into a Q7 vector.
2894    * @param[in]  value is the constant q7_t value to fill with
2895    * @param[out]  *pDst points to the output vector (blockSize q7_t samples)
2896    * @param[in]  blockSize number of samples to fill
2897    * @return none.
2898    */
2899   void arm_fill_q7(
2900   q7_t value,
2901   q7_t * pDst,
2902   uint32_t blockSize);
2903
2904   /**
2905    * @brief  Fills a constant value into a Q15 vector.
2906    * @param[in]  value is the constant q15_t value to fill with
2907    * @param[out]  *pDst points to the output vector (blockSize q15_t samples)
2908    * @param[in]  blockSize number of samples to fill
2909    * @return none.
2910    */
2911   void arm_fill_q15(
2912   q15_t value,
2913   q15_t * pDst,
2914   uint32_t blockSize);
2915
2916   /**
2917    * @brief  Fills a constant value into a Q31 vector.
2918    * @param[in]  value is the constant q31_t value to fill with
2919    * @param[out]  *pDst points to the output vector (blockSize q31_t samples)
2920    * @param[in]  blockSize number of samples to fill
2921    * @return none.
2922    */
2923   void arm_fill_q31(
2924   q31_t value,
2925   q31_t * pDst,
2926   uint32_t blockSize);
2927
2928 /**
2929  * @brief Convolution of floating-point sequences.
2930  * @param[in] *pSrcA points to the first input sequence.
2931  * @param[in] srcALen length of the first input sequence.
2932  * @param[in] *pSrcB points to the second input sequence.
2933  * @param[in] srcBLen length of the second input sequence.
2934  * @param[out] *pDst points to the float32_t buffer where the output result is written.  Length srcALen+srcBLen-1.
2935  * @return none.
2936  */
2937
2938   void arm_conv_f32(
2939   float32_t * pSrcA,
2940   uint32_t srcALen,
2941   float32_t * pSrcB,
2942   uint32_t srcBLen,
2943   float32_t * pDst);
2944
2945
2946   /**
2947    * @brief Convolution of Q15 sequences using caller-supplied scratch buffers.
2948    * @param[in] *pSrcA points to the first input sequence.
2949    * @param[in] srcALen length of the first input sequence.
2950    * @param[in] *pSrcB points to the second input sequence.
2951    * @param[in] srcBLen length of the second input sequence.
2952    * @param[out] *pDst points to the block of output data.  Length srcALen+srcBLen-1.
2953    * @param[in]  *pScratch1 points to a q15_t scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
2954    * @param[in]  *pScratch2 points to a q15_t scratch buffer of size min(srcALen, srcBLen).
2955    * @return none.
2956    */
2957
2958
2959   void arm_conv_opt_q15(
2960   q15_t * pSrcA,
2961   uint32_t srcALen,
2962   q15_t * pSrcB,
2963   uint32_t srcBLen,
2964   q15_t * pDst,
2965   q15_t * pScratch1,
2966   q15_t * pScratch2);
2967
2968
2969 /**
2970  * @brief Convolution of Q15 sequences.
2971  * @param[in] *pSrcA points to the first input sequence.
2972  * @param[in] srcALen length of the first input sequence.
2973  * @param[in] *pSrcB points to the second input sequence.
2974  * @param[in] srcBLen length of the second input sequence.
2975  * @param[out] *pDst points to the q15_t buffer where the output result is written.  Length srcALen+srcBLen-1.
2976  * @return none.
2977  */
2978
2979   void arm_conv_q15(
2980   q15_t * pSrcA,
2981   uint32_t srcALen,
2982   q15_t * pSrcB,
2983   uint32_t srcBLen,
2984   q15_t * pDst);
2985
2986   /**
2987    * @brief Convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
2988    * @param[in] *pSrcA points to the first input sequence.
2989    * @param[in] srcALen length of the first input sequence.
2990    * @param[in] *pSrcB points to the second input sequence.
2991    * @param[in] srcBLen length of the second input sequence.
2992    * @param[out] *pDst points to the block of output data.  Length srcALen+srcBLen-1.
2993    * @return none.
2994    */
2995
2996   void arm_conv_fast_q15(
2997                           q15_t * pSrcA,
2998                          uint32_t srcALen,
2999                           q15_t * pSrcB,
3000                          uint32_t srcBLen,
3001                          q15_t * pDst);
3002
3003   /**
3004    * @brief Convolution of Q15 sequences (fast version, using caller-supplied scratch buffers) for Cortex-M3 and Cortex-M4.
3005    * @param[in] *pSrcA points to the first input sequence.
3006    * @param[in] srcALen length of the first input sequence.
3007    * @param[in] *pSrcB points to the second input sequence.
3008    * @param[in] srcBLen length of the second input sequence.
3009    * @param[out] *pDst points to the block of output data.  Length srcALen+srcBLen-1.
3010    * @param[in]  *pScratch1 points to a q15_t scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
3011    * @param[in]  *pScratch2 points to a q15_t scratch buffer of size min(srcALen, srcBLen).
3012    * @return none.
3013    */
3014
3015   void arm_conv_fast_opt_q15(
3016   q15_t * pSrcA,
3017   uint32_t srcALen,
3018   q15_t * pSrcB,
3019   uint32_t srcBLen,
3020   q15_t * pDst,
3021   q15_t * pScratch1,
3022   q15_t * pScratch2);
3023
3024
3025
3026   /**
3027    * @brief Convolution of Q31 sequences.
3028    * @param[in] *pSrcA points to the first input sequence.
3029    * @param[in] srcALen length of the first input sequence.
3030    * @param[in] *pSrcB points to the second input sequence.
3031    * @param[in] srcBLen length of the second input sequence.
3032    * @param[out] *pDst points to the block of output data.  Length srcALen+srcBLen-1.
3033    * @return none.
3034    */
3035
3036   void arm_conv_q31(
3037   q31_t * pSrcA,
3038   uint32_t srcALen,
3039   q31_t * pSrcB,
3040   uint32_t srcBLen,
3041   q31_t * pDst);
3042
3043   /**
3044    * @brief Convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4.
3045    * @param[in] *pSrcA points to the first input sequence.
3046    * @param[in] srcALen length of the first input sequence.
3047    * @param[in] *pSrcB points to the second input sequence.
3048    * @param[in] srcBLen length of the second input sequence.
3049    * @param[out] *pDst points to the block of output data.  Length srcALen+srcBLen-1.
3050    * @return none.
3051    */
3052
3053   void arm_conv_fast_q31(
3054   q31_t * pSrcA,
3055   uint32_t srcALen,
3056   q31_t * pSrcB,
3057   uint32_t srcBLen,
3058   q31_t * pDst);
3059
3060
3061     /**
3062    * @brief Convolution of Q7 sequences using caller-supplied scratch buffers.
3063    * @param[in] *pSrcA points to the first input sequence.
3064    * @param[in] srcALen length of the first input sequence.
3065    * @param[in] *pSrcB points to the second input sequence.
3066    * @param[in] srcBLen length of the second input sequence.
3067    * @param[out] *pDst points to the block of output data.  Length srcALen+srcBLen-1.
3068    * @param[in]  *pScratch1 points to scratch buffer (of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
3069    * @param[in]  *pScratch2 points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
3070    * @return none.
3071    */
3072
3073   void arm_conv_opt_q7(
3074   q7_t * pSrcA,
3075   uint32_t srcALen,
3076   q7_t * pSrcB,
3077   uint32_t srcBLen,
3078   q7_t * pDst,
3079   q15_t * pScratch1,
3080   q15_t * pScratch2);
3081
3082
3083
3084   /**
3085    * @brief Convolution of Q7 sequences.
3086    * @param[in] *pSrcA points to the first input sequence.
3087    * @param[in] srcALen length of the first input sequence.
3088    * @param[in] *pSrcB points to the second input sequence.
3089    * @param[in] srcBLen length of the second input sequence.
3090    * @param[out] *pDst points to the block of output data.  Length srcALen+srcBLen-1.
3091    * @return none.
3092    */
3093
3094   void arm_conv_q7(
3095   q7_t * pSrcA,
3096   uint32_t srcALen,
3097   q7_t * pSrcB,
3098   uint32_t srcBLen,
3099   q7_t * pDst);
3100
3101
3102   /**
3103    * @brief Partial convolution of floating-point sequences.
3104    * @param[in]       *pSrcA points to the first input sequence.
3105    * @param[in]       srcALen length of the first input sequence.
3106    * @param[in]       *pSrcB points to the second input sequence.
3107    * @param[in]       srcBLen length of the second input sequence.
3108    * @param[out]      *pDst points to the block of output data.
3109    * @param[in]       firstIndex is the first output sample to start with.
3110    * @param[in]       numPoints is the number of output points to be computed.
3111    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3112    */
3113
3114   arm_status arm_conv_partial_f32(
3115   float32_t * pSrcA,
3116   uint32_t srcALen,
3117   float32_t * pSrcB,
3118   uint32_t srcBLen,
3119   float32_t * pDst,
3120   uint32_t firstIndex,
3121   uint32_t numPoints);
3122
3123     /**
3124    * @brief Partial convolution of Q15 sequences using caller-supplied scratch buffers.
3125    * @param[in]       *pSrcA points to the first input sequence.
3126    * @param[in]       srcALen length of the first input sequence.
3127    * @param[in]       *pSrcB points to the second input sequence.
3128    * @param[in]       srcBLen length of the second input sequence.
3129    * @param[out]      *pDst points to the block of output data.
3130    * @param[in]       firstIndex is the first output sample to start with.
3131    * @param[in]       numPoints is the number of output points to be computed.
3132    * @param[in]       *pScratch1 points to a q15_t scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
3133    * @param[in]       *pScratch2 points to a q15_t scratch buffer of size min(srcALen, srcBLen).
3134    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3135    */
3136
3137   arm_status arm_conv_partial_opt_q15(
3138   q15_t * pSrcA,
3139   uint32_t srcALen,
3140   q15_t * pSrcB,
3141   uint32_t srcBLen,
3142   q15_t * pDst,
3143   uint32_t firstIndex,
3144   uint32_t numPoints,
3145   q15_t * pScratch1,
3146   q15_t * pScratch2);
3147
3148
3149 /**
3150    * @brief Partial convolution of Q15 sequences.
3151    * @param[in]       *pSrcA points to the first input sequence.
3152    * @param[in]       srcALen length of the first input sequence.
3153    * @param[in]       *pSrcB points to the second input sequence.
3154    * @param[in]       srcBLen length of the second input sequence.
3155    * @param[out]      *pDst points to the block of output data.
3156    * @param[in]       firstIndex is the first output sample to start with.
3157    * @param[in]       numPoints is the number of output points to be computed.
3158    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3159    */
3160
3161   arm_status arm_conv_partial_q15(
3162   q15_t * pSrcA,
3163   uint32_t srcALen,
3164   q15_t * pSrcB,
3165   uint32_t srcBLen,
3166   q15_t * pDst,
3167   uint32_t firstIndex,
3168   uint32_t numPoints);
3169
3170   /**
3171    * @brief Partial convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
3172    * @param[in]       *pSrcA points to the first input sequence.
3173    * @param[in]       srcALen length of the first input sequence.
3174    * @param[in]       *pSrcB points to the second input sequence.
3175    * @param[in]       srcBLen length of the second input sequence.
3176    * @param[out]      *pDst points to the block of output data.
3177    * @param[in]       firstIndex is the first output sample to start with.
3178    * @param[in]       numPoints is the number of output points to be computed.
3179    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3180    */
3181
3182   arm_status arm_conv_partial_fast_q15(
3183                                         q15_t * pSrcA,
3184                                        uint32_t srcALen,
3185                                         q15_t * pSrcB,
3186                                        uint32_t srcBLen,
3187                                        q15_t * pDst,
3188                                        uint32_t firstIndex,
3189                                        uint32_t numPoints);
3190
3191
3192   /**
3193    * @brief Partial convolution of Q15 sequences (fast version, using caller-supplied scratch buffers) for Cortex-M3 and Cortex-M4.
3194    * @param[in]       *pSrcA points to the first input sequence.
3195    * @param[in]       srcALen length of the first input sequence.
3196    * @param[in]       *pSrcB points to the second input sequence.
3197    * @param[in]       srcBLen length of the second input sequence.
3198    * @param[out]      *pDst points to the block of output data.
3199    * @param[in]       firstIndex is the first output sample to start with.
3200    * @param[in]       numPoints is the number of output points to be computed.
3201    * @param[in]       *pScratch1 points to a q15_t scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
3202    * @param[in]       *pScratch2 points to a q15_t scratch buffer of size min(srcALen, srcBLen).
3203    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3204    */
3205
3206   arm_status arm_conv_partial_fast_opt_q15(
3207   q15_t * pSrcA,
3208   uint32_t srcALen,
3209   q15_t * pSrcB,
3210   uint32_t srcBLen,
3211   q15_t * pDst,
3212   uint32_t firstIndex,
3213   uint32_t numPoints,
3214   q15_t * pScratch1,
3215   q15_t * pScratch2);
3216
3217
3218   /**
3219    * @brief Partial convolution of Q31 sequences.
3220    * @param[in]       *pSrcA points to the first input sequence.
3221    * @param[in]       srcALen length of the first input sequence.
3222    * @param[in]       *pSrcB points to the second input sequence.
3223    * @param[in]       srcBLen length of the second input sequence.
3224    * @param[out]      *pDst points to the block of output data.
3225    * @param[in]       firstIndex is the first output sample to start with.
3226    * @param[in]       numPoints is the number of output points to be computed.
3227    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3228    */
3229
3230   arm_status arm_conv_partial_q31(
3231   q31_t * pSrcA,
3232   uint32_t srcALen,
3233   q31_t * pSrcB,
3234   uint32_t srcBLen,
3235   q31_t * pDst,
3236   uint32_t firstIndex,
3237   uint32_t numPoints);
3238
3239
3240   /**
3241    * @brief Partial convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4.
3242    * @param[in]       *pSrcA points to the first input sequence.
3243    * @param[in]       srcALen length of the first input sequence.
3244    * @param[in]       *pSrcB points to the second input sequence.
3245    * @param[in]       srcBLen length of the second input sequence.
3246    * @param[out]      *pDst points to the block of output data.
3247    * @param[in]       firstIndex is the first output sample to start with.
3248    * @param[in]       numPoints is the number of output points to be computed.
3249    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3250    */
3251
3252   arm_status arm_conv_partial_fast_q31(
3253   q31_t * pSrcA,
3254   uint32_t srcALen,
3255   q31_t * pSrcB,
3256   uint32_t srcBLen,
3257   q31_t * pDst,
3258   uint32_t firstIndex,
3259   uint32_t numPoints);
3260
3261
3262   /**
3263    * @brief Partial convolution of Q7 sequences using caller-supplied scratch buffers.
3264    * @param[in]       *pSrcA points to the first input sequence.
3265    * @param[in]       srcALen length of the first input sequence.
3266    * @param[in]       *pSrcB points to the second input sequence.
3267    * @param[in]       srcBLen length of the second input sequence.
3268    * @param[out]      *pDst points to the block of output data.
3269    * @param[in]       firstIndex is the first output sample to start with.
3270    * @param[in]       numPoints is the number of output points to be computed.
3271    * @param[in]  *pScratch1 points to scratch buffer (of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
3272    * @param[in]  *pScratch2 points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
3273    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3274    */
3275
3276   arm_status arm_conv_partial_opt_q7(
3277   q7_t * pSrcA,
3278   uint32_t srcALen,
3279   q7_t * pSrcB,
3280   uint32_t srcBLen,
3281   q7_t * pDst,
3282   uint32_t firstIndex,
3283   uint32_t numPoints,
3284   q15_t * pScratch1,
3285   q15_t * pScratch2);
3286
3287
3288 /**
3289    * @brief Partial convolution of Q7 sequences.
3290    * @param[in]       *pSrcA points to the first input sequence.
3291    * @param[in]       srcALen length of the first input sequence.
3292    * @param[in]       *pSrcB points to the second input sequence.
3293    * @param[in]       srcBLen length of the second input sequence.
3294    * @param[out]      *pDst points to the block of output data.
3295    * @param[in]       firstIndex is the first output sample to start with.
3296    * @param[in]       numPoints is the number of output points to be computed.
3297    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3298    */
3299
3300   arm_status arm_conv_partial_q7(
3301   q7_t * pSrcA,
3302   uint32_t srcALen,
3303   q7_t * pSrcB,
3304   uint32_t srcBLen,
3305   q7_t * pDst,
3306   uint32_t firstIndex,
3307   uint32_t numPoints);
3308
3309
3310
3311   /**
3312    * @brief Instance structure for the Q15 FIR decimator: decimation factor, tap count, and coefficient/state array pointers.
3313    */
3314
3315   typedef struct
3316   {
3317     uint8_t M;                      /**< decimation factor. */
3318     uint16_t numTaps;               /**< number of coefficients in the filter. */
3319     q15_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numTaps. */
3320     q15_t *pState;                   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
3321   } arm_fir_decimate_instance_q15;
3322
3323   /**
3324    * @brief Instance structure for the Q31 FIR decimator: decimation factor, tap count, and coefficient/state array pointers.
3325    */
3326
3327   typedef struct
3328   {
3329     uint8_t M;                  /**< decimation factor. */
3330     uint16_t numTaps;           /**< number of coefficients in the filter. */
3331     q31_t *pCoeffs;              /**< points to the coefficient array. The array is of length numTaps. */
3332     q31_t *pState;               /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
3333
3334   } arm_fir_decimate_instance_q31;
3335
3336   /**
3337    * @brief Instance structure for the floating-point FIR decimator: decimation factor, tap count, and coefficient/state array pointers.
3338    */
3339
3340   typedef struct
3341   {
3342     uint8_t M;                          /**< decimation factor. */
3343     uint16_t numTaps;                   /**< number of coefficients in the filter. */
3344     float32_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numTaps. */
3345     float32_t *pState;                   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
3346
3347   } arm_fir_decimate_instance_f32;
3348
3349
3350
3351   /**
3352    * @brief Processing function for the floating-point FIR decimator.
3353    * @param[in] *S points to an instance of the floating-point FIR decimator structure.
3354    * @param[in] *pSrc points to the block of input data.
3355    * @param[out] *pDst points to the block of output data.
3356    * @param[in] blockSize number of input samples to process per call.
3357    * @return none.
3358    */
3359
3360   void arm_fir_decimate_f32(
3361   const arm_fir_decimate_instance_f32 * S,
3362   float32_t * pSrc,
3363   float32_t * pDst,
3364   uint32_t blockSize);
3365
3366
3367   /**
3368    * @brief  Initialization function for the floating-point FIR decimator.
3369    * @param[in,out] *S points to an instance of the floating-point FIR decimator structure.
3370    * @param[in] numTaps  number of coefficients in the filter.
3371    * @param[in] M  decimation factor.
3372    * @param[in] *pCoeffs points to the filter coefficients (array of length numTaps, per the instance structure).
3373    * @param[in] *pState points to the state buffer (array of length numTaps+blockSize-1, per the instance structure).
3374    * @param[in] blockSize number of input samples to process per call.
3375    * @return    The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
3376    * <code>blockSize</code> is not a multiple of <code>M</code>.
3377    */
3378
3379   arm_status arm_fir_decimate_init_f32(
3380   arm_fir_decimate_instance_f32 * S,
3381   uint16_t numTaps,
3382   uint8_t M,
3383   float32_t * pCoeffs,
3384   float32_t * pState,
3385   uint32_t blockSize);
3386
3387   /**
3388    * @brief Processing function for the Q15 FIR decimator.
3389    * @param[in] *S points to an instance of the Q15 FIR decimator structure.
3390    * @param[in] *pSrc points to the block of input data.
3391    * @param[out] *pDst points to the block of output data.
3392    * @param[in] blockSize number of input samples to process per call.
3393    * @return none.
3394    */
3395
3396   void arm_fir_decimate_q15(
3397   const arm_fir_decimate_instance_q15 * S,
3398   q15_t * pSrc,
3399   q15_t * pDst,
3400   uint32_t blockSize);
3401
3402   /**
3403    * @brief Processing function for the Q15 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.
3404    * @param[in] *S points to an instance of the Q15 FIR decimator structure.
3405    * @param[in] *pSrc points to the block of input data.
3406    * @param[out] *pDst points to the block of output data.
3407    * @param[in] blockSize number of input samples to process per call.
3408    * @return none.
3409    */
3410
3411   void arm_fir_decimate_fast_q15(
3412   const arm_fir_decimate_instance_q15 * S,
3413   q15_t * pSrc,
3414   q15_t * pDst,
3415   uint32_t blockSize);
3416
3417
3418
3419   /**
3420    * @brief  Initialization function for the Q15 FIR decimator.
3421    * @param[in,out] *S points to an instance of the Q15 FIR decimator structure.
3422    * @param[in] numTaps  number of coefficients in the filter.
3423    * @param[in] M  decimation factor.
3424    * @param[in] *pCoeffs points to the filter coefficients (array of length numTaps, per the instance structure).
3425    * @param[in] *pState points to the state buffer (array of length numTaps+blockSize-1, per the instance structure).
3426    * @param[in] blockSize number of input samples to process per call.
3427    * @return    The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
3428    * <code>blockSize</code> is not a multiple of <code>M</code>.
3429    */
3430
3431   arm_status arm_fir_decimate_init_q15(
3432   arm_fir_decimate_instance_q15 * S,
3433   uint16_t numTaps,
3434   uint8_t M,
3435   q15_t * pCoeffs,
3436   q15_t * pState,
3437   uint32_t blockSize);
3438
3439   /**
3440    * @brief Processing function for the Q31 FIR decimator.
3441    * @param[in] *S points to an instance of the Q31 FIR decimator structure.
3442    * @param[in] *pSrc points to the block of input data.
3443    * @param[out] *pDst points to the block of output data.
3444    * @param[in] blockSize number of input samples to process per call.
3445    * @return none.
3446    */
3447
3448   void arm_fir_decimate_q31(
3449   const arm_fir_decimate_instance_q31 * S,
3450   q31_t * pSrc,
3451   q31_t * pDst,
3452   uint32_t blockSize);
3453
3454   /**
3455    * @brief Processing function for the Q31 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.
3456    * @param[in] *S points to an instance of the Q31 FIR decimator structure (not const-qualified in this prototype, unlike arm_fir_decimate_q31()).
3457    * @param[in] *pSrc points to the block of input data.
3458    * @param[out] *pDst points to the block of output data.
3459    * @param[in] blockSize number of input samples to process per call.
3460    * @return none.
3461    */
3462
3463   void arm_fir_decimate_fast_q31(
3464   arm_fir_decimate_instance_q31 * S,
3465   q31_t * pSrc,
3466   q31_t * pDst,
3467   uint32_t blockSize);
3468
3469
3470   /**
3471    * @brief  Initialization function for the Q31 FIR decimator.
3472    * @param[in,out] *S points to an instance of the Q31 FIR decimator structure.
3473    * @param[in] numTaps  number of coefficients in the filter.
3474    * @param[in] M  decimation factor.
3475    * @param[in] *pCoeffs points to the filter coefficients.
3476    * @param[in] *pState points to the state buffer.
3477    * @param[in] blockSize number of input samples to process per call; must be a multiple of <code>M</code>.
3478    * @return    ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
3479    * <code>blockSize</code> is not a multiple of <code>M</code>.
3480    */
3481
3482   arm_status arm_fir_decimate_init_q31(
3483   arm_fir_decimate_instance_q31 * S,
3484   uint16_t numTaps,
3485   uint8_t M,
3486   q31_t * pCoeffs,
3487   q31_t * pState,
3488   uint32_t blockSize);
3489
3490
3491
3492   /**
3493    * @brief Instance structure for the Q15 FIR interpolator.
3494    */
3495
3496   typedef struct
3497   {
3498     uint8_t L;                      /**< upsample factor. */
3499     uint16_t phaseLength;           /**< length of each polyphase filter component. */
3500     q15_t *pCoeffs;                 /**< points to the coefficient array. The array is of length L*phaseLength. */
3501     q15_t *pState;                  /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
3502   } arm_fir_interpolate_instance_q15;
3503
3504   /**
3505    * @brief Instance structure for the Q31 FIR interpolator.
3506    */
3507
3508   typedef struct
3509   {
3510     uint8_t L;                      /**< upsample factor. */
3511     uint16_t phaseLength;           /**< length of each polyphase filter component. */
3512     q31_t *pCoeffs;                 /**< points to the coefficient array. The array is of length L*phaseLength. */
3513     q31_t *pState;                  /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
3514   } arm_fir_interpolate_instance_q31;
3515
3516   /**
3517    * @brief Instance structure for the floating-point FIR interpolator.
3518    */
3519
3520   typedef struct
3521   {
3522     uint8_t L;                     /**< upsample factor. */
3523     uint16_t phaseLength;          /**< length of each polyphase filter component. */
3524     float32_t *pCoeffs;             /**< points to the coefficient array. The array is of length L*phaseLength. */
3525     float32_t *pState;              /**< points to the state variable array. The array is of length blockSize+phaseLength-1 (as documented for the Q15/Q31 variants; TODO confirm against arm_fir_interpolate_init_f32()). */
3526   } arm_fir_interpolate_instance_f32;
3527
3528
3529   /**
3530    * @brief Processing function for the Q15 FIR interpolator.
3531    * @param[in]  *S        points to an instance of the Q15 FIR interpolator structure.
3532    * @param[in]  *pSrc     points to the block of input data.
3533    * @param[out] *pDst     points to the block of output data.
3534    * @param[in]  blockSize number of input samples to process per call.
3535    * @return none.
3536    */
3537
3538   void arm_fir_interpolate_q15(
3539   const arm_fir_interpolate_instance_q15 * S,
3540   q15_t * pSrc,
3541   q15_t * pDst,
3542   uint32_t blockSize);
3543
3544
3545   /**
3546    * @brief  Initialization function for the Q15 FIR interpolator.
3547    * @param[in,out] *S        points to an instance of the Q15 FIR interpolator structure.
3548    * @param[in]     L         upsample factor.
3549    * @param[in]     numTaps   number of filter coefficients in the filter; must be a multiple of <code>L</code>.
3550    * @param[in]     *pCoeffs  points to the filter coefficient buffer.
3551    * @param[in]     *pState   points to the state buffer.
3552    * @param[in]     blockSize number of input samples to process per call.
3553    * @return        ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
3554    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
3555    */
3556
3557   arm_status arm_fir_interpolate_init_q15(
3558   arm_fir_interpolate_instance_q15 * S,
3559   uint8_t L,
3560   uint16_t numTaps,
3561   q15_t * pCoeffs,
3562   q15_t * pState,
3563   uint32_t blockSize);
3564
3565   /**
3566    * @brief Processing function for the Q31 FIR interpolator.
3567    * @param[in] *S        points to an instance of the Q31 FIR interpolator structure.
3568    * @param[in] *pSrc     points to the block of input data.
3569    * @param[out] *pDst    points to the block of output data.
3570    * @param[in] blockSize number of input samples to process per call.
3571    * @return none.
3572    */
3573
3574   void arm_fir_interpolate_q31(
3575   const arm_fir_interpolate_instance_q31 * S,
3576   q31_t * pSrc,
3577   q31_t * pDst,
3578   uint32_t blockSize);
3579
3580   /**
3581    * @brief  Initialization function for the Q31 FIR interpolator.
3582    * @param[in,out] *S        points to an instance of the Q31 FIR interpolator structure.
3583    * @param[in]     L         upsample factor.
3584    * @param[in]     numTaps   number of filter coefficients in the filter; must be a multiple of <code>L</code>.
3585    * @param[in]     *pCoeffs  points to the filter coefficient buffer.
3586    * @param[in]     *pState   points to the state buffer.
3587    * @param[in]     blockSize number of input samples to process per call.
3588    * @return        ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
3589    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
3590    */
3591
3592   arm_status arm_fir_interpolate_init_q31(
3593   arm_fir_interpolate_instance_q31 * S,
3594   uint8_t L,
3595   uint16_t numTaps,
3596   q31_t * pCoeffs,
3597   q31_t * pState,
3598   uint32_t blockSize);
3599
3600
3601   /**
3602    * @brief Processing function for the floating-point FIR interpolator.
3603    * @param[in]  *S        points to an instance of the floating-point FIR interpolator structure.
3604    * @param[in]  *pSrc     points to the block of input data.
3605    * @param[out] *pDst     points to the block of output data.
3606    * @param[in]  blockSize number of input samples to process per call.
3607    * @return none.
3608    */
3609
3610   void arm_fir_interpolate_f32(
3611   const arm_fir_interpolate_instance_f32 * S,
3612   float32_t * pSrc,
3613   float32_t * pDst,
3614   uint32_t blockSize);
3615
3616   /**
3617    * @brief  Initialization function for the floating-point FIR interpolator.
3618    * @param[in,out] *S        points to an instance of the floating-point FIR interpolator structure.
3619    * @param[in]     L         upsample factor.
3620    * @param[in]     numTaps   number of filter coefficients in the filter; must be a multiple of <code>L</code>.
3621    * @param[in]     *pCoeffs  points to the filter coefficient buffer.
3622    * @param[in]     *pState   points to the state buffer.
3623    * @param[in]     blockSize number of input samples to process per call.
3624    * @return        ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_LENGTH_ERROR if
3625    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
3626    */
3627
3628   arm_status arm_fir_interpolate_init_f32(
3629   arm_fir_interpolate_instance_f32 * S,
3630   uint8_t L,
3631   uint16_t numTaps,
3632   float32_t * pCoeffs,
3633   float32_t * pState,
3634   uint32_t blockSize);
3635
3636   /**
3637    * @brief Instance structure for the high precision Q31 Biquad cascade filter.
3638    */
3639
3640   typedef struct
3641   {
3642     uint8_t numStages;       /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
3643     q63_t *pState;           /**< points to the array of state variables (q63_t).  The array is of length 4*numStages. */
3644     q31_t *pCoeffs;          /**< points to the array of coefficients.  The array is of length 5*numStages. */
3645     uint8_t postShift;       /**< additional shift, in bits, applied to each output sample. */
3646
3647   } arm_biquad_cas_df1_32x64_ins_q31;
3648
3649
3650   /** @brief Processing function for the high precision Q31 Biquad cascade filter.
3651    * @param[in]  *S        points to an instance of the high precision Q31 Biquad cascade filter structure.
3652    * @param[in]  *pSrc     points to the block of input data.
3653    * @param[out] *pDst     points to the block of output data.
3654    * @param[in]  blockSize number of samples to process.
3655    * @return none.
3656    */
3657
3658   void arm_biquad_cas_df1_32x64_q31(
3659   const arm_biquad_cas_df1_32x64_ins_q31 * S,
3660   q31_t * pSrc,
3661   q31_t * pDst,
3662   uint32_t blockSize);
3663
3664
3665   /** @brief Initialization function for the high precision Q31 Biquad cascade filter.
3666    * @param[in,out] *S           points to an instance of the high precision Q31 Biquad cascade filter structure.
3667    * @param[in]     numStages    number of 2nd order stages in the filter.
3668    * @param[in]     *pCoeffs     points to the filter coefficients.
3669    * @param[in]     *pState      points to the state buffer (q63_t elements).
3670    * @param[in]     postShift    shift to be applied to the output. Varies according to the coefficients format.
3671    * @return        none.
3672    */
3673
3674   void arm_biquad_cas_df1_32x64_init_q31(
3675   arm_biquad_cas_df1_32x64_ins_q31 * S,
3676   uint8_t numStages,
3677   q31_t * pCoeffs,
3678   q63_t * pState,
3679   uint8_t postShift);
3680
3681
3682
3683   /**
3684    * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter.
3685    */
3686
3687   typedef struct
3688   {
3689     uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
3690     float32_t *pState;         /**< points to the array of state variables.  The array is of length 2*numStages. */
3691     float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
3692   } arm_biquad_cascade_df2T_instance_f32;
3693
3694
3695   /**
3696    * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter.
3697    * @param[in]  *S        points to an instance of the filter data structure.
3698    * @param[in]  *pSrc     points to the block of input data.
3699    * @param[out] *pDst     points to the block of output data.
3700    * @param[in]  blockSize number of samples to process.
3701    * @return none.
3702    */
3703
3704   void arm_biquad_cascade_df2T_f32(
3705   const arm_biquad_cascade_df2T_instance_f32 * S,
3706   float32_t * pSrc,
3707   float32_t * pDst,
3708   uint32_t blockSize);
3709
3710
3711   /**
3712    * @brief  Initialization function for the floating-point transposed direct form II Biquad cascade filter.
3713    * @param[in,out] *S           points to an instance of the filter data structure.
3714    * @param[in]     numStages    number of 2nd order stages in the filter.
3715    * @param[in]     *pCoeffs     points to the filter coefficients.
3716    * @param[in]     *pState      points to the state buffer.
3717    * @return        none.
3718    */
3719
3720   void arm_biquad_cascade_df2T_init_f32(
3721   arm_biquad_cascade_df2T_instance_f32 * S,
3722   uint8_t numStages,
3723   float32_t * pCoeffs,
3724   float32_t * pState);
3725
3726
3727
3728   /**
3729    * @brief Instance structure for the Q15 FIR lattice filter.
3730    */
3731
3732   typedef struct
3733   {
3734     uint16_t numStages;                          /**< number of filter stages. */
3735     q15_t *pState;                               /**< points to the state variable array (q15_t). The array is of length numStages. */
3736     q15_t *pCoeffs;                              /**< points to the coefficient array (q15_t). The array is of length numStages. */
3737   } arm_fir_lattice_instance_q15;
3738
3739   /**
3740    * @brief Instance structure for the Q31 FIR lattice filter.
3741    */
3742
3743   typedef struct
3744   {
3745     uint16_t numStages;                          /**< number of filter stages. */
3746     q31_t *pState;                               /**< points to the state variable array (q31_t). The array is of length numStages. */
3747     q31_t *pCoeffs;                              /**< points to the coefficient array (q31_t). The array is of length numStages. */
3748   } arm_fir_lattice_instance_q31;
3749
3750   /**
3751    * @brief Instance structure for the floating-point FIR lattice filter.
3752    */
3753
3754   typedef struct
3755   {
3756     uint16_t numStages;                  /**< number of filter stages. */
3757     float32_t *pState;                   /**< points to the state variable array (float32_t). The array is of length numStages. */
3758     float32_t *pCoeffs;                  /**< points to the coefficient array (float32_t). The array is of length numStages. */
3759   } arm_fir_lattice_instance_f32;
3760
3761   /**
3762    * @brief Initialization function for the Q15 FIR lattice filter.
3763    * @param[in,out] *S points to an instance of the Q15 FIR lattice structure.
3764    * @param[in] numStages  number of filter stages.
3765    * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
3766    * @param[in] *pState points to the state buffer.  The array is of length numStages.
3767    * @return none.
3768    */
3769
3770   void arm_fir_lattice_init_q15(
3771   arm_fir_lattice_instance_q15 * S,
3772   uint16_t numStages,
3773   q15_t * pCoeffs,
3774   q15_t * pState);
3775
3776
3777   /**
3778    * @brief Processing function for the Q15 FIR lattice filter.
3779    * @param[in]  *S        points to an instance of the Q15 FIR lattice structure.
3780    * @param[in]  *pSrc     points to the block of input data.
3781    * @param[out] *pDst     points to the block of output data.
3782    * @param[in]  blockSize number of samples to process.
3783    * @return none.
3784    */
3785   void arm_fir_lattice_q15(
3786   const arm_fir_lattice_instance_q15 * S,
3787   q15_t * pSrc,
3788   q15_t * pDst,
3789   uint32_t blockSize);
3790
3791   /**
3792    * @brief Initialization function for the Q31 FIR lattice filter.
3793    * @param[in,out] *S points to an instance of the Q31 FIR lattice structure.
3794    * @param[in] numStages  number of filter stages.
3795    * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
3796    * @param[in] *pState points to the state buffer.  The array is of length numStages.
3797    * @return none.
3798    */
3799
3800   void arm_fir_lattice_init_q31(
3801   arm_fir_lattice_instance_q31 * S,
3802   uint16_t numStages,
3803   q31_t * pCoeffs,
3804   q31_t * pState);
3805
3806
3807   /**
3808    * @brief Processing function for the Q31 FIR lattice filter.
3809    * @param[in]  *S        points to an instance of the Q31 FIR lattice structure.
3810    * @param[in]  *pSrc     points to the block of input data.
3811    * @param[out] *pDst     points to the block of output data.
3812    * @param[in]  blockSize number of samples to process.
3813    * @return none.
3814    */
3815
3816   void arm_fir_lattice_q31(
3817   const arm_fir_lattice_instance_q31 * S,
3818   q31_t * pSrc,
3819   q31_t * pDst,
3820   uint32_t blockSize);
3821
3822   /**
3823    * @brief Initialization function for the floating-point FIR lattice filter.
3824    * @param[in,out] *S points to an instance of the floating-point FIR lattice structure.
3825    * @param[in] numStages  number of filter stages.
3826    * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
3827    * @param[in] *pState points to the state buffer.  The array is of length numStages.
3828    * @return none.
3829    */
3830
3831   void arm_fir_lattice_init_f32(
3832   arm_fir_lattice_instance_f32 * S,
3833   uint16_t numStages,
3834   float32_t * pCoeffs,
3835   float32_t * pState);
3836
3837   /**
3838    * @brief Processing function for the floating-point FIR lattice filter.
3839    * @param[in]  *S        points to an instance of the floating-point FIR lattice structure.
3840    * @param[in]  *pSrc     points to the block of input data.
3841    * @param[out] *pDst     points to the block of output data.
3842    * @param[in]  blockSize number of samples to process.
3843    * @return none.
3844    */
3845
3846   void arm_fir_lattice_f32(
3847   const arm_fir_lattice_instance_f32 * S,
3848   float32_t * pSrc,
3849   float32_t * pDst,
3850   uint32_t blockSize);
3851
3852   /**
3853    * @brief Instance structure for the Q15 IIR lattice filter.
3854    */
3855   typedef struct
3856   {
3857     uint16_t numStages;                         /**< number of stages in the filter. */
3858     q15_t *pState;                              /**< points to the state variable array. The array is of length numStages+blockSize. */
3859     q15_t *pkCoeffs;                            /**< points to the reflection (k) coefficient array. The array is of length numStages. */
3860     q15_t *pvCoeffs;                            /**< points to the ladder (v) coefficient array. The array is of length numStages+1. */
3861   } arm_iir_lattice_instance_q15;
3862
3863   /**
3864    * @brief Instance structure for the Q31 IIR lattice filter.
3865    */
3866   typedef struct
3867   {
3868     uint16_t numStages;                         /**< number of stages in the filter. */
3869     q31_t *pState;                              /**< points to the state variable array. The array is of length numStages+blockSize. */
3870     q31_t *pkCoeffs;                            /**< points to the reflection (k) coefficient array. The array is of length numStages. */
3871     q31_t *pvCoeffs;                            /**< points to the ladder (v) coefficient array. The array is of length numStages+1. */
3872   } arm_iir_lattice_instance_q31;
3873
3874   /**
3875    * @brief Instance structure for the floating-point IIR lattice filter.
3876    */
3877   typedef struct
3878   {
3879     uint16_t numStages;                         /**< number of stages in the filter. */
3880     float32_t *pState;                          /**< points to the state variable array. The array is of length numStages+blockSize. */
3881     float32_t *pkCoeffs;                        /**< points to the reflection (k) coefficient array. The array is of length numStages. */
3882     float32_t *pvCoeffs;                        /**< points to the ladder (v) coefficient array. The array is of length numStages+1. */
3883   } arm_iir_lattice_instance_f32;
3884
3885   /**
3886    * @brief Processing function for the floating-point IIR lattice filter.
3887    * @param[in]  *S        points to an instance of the floating-point IIR lattice structure.
3888    * @param[in]  *pSrc     points to the block of input data.
3889    * @param[out] *pDst     points to the block of output data.
3890    * @param[in]  blockSize number of samples to process.
3891    * @return none.
3892    */
3893
3894   void arm_iir_lattice_f32(
3895   const arm_iir_lattice_instance_f32 * S,
3896   float32_t * pSrc,
3897   float32_t * pDst,
3898   uint32_t blockSize);
3899
3900   /**
3901    * @brief Initialization function for the floating-point IIR lattice filter.
3902    * @param[in,out] *S points to an instance of the floating-point IIR lattice structure.
3903    * @param[in] numStages number of stages in the filter.
3904    * @param[in] *pkCoeffs points to the reflection coefficient buffer.  The array is of length numStages.
3905    * @param[in] *pvCoeffs points to the ladder coefficient buffer.  The array is of length numStages+1.
3906    * @param[in] *pState points to the state buffer.  The array is of length numStages+blockSize (matches the length documented on arm_iir_lattice_instance_f32; TODO confirm against the implementation).
3907    * @param[in] blockSize number of samples to process.
3908    * @return none.
3909    */
3910
3911   void arm_iir_lattice_init_f32(
3912   arm_iir_lattice_instance_f32 * S,
3913   uint16_t numStages,
3914   float32_t * pkCoeffs,
3915   float32_t * pvCoeffs,
3916   float32_t * pState,
3917   uint32_t blockSize);
3918
3919
3920   /**
3921    * @brief Processing function for the Q31 IIR lattice filter.
3922    * @param[in]  *S        points to an instance of the Q31 IIR lattice structure.
3923    * @param[in]  *pSrc     points to the block of input data.
3924    * @param[out] *pDst     points to the block of output data.
3925    * @param[in]  blockSize number of samples to process.
3926    * @return none.
3927    */
3928
3929   void arm_iir_lattice_q31(
3930   const arm_iir_lattice_instance_q31 * S,
3931   q31_t * pSrc,
3932   q31_t * pDst,
3933   uint32_t blockSize);
3934
3935
3936   /**
3937    * @brief Initialization function for the Q31 IIR lattice filter.
3938    * @param[in,out] *S points to an instance of the Q31 IIR lattice structure.
3939    * @param[in] numStages number of stages in the filter.
3940    * @param[in] *pkCoeffs points to the reflection coefficient buffer.  The array is of length numStages.
3941    * @param[in] *pvCoeffs points to the ladder coefficient buffer.  The array is of length numStages+1.
3942    * @param[in] *pState points to the state buffer.  The array is of length numStages+blockSize.
3943    * @param[in] blockSize number of samples to process.
3944    * @return none.
3945    */
3946
3947   void arm_iir_lattice_init_q31(
3948   arm_iir_lattice_instance_q31 * S,
3949   uint16_t numStages,
3950   q31_t * pkCoeffs,
3951   q31_t * pvCoeffs,
3952   q31_t * pState,
3953   uint32_t blockSize);
3954
3955
3956   /**
3957    * @brief Processing function for the Q15 IIR lattice filter.
3958    * @param[in]  *S        points to an instance of the Q15 IIR lattice structure.
3959    * @param[in]  *pSrc     points to the block of input data.
3960    * @param[out] *pDst     points to the block of output data.
3961    * @param[in]  blockSize number of samples to process.
3962    * @return none.
3963    */
3964
3965   void arm_iir_lattice_q15(
3966   const arm_iir_lattice_instance_q15 * S,
3967   q15_t * pSrc,
3968   q15_t * pDst,
3969   uint32_t blockSize);
3970
3971
3972   /**
3973    * @brief Initialization function for the Q15 IIR lattice filter.
3974    * @param[in,out] *S points to an instance of the fixed-point Q15 IIR lattice structure.
3975    * @param[in] numStages  number of stages in the filter.
3976    * @param[in] *pkCoeffs points to the reflection coefficient buffer.  The array is of length numStages.
3977    * @param[in] *pvCoeffs points to the ladder coefficient buffer.  The array is of length numStages+1.
3978    * @param[in] *pState points to the state buffer.  The array is of length numStages+blockSize.
3979    * @param[in] blockSize number of samples to process per call.
3980    * @return none.
3981    */
3982
3983   void arm_iir_lattice_init_q15(
3984   arm_iir_lattice_instance_q15 * S,
3985   uint16_t numStages,
3986   q15_t * pkCoeffs,
3987   q15_t * pvCoeffs,
3988   q15_t * pState,
3989   uint32_t blockSize);
3990
3991   /**
3992    * @brief Instance structure for the floating-point LMS filter.
3993    */
3994
3995   typedef struct
3996   {
3997     uint16_t numTaps;    /**< number of coefficients in the filter. */
3998     float32_t *pState;   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
3999     float32_t *pCoeffs;  /**< points to the coefficient array. The array is of length numTaps. */
4000     float32_t mu;        /**< step size that controls filter coefficient updates. */
4001   } arm_lms_instance_f32;
4002
4003   /**
4004    * @brief Processing function for the floating-point LMS filter.
4005    * @param[in]  *S points to an instance of the floating-point LMS filter structure.
4006    * @param[in]  *pSrc points to the block of input data.
4007    * @param[in]  *pRef points to the block of reference data.
4008    * @param[out] *pOut points to the block of output data.
4009    * @param[out] *pErr points to the block of error data.
4010    * @param[in]  blockSize number of samples to process.
4011    * @return     none.
4012    */
4013
4014   void arm_lms_f32(
4015   const arm_lms_instance_f32 * S,
4016   float32_t * pSrc,
4017   float32_t * pRef,
4018   float32_t * pOut,
4019   float32_t * pErr,
4020   uint32_t blockSize);
4021
4022   /**
4023    * @brief Initialization function for the floating-point LMS filter.
4024    * @param[in,out] *S points to an instance of the floating-point LMS filter structure.
4025    * @param[in] numTaps  number of filter coefficients.
4026    * @param[in] *pCoeffs points to the coefficient buffer.
4027    * @param[in] *pState points to the state buffer.
4028    * @param[in] mu step size that controls filter coefficient updates.
4029    * @param[in] blockSize number of samples to process.
4030    * @return none.
4031    */
4032
4033   void arm_lms_init_f32(
4034   arm_lms_instance_f32 * S,
4035   uint16_t numTaps,
4036   float32_t * pCoeffs,
4037   float32_t * pState,
4038   float32_t mu,
4039   uint32_t blockSize);
4040
4041   /**
4042    * @brief Instance structure for the Q15 LMS filter.
4043    */
4044
4045   typedef struct
4046   {
4047     uint16_t numTaps;    /**< number of coefficients in the filter. */
4048     q15_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4049     q15_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
4050     q15_t mu;            /**< step size that controls filter coefficient updates. */
4051     uint32_t postShift;  /**< bit shift applied to coefficients. */
4052   } arm_lms_instance_q15;
4053
4054
4055   /**
4056    * @brief Initialization function for the Q15 LMS filter.
4057    * @param[in,out] *S points to an instance of the Q15 LMS filter structure.
4058    * @param[in] numTaps  number of filter coefficients.
4059    * @param[in] *pCoeffs points to the coefficient buffer.
4060    * @param[in] *pState points to the state buffer.
4061    * @param[in] mu step size that controls filter coefficient updates.
4062    * @param[in] blockSize number of samples to process.
4063    * @param[in] postShift bit shift applied to coefficients.
4064    * @return    none.
4065    */
4066
4067   void arm_lms_init_q15(
4068   arm_lms_instance_q15 * S,
4069   uint16_t numTaps,
4070   q15_t * pCoeffs,
4071   q15_t * pState,
4072   q15_t mu,
4073   uint32_t blockSize,
4074   uint32_t postShift);
4075
4076   /**
4077    * @brief Processing function for the Q15 LMS filter.
4078    * @param[in]  *S points to an instance of the Q15 LMS filter structure.
4079    * @param[in]  *pSrc points to the block of input data.
4080    * @param[in]  *pRef points to the block of reference data.
4081    * @param[out] *pOut points to the block of output data.
4082    * @param[out] *pErr points to the block of error data.
4083    * @param[in]  blockSize number of samples to process.
4084    * @return     none.
4085    */
4086
4087   void arm_lms_q15(
4088   const arm_lms_instance_q15 * S,
4089   q15_t * pSrc,
4090   q15_t * pRef,
4091   q15_t * pOut,
4092   q15_t * pErr,
4093   uint32_t blockSize);
4094
4095
4096   /**
4097    * @brief Instance structure for the Q31 LMS filter.
4098    */
4099
4100   typedef struct
4101   {
4102     uint16_t numTaps;    /**< number of coefficients in the filter. */
4103     q31_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4104     q31_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
4105     q31_t mu;            /**< step size that controls filter coefficient updates. */
4106     uint32_t postShift;  /**< bit shift applied to coefficients. */
4107
4108   } arm_lms_instance_q31;
4109
4110   /**
4111    * @brief Processing function for the Q31 LMS filter.
4112    * @param[in]  *S points to an instance of the Q31 LMS filter structure.
4113    * @param[in]  *pSrc points to the block of input data.
4114    * @param[in]  *pRef points to the block of reference data.
4115    * @param[out] *pOut points to the block of output data.
4116    * @param[out] *pErr points to the block of error data.
4117    * @param[in]  blockSize number of samples to process.
4118    * @return     none.
4119    */
4120
4121   void arm_lms_q31(
4122   const arm_lms_instance_q31 * S,
4123   q31_t * pSrc,
4124   q31_t * pRef,
4125   q31_t * pOut,
4126   q31_t * pErr,
4127   uint32_t blockSize);
4128
4129   /**
4130    * @brief Initialization function for the Q31 LMS filter.
4131    * @param[in,out] *S points to an instance of the Q31 LMS filter structure.
4132    * @param[in] numTaps  number of filter coefficients.
4133    * @param[in] *pCoeffs points to the coefficient buffer.
4134    * @param[in] *pState points to the state buffer.
4135    * @param[in] mu step size that controls filter coefficient updates.
4136    * @param[in] blockSize number of samples to process.
4137    * @param[in] postShift bit shift applied to coefficients.
4138    * @return none.
4139    */
4140
4141   void arm_lms_init_q31(
4142   arm_lms_instance_q31 * S,
4143   uint16_t numTaps,
4144   q31_t * pCoeffs,
4145   q31_t * pState,
4146   q31_t mu,
4147   uint32_t blockSize,
4148   uint32_t postShift);
4149
4150   /**
4151    * @brief Instance structure for the floating-point normalized LMS filter.
4152    */
4153
4154   typedef struct
4155   {
4156     uint16_t numTaps;     /**< number of coefficients in the filter. */
4157     float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4158     float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
4159     float32_t mu;        /**< step size that controls filter coefficient updates. */
4160     float32_t energy;    /**< saves previous frame energy. */
4161     float32_t x0;        /**< saves previous input sample. */
4162   } arm_lms_norm_instance_f32;
4163
4164   /**
4165    * @brief Processing function for the floating-point normalized LMS filter.
4166    * @param[in,out] *S points to an instance of the floating-point normalized LMS filter structure (non-const; presumably its saved energy/x0 fields are updated - TODO confirm).
4167    * @param[in] *pSrc points to the block of input data.
4168    * @param[in] *pRef points to the block of reference data.
4169    * @param[out] *pOut points to the block of output data.
4170    * @param[out] *pErr points to the block of error data.
4171    * @param[in] blockSize number of samples to process.
4172    * @return none.
4173    */
4174
4175   void arm_lms_norm_f32(
4176   arm_lms_norm_instance_f32 * S,
4177   float32_t * pSrc,
4178   float32_t * pRef,
4179   float32_t * pOut,
4180   float32_t * pErr,
4181   uint32_t blockSize);
4182
4183   /**
4184    * @brief Initialization function for the floating-point normalized LMS filter.
4185    * @param[in,out] *S points to an instance of the floating-point normalized LMS filter structure.
4186    * @param[in] numTaps  number of filter coefficients.
4187    * @param[in] *pCoeffs points to the coefficient buffer.
4188    * @param[in] *pState points to the state buffer.
4189    * @param[in] mu step size that controls filter coefficient updates.
4190    * @param[in] blockSize number of samples to process.
4191    * @return none.
4192    */
4193
4194   void arm_lms_norm_init_f32(
4195   arm_lms_norm_instance_f32 * S,
4196   uint16_t numTaps,
4197   float32_t * pCoeffs,
4198   float32_t * pState,
4199   float32_t mu,
4200   uint32_t blockSize);
4201
4202
4203   /**
4204    * @brief Instance structure for the Q31 normalized LMS filter; holds the filter configuration and its running state.
4205    */
4206   typedef struct
4207   {
4208     uint16_t numTaps;     /**< number of coefficients in the filter. */
4209     q31_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4210     q31_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
4211     q31_t mu;             /**< step size that controls filter coefficient updates. */
4212     uint8_t postShift;    /**< bit shift applied to coefficients. */
4213     q31_t *recipTable;    /**< points to the reciprocal initial value table. */
4214     q31_t energy;         /**< saves previous frame energy. */
4215     q31_t x0;             /**< saves previous input sample. */
4216   } arm_lms_norm_instance_q31;
4217
4218   /**
4219    * @brief Processing function for the Q31 normalized LMS filter.
4220    * @param[in] *S points to an instance of the Q31 normalized LMS filter structure.
4221    * @param[in] *pSrc points to the block of input data.
4222    * @param[in] *pRef points to the block of reference data.
4223    * @param[out] *pOut points to the block of output data.
4224    * @param[out] *pErr points to the block of error data.
4225    * @param[in] blockSize number of samples to process.
4226    * @return none.
4227    */
4228
4229   void arm_lms_norm_q31(
4230   arm_lms_norm_instance_q31 * S,
4231   q31_t * pSrc,
4232   q31_t * pRef,
4233   q31_t * pOut,
4234   q31_t * pErr,
4235   uint32_t blockSize);
4236
4237   /**
4238    * @brief Initialization function for the Q31 normalized LMS filter.
4239    * @param[in] *S points to an instance of the Q31 normalized LMS filter structure.
4240    * @param[in] numTaps  number of filter coefficients.
4241    * @param[in] *pCoeffs points to coefficient buffer.
4242    * @param[in] *pState points to state buffer.
4243    * @param[in] mu step size that controls filter coefficient updates.
4244    * @param[in] blockSize number of samples to process.
4245    * @param[in] postShift bit shift applied to coefficients.
4246    * @return none.
4247    */
4248
4249   void arm_lms_norm_init_q31(
4250   arm_lms_norm_instance_q31 * S,
4251   uint16_t numTaps,
4252   q31_t * pCoeffs,
4253   q31_t * pState,
4254   q31_t mu,
4255   uint32_t blockSize,
4256   uint8_t postShift);
4257
4258   /**
4259    * @brief Instance structure for the Q15 normalized LMS filter; holds the filter configuration and its running state.
4260    */
4261
4262   typedef struct
4263   {
4264     uint16_t numTaps;    /**< number of coefficients in the filter. */
4265     q15_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4266     q15_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
4267     q15_t mu;            /**< step size that controls filter coefficient updates. */
4268     uint8_t postShift;   /**< bit shift applied to coefficients. */
4269     q15_t *recipTable;   /**< points to the reciprocal initial value table. */
4270     q15_t energy;        /**< saves previous frame energy. */
4271     q15_t x0;            /**< saves previous input sample. */
4272   } arm_lms_norm_instance_q15;
4273
4274   /**
4275    * @brief Processing function for the Q15 normalized LMS filter.
4276    * @param[in] *S points to an instance of the Q15 normalized LMS filter structure.
4277    * @param[in] *pSrc points to the block of input data.
4278    * @param[in] *pRef points to the block of reference data.
4279    * @param[out] *pOut points to the block of output data.
4280    * @param[out] *pErr points to the block of error data.
4281    * @param[in] blockSize number of samples to process.
4282    * @return none.
4283    */
4284
4285   void arm_lms_norm_q15(
4286   arm_lms_norm_instance_q15 * S,
4287   q15_t * pSrc,
4288   q15_t * pRef,
4289   q15_t * pOut,
4290   q15_t * pErr,
4291   uint32_t blockSize);
4292
4293
4294   /**
4295    * @brief Initialization function for the Q15 normalized LMS filter.
4296    * @param[in] *S points to an instance of the Q15 normalized LMS filter structure.
4297    * @param[in] numTaps  number of filter coefficients.
4298    * @param[in] *pCoeffs points to coefficient buffer.
4299    * @param[in] *pState points to state buffer.
4300    * @param[in] mu step size that controls filter coefficient updates.
4301    * @param[in] blockSize number of samples to process.
4302    * @param[in] postShift bit shift applied to coefficients.
4303    * @return none.
4304    */
4305
4306   void arm_lms_norm_init_q15(
4307   arm_lms_norm_instance_q15 * S,
4308   uint16_t numTaps,
4309   q15_t * pCoeffs,
4310   q15_t * pState,
4311   q15_t mu,
4312   uint32_t blockSize,
4313   uint8_t postShift);
4314
4315   /**
4316    * @brief Correlation of floating-point sequences.
4317    * @param[in] *pSrcA points to the first input sequence.
4318    * @param[in] srcALen length of the first input sequence.
4319    * @param[in] *pSrcB points to the second input sequence.
4320    * @param[in] srcBLen length of the second input sequence.
4321    * @param[out] *pDst points to the block of output data, of length 2 * max(srcALen, srcBLen) - 1.
4322    * @return none.
4323    */
4324
4325   void arm_correlate_f32(
4326   float32_t * pSrcA,
4327   uint32_t srcALen,
4328   float32_t * pSrcB,
4329   uint32_t srcBLen,
4330   float32_t * pDst);
4331
4332
4333    /**
4334    * @brief Correlation of Q15 sequences (variant that uses a caller-supplied scratch buffer).
4335    * @param[in] *pSrcA points to the first input sequence.
4336    * @param[in] srcALen length of the first input sequence.
4337    * @param[in] *pSrcB points to the second input sequence.
4338    * @param[in] srcBLen length of the second input sequence.
4339    * @param[out] *pDst points to the block of output data, of length 2 * max(srcALen, srcBLen) - 1.
4340    * @param[in]  *pScratch points to a scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
4341    * @return none.
4342    */
4343   void arm_correlate_opt_q15(
4344   q15_t * pSrcA,
4345   uint32_t srcALen,
4346   q15_t * pSrcB,
4347   uint32_t srcBLen,
4348   q15_t * pDst,
4349   q15_t * pScratch);
4350
4351
4352   /**
4353    * @brief Correlation of Q15 sequences.
4354    * @param[in] *pSrcA points to the first input sequence.
4355    * @param[in] srcALen length of the first input sequence.
4356    * @param[in] *pSrcB points to the second input sequence.
4357    * @param[in] srcBLen length of the second input sequence.
4358    * @param[out] *pDst points to the block of output data, of length 2 * max(srcALen, srcBLen) - 1.
4359    * @return none.
4360    */
4361
4362   void arm_correlate_q15(
4363   q15_t * pSrcA,
4364   uint32_t srcALen,
4365   q15_t * pSrcB,
4366   uint32_t srcBLen,
4367   q15_t * pDst);
4368
4369   /**
4370    * @brief Correlation of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
4371    * @param[in] *pSrcA points to the first input sequence.
4372    * @param[in] srcALen length of the first input sequence.
4373    * @param[in] *pSrcB points to the second input sequence.
4374    * @param[in] srcBLen length of the second input sequence.
4375    * @param[out] *pDst points to the block of output data, of length 2 * max(srcALen, srcBLen) - 1.
4376    * @return none.
4377    */
4378
4379   void arm_correlate_fast_q15(
4380                                q15_t * pSrcA,
4381                               uint32_t srcALen,
4382                                q15_t * pSrcB,
4383                               uint32_t srcBLen,
4384                               q15_t * pDst);
4385
4386
4387
4388   /**
4389    * @brief Correlation of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4, using a caller-supplied scratch buffer.
4390    * @param[in] *pSrcA points to the first input sequence.
4391    * @param[in] srcALen length of the first input sequence.
4392    * @param[in] *pSrcB points to the second input sequence.
4393    * @param[in] srcBLen length of the second input sequence.
4394    * @param[out] *pDst points to the block of output data, of length 2 * max(srcALen, srcBLen) - 1.
4395    * @param[in]  *pScratch points to a scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
4396    * @return none.
4397    */
4398
4399   void arm_correlate_fast_opt_q15(
4400   q15_t * pSrcA,
4401   uint32_t srcALen,
4402   q15_t * pSrcB,
4403   uint32_t srcBLen,
4404   q15_t * pDst,
4405   q15_t * pScratch);
4406
4407   /**
4408    * @brief Correlation of Q31 sequences.
4409    * @param[in] *pSrcA points to the first input sequence.
4410    * @param[in] srcALen length of the first input sequence.
4411    * @param[in] *pSrcB points to the second input sequence.
4412    * @param[in] srcBLen length of the second input sequence.
4413    * @param[out] *pDst points to the block of output data, of length 2 * max(srcALen, srcBLen) - 1.
4414    * @return none.
4415    */
4416
4417   void arm_correlate_q31(
4418   q31_t * pSrcA,
4419   uint32_t srcALen,
4420   q31_t * pSrcB,
4421   uint32_t srcBLen,
4422   q31_t * pDst);
4423
4424   /**
4425    * @brief Correlation of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4.
4426    * @param[in] *pSrcA points to the first input sequence.
4427    * @param[in] srcALen length of the first input sequence.
4428    * @param[in] *pSrcB points to the second input sequence.
4429    * @param[in] srcBLen length of the second input sequence.
4430    * @param[out] *pDst points to the block of output data, of length 2 * max(srcALen, srcBLen) - 1.
4431    * @return none.
4432    */
4433
4434   void arm_correlate_fast_q31(
4435   q31_t * pSrcA,
4436   uint32_t srcALen,
4437   q31_t * pSrcB,
4438   uint32_t srcBLen,
4439   q31_t * pDst);
4440
4441
4442
4443  /**
4444    * @brief Correlation of Q7 sequences (variant that uses caller-supplied scratch buffers).
4445    * @param[in] *pSrcA points to the first input sequence.
4446    * @param[in] srcALen length of the first input sequence.
4447    * @param[in] *pSrcB points to the second input sequence.
4448    * @param[in] srcBLen length of the second input sequence.
4449    * @param[out] *pDst points to the block of output data, of length 2 * max(srcALen, srcBLen) - 1.
4450    * @param[in]  *pScratch1 points to a scratch buffer (of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
4451    * @param[in]  *pScratch2 points to a scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
4452    * @return none.
4453    */
4454
4455   void arm_correlate_opt_q7(
4456   q7_t * pSrcA,
4457   uint32_t srcALen,
4458   q7_t * pSrcB,
4459   uint32_t srcBLen,
4460   q7_t * pDst,
4461   q15_t * pScratch1,
4462   q15_t * pScratch2);
4463
4464
4465   /**
4466    * @brief Correlation of Q7 sequences.
4467    * @param[in] *pSrcA points to the first input sequence.
4468    * @param[in] srcALen length of the first input sequence.
4469    * @param[in] *pSrcB points to the second input sequence.
4470    * @param[in] srcBLen length of the second input sequence.
4471    * @param[out] *pDst points to the block of output data, of length 2 * max(srcALen, srcBLen) - 1.
4472    * @return none.
4473    */
4474
4475   void arm_correlate_q7(
4476   q7_t * pSrcA,
4477   uint32_t srcALen,
4478   q7_t * pSrcB,
4479   uint32_t srcBLen,
4480   q7_t * pDst);
4481
4482
4483   /**
4484    * @brief Instance structure for the floating-point sparse FIR filter; holds the coefficients, tap delays and state buffer bookkeeping.
4485    */
4486   typedef struct
4487   {
4488     uint16_t numTaps;             /**< number of coefficients in the filter. */
4489     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4490     float32_t *pState;            /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4491     float32_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps. */
4492     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4493     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4494   } arm_fir_sparse_instance_f32;
4495
4496   /**
4497    * @brief Instance structure for the Q31 sparse FIR filter; holds the coefficients, tap delays and state buffer bookkeeping.
4498    */
4499
4500   typedef struct
4501   {
4502     uint16_t numTaps;             /**< number of coefficients in the filter. */
4503     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4504     q31_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4505     q31_t *pCoeffs;               /**< points to the coefficient array. The array is of length numTaps. */
4506     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4507     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4508   } arm_fir_sparse_instance_q31;
4509
4510   /**
4511    * @brief Instance structure for the Q15 sparse FIR filter; holds the coefficients, tap delays and state buffer bookkeeping.
4512    */
4513
4514   typedef struct
4515   {
4516     uint16_t numTaps;             /**< number of coefficients in the filter. */
4517     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4518     q15_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4519     q15_t *pCoeffs;               /**< points to the coefficient array. The array is of length numTaps. */
4520     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4521     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4522   } arm_fir_sparse_instance_q15;
4523
4524   /**
4525    * @brief Instance structure for the Q7 sparse FIR filter; holds the coefficients, tap delays and state buffer bookkeeping.
4526    */
4527
4528   typedef struct
4529   {
4530     uint16_t numTaps;             /**< number of coefficients in the filter. */
4531     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4532     q7_t *pState;                 /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4533     q7_t *pCoeffs;                /**< points to the coefficient array. The array is of length numTaps. */
4534     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4535     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4536   } arm_fir_sparse_instance_q7;
4537
4538   /**
4539    * @brief Processing function for the floating-point sparse FIR filter.
4540    * @param[in]  *S          points to an instance of the floating-point sparse FIR structure.
4541    * @param[in]  *pSrc       points to the block of input data.
4542    * @param[out] *pDst       points to the block of output data.
4543    * @param[in]  *pScratchIn points to a temporary buffer of size blockSize.
4544    * @param[in]  blockSize   number of input samples to process per call.
4545    * @return none.
4546    */
4547
4548   void arm_fir_sparse_f32(
4549   arm_fir_sparse_instance_f32 * S,
4550   float32_t * pSrc,
4551   float32_t * pDst,
4552   float32_t * pScratchIn,
4553   uint32_t blockSize);
4554
4555   /**
4556    * @brief  Initialization function for the floating-point sparse FIR filter.
4557    * @param[in,out] *S         points to an instance of the floating-point sparse FIR structure.
4558    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4559    * @param[in]     *pCoeffs   points to the array of filter coefficients.
4560    * @param[in]     *pState    points to the state buffer.
4561    * @param[in]     *pTapDelay points to the array of offset times.
4562    * @param[in]     maxDelay   maximum offset time supported.
4563    * @param[in]     blockSize  number of samples that will be processed per block.
4564    * @return none.
4565    */
4566
4567   void arm_fir_sparse_init_f32(
4568   arm_fir_sparse_instance_f32 * S,
4569   uint16_t numTaps,
4570   float32_t * pCoeffs,
4571   float32_t * pState,
4572   int32_t * pTapDelay,
4573   uint16_t maxDelay,
4574   uint32_t blockSize);
4575
4576   /**
4577    * @brief Processing function for the Q31 sparse FIR filter.
4578    * @param[in]  *S          points to an instance of the Q31 sparse FIR structure.
4579    * @param[in]  *pSrc       points to the block of input data.
4580    * @param[out] *pDst       points to the block of output data.
4581    * @param[in]  *pScratchIn points to a temporary buffer of size blockSize.
4582    * @param[in]  blockSize   number of input samples to process per call.
4583    * @return none.
4584    */
4585
4586   void arm_fir_sparse_q31(
4587   arm_fir_sparse_instance_q31 * S,
4588   q31_t * pSrc,
4589   q31_t * pDst,
4590   q31_t * pScratchIn,
4591   uint32_t blockSize);
4592
4593   /**
4594    * @brief  Initialization function for the Q31 sparse FIR filter.
4595    * @param[in,out] *S         points to an instance of the Q31 sparse FIR structure.
4596    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4597    * @param[in]     *pCoeffs   points to the array of filter coefficients.
4598    * @param[in]     *pState    points to the state buffer.
4599    * @param[in]     *pTapDelay points to the array of offset times.
4600    * @param[in]     maxDelay   maximum offset time supported.
4601    * @param[in]     blockSize  number of samples that will be processed per block.
4602    * @return none.
4603    */
4604
4605   void arm_fir_sparse_init_q31(
4606   arm_fir_sparse_instance_q31 * S,
4607   uint16_t numTaps,
4608   q31_t * pCoeffs,
4609   q31_t * pState,
4610   int32_t * pTapDelay,
4611   uint16_t maxDelay,
4612   uint32_t blockSize);
4613
4614   /**
4615    * @brief Processing function for the Q15 sparse FIR filter.
4616    * @param[in]  *S           points to an instance of the Q15 sparse FIR structure.
4617    * @param[in]  *pSrc        points to the block of input data.
4618    * @param[out] *pDst        points to the block of output data.
4619    * @param[in]  *pScratchIn  points to a temporary buffer (of type q15_t) of size blockSize.
4620    * @param[in]  *pScratchOut points to a temporary buffer (of type q31_t) of size blockSize.
4621    * @param[in]  blockSize    number of input samples to process per call.
4622    * @return none.
4623    */
4624
4625   void arm_fir_sparse_q15(
4626   arm_fir_sparse_instance_q15 * S,
4627   q15_t * pSrc,
4628   q15_t * pDst,
4629   q15_t * pScratchIn,
4630   q31_t * pScratchOut,
4631   uint32_t blockSize);
4632
4633
4634   /**
4635    * @brief  Initialization function for the Q15 sparse FIR filter.
4636    * @param[in,out] *S         points to an instance of the Q15 sparse FIR structure.
4637    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4638    * @param[in]     *pCoeffs   points to the array of filter coefficients.
4639    * @param[in]     *pState    points to the state buffer.
4640    * @param[in]     *pTapDelay points to the array of offset times.
4641    * @param[in]     maxDelay   maximum offset time supported.
4642    * @param[in]     blockSize  number of samples that will be processed per block.
4643    * @return none.
4644    */
4645
4646   void arm_fir_sparse_init_q15(
4647   arm_fir_sparse_instance_q15 * S,
4648   uint16_t numTaps,
4649   q15_t * pCoeffs,
4650   q15_t * pState,
4651   int32_t * pTapDelay,
4652   uint16_t maxDelay,
4653   uint32_t blockSize);
4654
4655   /**
4656    * @brief Processing function for the Q7 sparse FIR filter.
4657    * @param[in]  *S           points to an instance of the Q7 sparse FIR structure.
4658    * @param[in]  *pSrc        points to the block of input data.
4659    * @param[out] *pDst        points to the block of output data.
4660    * @param[in]  *pScratchIn  points to a temporary buffer (of type q7_t) of size blockSize.
4661    * @param[in]  *pScratchOut points to a temporary buffer (of type q31_t) of size blockSize.
4662    * @param[in]  blockSize    number of input samples to process per call.
4663    * @return none.
4664    */
4665
4666   void arm_fir_sparse_q7(
4667   arm_fir_sparse_instance_q7 * S,
4668   q7_t * pSrc,
4669   q7_t * pDst,
4670   q7_t * pScratchIn,
4671   q31_t * pScratchOut,
4672   uint32_t blockSize);
4673
4674   /**
4675    * @brief  Initialization function for the Q7 sparse FIR filter.
4676    * @param[in,out] *S         points to an instance of the Q7 sparse FIR structure.
4677    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4678    * @param[in]     *pCoeffs   points to the array of filter coefficients.
4679    * @param[in]     *pState    points to the state buffer.
4680    * @param[in]     *pTapDelay points to the array of offset times.
4681    * @param[in]     maxDelay   maximum offset time supported.
4682    * @param[in]     blockSize  number of samples that will be processed per block.
4683    * @return none.
4684    */
4685
4686   void arm_fir_sparse_init_q7(
4687   arm_fir_sparse_instance_q7 * S,
4688   uint16_t numTaps,
4689   q7_t * pCoeffs,
4690   q7_t * pState,
4691   int32_t * pTapDelay,
4692   uint16_t maxDelay,
4693   uint32_t blockSize);
4694
4695
4696   /**
4697    * @brief  Floating-point sin_cos function.
4698    * @param[in]  theta    input value in degrees.
4699    * @param[out] *pSinVal points to the processed sine output.
4700    * @param[out] *pCosVal points to the processed cosine output.
4701    * @return none.
4702    */
4703
4704   void arm_sin_cos_f32(
4705   float32_t theta,
4706   float32_t * pSinVal,
4707   float32_t * pCosVal);
4708
4709   /**
4710    * @brief  Q31 sin_cos function.
4711    * @param[in]  theta    scaled input value in degrees.
4712    * @param[out] *pSinVal points to the processed sine output.
4713    * @param[out] *pCosVal points to the processed cosine output.
4714    * @return none.
4715    */
4716
4717   void arm_sin_cos_q31(
4718   q31_t theta,
4719   q31_t * pSinVal,
4720   q31_t * pCosVal);
4721
4722
4723   /**
4724    * @brief  Floating-point complex conjugate.
4725    * @param[in]  *pSrc points to the input vector.
4726    * @param[out] *pDst points to the output vector.
4727    * @param[in]  numSamples number of complex samples in each vector.
4728    * @return none.
4729    */
4730
4731   void arm_cmplx_conj_f32(
4732   float32_t * pSrc,
4733   float32_t * pDst,
4734   uint32_t numSamples);
4735
4736   /**
4737    * @brief  Q31 complex conjugate.
4738    * @param[in]  *pSrc points to the input vector.
4739    * @param[out] *pDst points to the output vector.
4740    * @param[in]  numSamples number of complex samples in each vector.
4741    * @return none.
4742    */
4743
4744   void arm_cmplx_conj_q31(
4745   q31_t * pSrc,
4746   q31_t * pDst,
4747   uint32_t numSamples);
4748
4749   /**
4750    * @brief  Q15 complex conjugate.
4751    * @param[in]  *pSrc points to the input vector.
4752    * @param[out] *pDst points to the output vector.
4753    * @param[in]  numSamples number of complex samples in each vector.
4754    * @return none.
4755    */
4756
4757   void arm_cmplx_conj_q15(
4758   q15_t * pSrc,
4759   q15_t * pDst,
4760   uint32_t numSamples);
4761
4762
4763
4764   /**
4765    * @brief  Floating-point complex magnitude squared.
4766    * @param[in]  *pSrc points to the complex input vector.
4767    * @param[out] *pDst points to the real output vector.
4768    * @param[in]  numSamples number of complex samples in the input vector.
4769    * @return none.
4770    */
4771
4772   void arm_cmplx_mag_squared_f32(
4773   float32_t * pSrc,
4774   float32_t * pDst,
4775   uint32_t numSamples);
4776
4777   /**
4778    * @brief  Q31 complex magnitude squared.
4779    * @param[in]  *pSrc points to the complex input vector.
4780    * @param[out] *pDst points to the real output vector.
4781    * @param[in]  numSamples number of complex samples in the input vector.
4782    * @return none.
4783    */
4784
4785   void arm_cmplx_mag_squared_q31(
4786   q31_t * pSrc,
4787   q31_t * pDst,
4788   uint32_t numSamples);
4789
4790   /**
4791    * @brief  Q15 complex magnitude squared.
4792    * @param[in]  *pSrc points to the complex input vector.
4793    * @param[out] *pDst points to the real output vector.
4794    * @param[in]  numSamples number of complex samples in the input vector.
4795    * @return none.
4796    */
4797
4798   void arm_cmplx_mag_squared_q15(
4799   q15_t * pSrc,
4800   q15_t * pDst,
4801   uint32_t numSamples);
4802
4803
4804  /**
4805    * @ingroup groupController
4806    */
4807
4808   /**
4809    * @defgroup PID PID Motor Control
4810    *
4811    * A Proportional Integral Derivative (PID) controller is a generic feedback control
4812    * loop mechanism widely used in industrial control systems.
4813    * A PID controller is the most commonly used type of feedback controller.
4814    *
4815    * This set of functions implements PID controllers
4816    * for Q15, Q31, and floating-point data types.  The functions operate on a single sample
4817    * of data and each call to the function returns a single processed value.
4818    * <code>S</code> points to an instance of the PID control data structure.  <code>in</code>
4819    * is the input sample value. The functions return the output value.
4820    *
4821    * \par Algorithm:
4822    * <pre>
4823    *    y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]
4824    *    A0 = Kp + Ki + Kd
4825    *    A1 = (-Kp ) - (2 * Kd )
4826    *    A2 = Kd  </pre>
4827    *
4828    * \par
4829    * where \c Kp is proportional constant, \c Ki is Integral constant and \c Kd is Derivative constant
4830    *
4831    * \par
4832    * \image html PID.gif "Proportional Integral Derivative Controller"
4833    *
4834    * \par
4835    * The PID controller calculates an "error" value as the difference between
4836    * the measured output and the reference input.
4837    * The controller attempts to minimize the error by adjusting the process control inputs.
4838    * The proportional value determines the reaction to the current error,
4839    * the integral value determines the reaction based on the sum of recent errors,
4840    * and the derivative value determines the reaction based on the rate at which the error has been changing.
4841    *
4842    * \par Instance Structure
4843    * The Gains A0, A1, A2 and state variables for a PID controller are stored together in an instance data structure.
4844    * A separate instance structure must be defined for each PID Controller.
4845    * There are separate instance structure declarations for each of the 3 supported data types.
4846    *
4847    * \par Reset Functions
4848    * There is also an associated reset function for each data type which clears the state array.
4849    *
4850    * \par Initialization Functions
4851    * There is also an associated initialization function for each data type.
4852    * The initialization function performs the following operations:
4853    * - Initializes the Gains A0, A1, A2 from the Kp, Ki, Kd gains.
4854    * - Zeros out the values in the state buffer.
4855    *
4856    * \par
4857    * Instance structure cannot be placed into a const data section and it is recommended to use the initialization function.
4858    *
4859    * \par Fixed-Point Behavior
4860    * Care must be taken when using the fixed-point versions of the PID Controller functions.
4861    * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
4862    * Refer to the function specific documentation below for usage guidelines.
4863    */
4864
4865   /**
4866    * @addtogroup PID
4867    * @{
4868    */
4869
4870   /**
4871    * @brief  Runs one step of the floating-point PID controller.
4872    * @param[in,out] *S points to the floating-point PID instance; its state[] history is updated.
4873    * @param[in] in current input sample x[n].
4874    * @return the new output sample y[n], which is also stored in S->state[2].
4875    */
4876
4877
4878   static __INLINE float32_t arm_pid_f32(
4879   arm_pid_instance_f32 * S,
4880   float32_t in)
4881   {
4882     /* state[0] = x[n-1], state[1] = x[n-2], state[2] = y[n-1] */
4883     const float32_t prevIn = S->state[0];
4884
4885     /* Difference equation: y[n] = A0*x[n] + A1*x[n-1] + A2*x[n-2] + y[n-1] */
4886     const float32_t result = (S->A0 * in) +
4887       (S->A1 * prevIn) + (S->A2 * S->state[1]) + (S->state[2]);
4888
4889     /* Shift the input history by one sample and remember the output */
4890     S->state[1] = prevIn;
4891     S->state[0] = in;
4892     S->state[2] = result;
4893
4894     return result;
4895
4896   }
4897
4898   /**
4899    * @brief  Runs one step of the Q31 PID controller.
4900    * @param[in,out] *S points to the Q31 PID instance; its state[] history is updated.
4901    * @param[in] in current input sample x[n].
4902    * @return the new output sample y[n], which is also stored in S->state[2].
4903    *
4904    * <b>Scaling and Overflow Behavior:</b>
4905    * \par
4906    * The three gain * sample products are summed in an internal 64-bit accumulator.
4907    * The accumulator has a 2.62 format and keeps the full precision of the intermediate products, but provides only a single guard bit.
4908    * Thus, if the accumulated sum overflows, it wraps around rather than clipping.
4909    * In order to avoid overflows completely the input signal must be scaled down by 2 bits as there are four additions.
4910    * The accumulator is then shifted right by 31 bits, narrowed to q31_t, and y[n-1] is added with a plain q31_t addition (no explicit saturation is applied in this function).
4911    */
4912
4913   static __INLINE q31_t arm_pid_q31(
4914   arm_pid_instance_q31 * S,
4915   q31_t in)
4916   {
4917     q63_t sum;
4918     q31_t result;
4919     /* state[0] = x[n-1], state[1] = x[n-2], state[2] = y[n-1] */
4920     const q31_t prevIn = S->state[0];
4921
4922     /* Accumulate the three gain * sample products in 64 bits */
4923     sum = (q63_t) S->A0 * in;
4924
4925     sum += (q63_t) S->A1 * prevIn;
4926
4927     sum += (q63_t) S->A2 * S->state[1];
4928
4929     /* Drop the 31 extra fractional bits to get back to 1.31 */
4930     result = (q31_t) (sum >> 31u);
4931
4932     /* Add the previous output y[n-1] */
4933     result += S->state[2];
4934
4935     /* Shift the input history by one sample and remember the output */
4936     S->state[1] = prevIn;
4937     S->state[0] = in;
4938     S->state[2] = result;
4939
4940     /* Hand the new output back to the caller */
4941     return result;
4942
4943   }
4944
4945   /**
4946    * @brief  Process function for the Q15 PID Control.
4947    * @param[in,out] *S points to an instance of the Q15 PID Control structure; its state[] entries are updated.
4948    * @param[in] in input sample to process.
4949    * @return out processed output sample, saturated to 1.15 format and also stored in S->state[2].
4950    *
4951    * <b>Scaling and Overflow Behavior:</b>
4952    * \par
4953    * The function is implemented using a 64-bit internal accumulator.
4954    * Both Gains and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
4955    * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
4956    * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
4957    * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding the low 15 bits.
4958    * Lastly, the accumulator is saturated to yield a result in 1.15 format.
4959    */
4960
4961   static __INLINE q15_t arm_pid_q15(
4962   arm_pid_instance_q15 * S,
4963   q15_t in)
4964   {
4965     q63_t acc;
4966     q15_t out;
4967
4968 #ifndef ARM_MATH_CM0_FAMILY
4969     __SIMD32_TYPE *vstate;
4970
4971     /* Intrinsic-based path, compiled when ARM_MATH_CM0_FAMILY is not defined */
4972
4973     /* acc = A0 * x[n]  */
4974     acc = (q31_t) __SMUAD(S->A0, in);
4975
4976     /* acc += A1 * x[n-1] + A2 * x[n-2]; NOTE(review): one __SMLALD on S->A1 and the word at S->state - presumably A1/A2 are packed in S->A1, confirm against the instance struct */
4977     vstate = __SIMD32_CONST(S->state);
4978     acc = __SMLALD(S->A1, (q31_t) *vstate, acc);
4979
4980 #else
4981     /* Plain C path: acc = A0 * x[n]  */
4982     acc = ((q31_t) S->A0) * in;
4983
4984     /* acc += A1 * x[n-1] + A2 * x[n-2]  */
4985     acc += (q31_t) S->A1 * S->state[0];
4986     acc += (q31_t) S->A2 * S->state[1];
4987
4988 #endif
4989
4990     /* acc += y[n-1], shifted left by 15 bits to line up with the 2.30 products */
4991     acc += (q31_t) S->state[2] << 15;
4992
4993     /* shift back by 15 bits and saturate the output */
4994     out = (q15_t) (__SSAT((acc >> 15), 16));
4995
4996     /* Update state: state[0] = x[n], state[1] = previous x[n], state[2] = y[n] */
4997     S->state[1] = S->state[0];
4998     S->state[0] = in;
4999     S->state[2] = out;
5000
5001     /* return to application */
5002     return (out);
5003
5004   }
5005
5006   /**
5007    * @} end of PID group
5008    */
5009
5010
5011   /**
5012    * @brief Floating-point matrix inverse.
5013    * @param[in]  *src points to the instance of the input floating-point matrix structure.
5014    * @param[out] *dst points to the instance of the output floating-point matrix structure.
5015    * @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match.
5016    * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status ARM_MATH_SINGULAR.
5017    */
5018
5019   arm_status arm_mat_inverse_f32(
5020   const arm_matrix_instance_f32 * src,
5021   arm_matrix_instance_f32 * dst);
5022
5023
5024
5025   /**
5026    * @ingroup groupController
5027    */
5028
5029
5030   /**
5031    * @defgroup clarke Vector Clarke Transform
5032    * Forward Clarke transform converts the instantaneous stator phases into a two-coordinate time invariant vector.
5033    * Generally the Clarke transform uses three-phase currents <code>Ia, Ib and Ic</code> to calculate currents
5034    * in the two-phase orthogonal stator axis <code>Ialpha</code> and <code>Ibeta</code>.
5035    * When <code>Ialpha</code> is superposed with <code>Ia</code> as shown in the figure below
5036    * \image html clarke.gif Stator current space vector and its components in (a,b).
5037    * and <code>Ia + Ib + Ic = 0</code>, in this condition <code>Ialpha</code> and <code>Ibeta</code>
5038    * can be calculated using only <code>Ia</code> and <code>Ib</code>.
5039    *
5040    * The function operates on a single sample of data and each call to the function returns the processed output.
5041    * The library provides separate functions for Q31 and floating-point data types.
5042    * \par Algorithm
5043    * \image html clarkeFormula.gif
5044    * where <code>Ia</code> and <code>Ib</code> are the instantaneous stator phases and
5045    * <code>pIalpha</code> and <code>pIbeta</code> are the two coordinates of time invariant vector.
5046    * \par Fixed-Point Behavior
5047    * Care must be taken when using the Q31 version of the Clarke transform.
5048    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5049    * Refer to the function specific documentation below for usage guidelines.
5050    */
5051
5052   /**
5053    * @addtogroup clarke
5054    * @{
5055    */
5056
5057   /**
5058    *
5059    * @brief  Floating-point Clarke transform
5060    * @param[in]       Ia       input three-phase coordinate <code>a</code>
5061    * @param[in]       Ib       input three-phase coordinate <code>b</code>
5062    * @param[out]      *pIalpha points to output two-phase orthogonal vector axis alpha
5063    * @param[out]      *pIbeta  points to output two-phase orthogonal vector axis beta
5064    * @return none.
5065    */
5066
5067   static __INLINE void arm_clarke_f32(
5068   float32_t Ia,
5069   float32_t Ib,
5070   float32_t * pIalpha,
5071   float32_t * pIbeta)
5072   {
5073     /* Calculate pIalpha using the equation, pIalpha = Ia */
5074     *pIalpha = Ia;
5075
5076     /* Calculate pIbeta using the equation, pIbeta = (1/sqrt(3)) * Ia + (2/sqrt(3)) * Ib */
5077     *pIbeta =
5078       ((float32_t) 0.57735026919 * Ia + (float32_t) 1.15470053838 * Ib);
5079
5080   }
5081
5082   /**
5083    * @brief  Clarke transform for Q31 version
5084    * @param[in]       Ia       input three-phase coordinate <code>a</code>
5085    * @param[in]       Ib       input three-phase coordinate <code>b</code>
5086    * @param[out]      *pIalpha points to output two-phase orthogonal vector axis alpha
5087    * @param[out]      *pIbeta  points to output two-phase orthogonal vector axis beta
5088    * @return none.
5089    *
5090    * <b>Scaling and Overflow Behavior:</b>
5091    * \par
5092    * The function is implemented using an internal 32-bit accumulator.
5093    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5094    * There is saturation on the addition, hence there is no risk of overflow.
5095    */
5096
5097   static __INLINE void arm_clarke_q31(
5098   q31_t Ia,
5099   q31_t Ib,
5100   q31_t * pIalpha,
5101   q31_t * pIbeta)
5102   {
5103     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5104
5105     /* Calculating pIalpha from Ia by equation pIalpha = Ia */
5106     *pIalpha = Ia;
5107
5108     /* Intermediate product is calculated by (1/(sqrt(3)) * Ia) */
5109     product1 = (q31_t) (((q63_t) Ia * 0x24F34E8B) >> 30);
5110
5111     /* Intermediate product is calculated by (2/sqrt(3) * Ib) */
5112     product2 = (q31_t) (((q63_t) Ib * 0x49E69D16) >> 30);
5113
5114     /* pIbeta is calculated by adding the intermediate products */
5115     *pIbeta = __QADD(product1, product2);
5116   }
5117
5118   /**
5119    * @} end of clarke group
5120    */
5121
5122   /**
5123    * @brief  Converts the elements of the Q7 vector to Q31 vector.
5124    * @param[in]  *pSrc     input pointer
5125    * @param[out]  *pDst    output pointer
5126    * @param[in]  blockSize number of samples to process
5127    * @return none.
5128    */
5129   void arm_q7_to_q31(
5130   q7_t * pSrc,
5131   q31_t * pDst,
5132   uint32_t blockSize);
5133
5134
5135
5136
5137   /**
5138    * @ingroup groupController
5139    */
5140
5141   /**
5142    * @defgroup inv_clarke Vector Inverse Clarke Transform
5143    * Inverse Clarke transform converts the two-coordinate time invariant vector into instantaneous stator phases.
5144    *
5145    * The function operates on a single sample of data and each call to the function returns the processed output.
5146    * The library provides separate functions for Q31 and floating-point data types.
5147    * \par Algorithm
5148    * \image html clarkeInvFormula.gif
5149    * where <code>pIa</code> and <code>pIb</code> are the instantaneous stator phases and
5150    * <code>Ialpha</code> and <code>Ibeta</code> are the two coordinates of time invariant vector.
5151    * \par Fixed-Point Behavior
5152    * Care must be taken when using the Q31 version of the Clarke transform.
5153    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5154    * Refer to the function specific documentation below for usage guidelines.
5155    */
5156
5157   /**
5158    * @addtogroup inv_clarke
5159    * @{
5160    */
5161
5162    /**
5163    * @brief  Floating-point Inverse Clarke transform
5164    * @param[in]       Ialpha  input two-phase orthogonal vector axis alpha
5165    * @param[in]       Ibeta   input two-phase orthogonal vector axis beta
5166    * @param[out]      *pIa    points to output three-phase coordinate <code>a</code>
5167    * @param[out]      *pIb    points to output three-phase coordinate <code>b</code>
5168    * @return none.
5169    */
5170
5171
5172   static __INLINE void arm_inv_clarke_f32(
5173   float32_t Ialpha,
5174   float32_t Ibeta,
5175   float32_t * pIa,
5176   float32_t * pIb)
5177   {
5178     /* Calculating pIa from Ialpha by equation pIa = Ialpha */
5179     *pIa = Ialpha;
5180
5181     /* Calculating pIb from Ialpha and Ibeta by equation pIb = -(1/2) * Ialpha + (sqrt(3)/2) * Ibeta */
5182     *pIb = -0.5 * Ialpha + (float32_t) 0.8660254039 *Ibeta;
5183
5184   }
5185
5186   /**
5187    * @brief  Inverse Clarke transform for Q31 version
5188    * @param[in]       Ialpha  input two-phase orthogonal vector axis alpha
5189    * @param[in]       Ibeta   input two-phase orthogonal vector axis beta
5190    * @param[out]      *pIa    points to output three-phase coordinate <code>a</code>
5191    * @param[out]      *pIb    points to output three-phase coordinate <code>b</code>
5192    * @return none.
5193    *
5194    * <b>Scaling and Overflow Behavior:</b>
5195    * \par
5196    * The function is implemented using an internal 32-bit accumulator.
5197    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5198    * There is saturation on the subtraction, hence there is no risk of overflow.
5199    */
5200
5201   static __INLINE void arm_inv_clarke_q31(
5202   q31_t Ialpha,
5203   q31_t Ibeta,
5204   q31_t * pIa,
5205   q31_t * pIb)
5206   {
5207     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5208
5209     /* Calculating pIa from Ialpha by equation pIa = Ialpha */
5210     *pIa = Ialpha;
5211
5212     /* Intermediate product is calculated by (1/(2*sqrt(3)) * Ia) */
5213     product1 = (q31_t) (((q63_t) (Ialpha) * (0x40000000)) >> 31);
5214
5215     /* Intermediate product is calculated by (1/sqrt(3) * pIb) */
5216     product2 = (q31_t) (((q63_t) (Ibeta) * (0x6ED9EBA1)) >> 31);
5217
5218     /* pIb is calculated by subtracting the products */
5219     *pIb = __QSUB(product2, product1);
5220
5221   }
5222
5223   /**
5224    * @} end of inv_clarke group
5225    */
5226
5227   /**
5228    * @brief  Converts the elements of the Q7 vector to Q15 vector.
5229    * @param[in]  *pSrc     input pointer
5230    * @param[out] *pDst     output pointer
5231    * @param[in]  blockSize number of samples to process
5232    * @return none.
5233    */
5234   void arm_q7_to_q15(
5235   q7_t * pSrc,
5236   q15_t * pDst,
5237   uint32_t blockSize);
5238
5239
5240
5241   /**
5242    * @ingroup groupController
5243    */
5244
5245   /**
5246    * @defgroup park Vector Park Transform
5247    *
5248    * Forward Park transform converts the input two-coordinate vector to flux and torque components.
5249    * The Park transform can be used to realize the transformation of the <code>Ialpha</code> and the <code>Ibeta</code> currents
5250    * from the stationary to the moving reference frame and control the spatial relationship between
5251    * the stator vector current and rotor flux vector.
5252    * If we consider the d axis aligned with the rotor flux, the diagram below shows the
5253    * current vector and the relationship from the two reference frames:
5254    * \image html park.gif "Stator current space vector and its component in (a,b) and in the d,q rotating reference frame"
5255    *
5256    * The function operates on a single sample of data and each call to the function returns the processed output.
5257    * The library provides separate functions for Q31 and floating-point data types.
5258    * \par Algorithm
5259    * \image html parkFormula.gif
5260    * where <code>Ialpha</code> and <code>Ibeta</code> are the stator vector components,
5261    * <code>pId</code> and <code>pIq</code> are rotor vector components and <code>cosVal</code> and <code>sinVal</code> are the
5262    * cosine and sine values of theta (rotor flux position).
5263    * \par Fixed-Point Behavior
5264    * Care must be taken when using the Q31 version of the Park transform.
5265    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5266    * Refer to the function specific documentation below for usage guidelines.
5267    */
5268
5269   /**
5270    * @addtogroup park
5271    * @{
5272    */
5273
5274   /**
5275    * @brief Floating-point Park transform
5276    * @param[in]       Ialpha input two-phase vector coordinate alpha
5277    * @param[in]       Ibeta  input two-phase vector coordinate beta
5278    * @param[out]      *pId   points to output   rotor reference frame d
5279    * @param[out]      *pIq   points to output   rotor reference frame q
5280    * @param[in]       sinVal sine value of rotation angle theta
5281    * @param[in]       cosVal cosine value of rotation angle theta
5282    * @return none.
5283    *
5284    * The function implements the forward Park transform.
5285    *
5286    */
5287
5288   static __INLINE void arm_park_f32(
5289   float32_t Ialpha,
5290   float32_t Ibeta,
5291   float32_t * pId,
5292   float32_t * pIq,
5293   float32_t sinVal,
5294   float32_t cosVal)
5295   {
5296     /* Calculate pId using the equation, pId = Ialpha * cosVal + Ibeta * sinVal */
5297     *pId = Ialpha * cosVal + Ibeta * sinVal;
5298
5299     /* Calculate pIq using the equation, pIq = - Ialpha * sinVal + Ibeta * cosVal */
5300     *pIq = -Ialpha * sinVal + Ibeta * cosVal;
5301
5302   }
5303
5304   /**
5305    * @brief  Park transform for Q31 version
5306    * @param[in]       Ialpha input two-phase vector coordinate alpha
5307    * @param[in]       Ibeta  input two-phase vector coordinate beta
5308    * @param[out]      *pId   points to output rotor reference frame d
5309    * @param[out]      *pIq   points to output rotor reference frame q
5310    * @param[in]       sinVal sine value of rotation angle theta
5311    * @param[in]       cosVal cosine value of rotation angle theta
5312    * @return none.
5313    *
5314    * <b>Scaling and Overflow Behavior:</b>
5315    * \par
5316    * The function is implemented using an internal 32-bit accumulator.
5317    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5318    * There is saturation on the addition and subtraction, hence there is no risk of overflow.
5319    */
5320
5321
5322   static __INLINE void arm_park_q31(
5323   q31_t Ialpha,
5324   q31_t Ibeta,
5325   q31_t * pId,
5326   q31_t * pIq,
5327   q31_t sinVal,
5328   q31_t cosVal)
5329   {
5330     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5331     q31_t product3, product4;                    /* Temporary variables used to store intermediate results */
5332
5333     /* Intermediate product is calculated by (Ialpha * cosVal) */
5334     product1 = (q31_t) (((q63_t) (Ialpha) * (cosVal)) >> 31);
5335
5336     /* Intermediate product is calculated by (Ibeta * sinVal) */
5337     product2 = (q31_t) (((q63_t) (Ibeta) * (sinVal)) >> 31);
5338
5339
5340     /* Intermediate product is calculated by (Ialpha * sinVal) */
5341     product3 = (q31_t) (((q63_t) (Ialpha) * (sinVal)) >> 31);
5342
5343     /* Intermediate product is calculated by (Ibeta * cosVal) */
5344     product4 = (q31_t) (((q63_t) (Ibeta) * (cosVal)) >> 31);
5345
5346     /* Calculate pId by adding the two intermediate products 1 and 2 */
5347     *pId = __QADD(product1, product2);
5348
5349     /* Calculate pIq by subtracting the two intermediate products 3 from 4 */
5350     *pIq = __QSUB(product4, product3);
5351   }
5352
5353   /**
5354    * @} end of park group
5355    */
5356
5357   /**
5358    * @brief  Converts the elements of the Q7 vector to floating-point vector.
5359    * @param[in]  *pSrc is input pointer
5360    * @param[out]  *pDst is output pointer
5361    * @param[in]  blockSize is the number of samples to process
5362    * @return none.
5363    */
5364   void arm_q7_to_float(
5365   q7_t * pSrc,
5366   float32_t * pDst,
5367   uint32_t blockSize);
5368
5369
5370   /**
5371    * @ingroup groupController
5372    */
5373
5374   /**
5375    * @defgroup inv_park Vector Inverse Park transform
5376    * Inverse Park transform converts the input flux and torque components to two-coordinate vector.
5377    *
5378    * The function operates on a single sample of data and each call to the function returns the processed output.
5379    * The library provides separate functions for Q31 and floating-point data types.
5380    * \par Algorithm
5381    * \image html parkInvFormula.gif
5382    * where <code>pIalpha</code> and <code>pIbeta</code> are the stator vector components,
5383    * <code>Id</code> and <code>Iq</code> are rotor vector components and <code>cosVal</code> and <code>sinVal</code> are the
5384    * cosine and sine values of theta (rotor flux position).
5385    * \par Fixed-Point Behavior
5386    * Care must be taken when using the Q31 version of the Park transform.
5387    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5388    * Refer to the function specific documentation below for usage guidelines.
5389    */
5390
5391   /**
5392    * @addtogroup inv_park
5393    * @{
5394    */
5395
5396    /**
5397    * @brief  Floating-point Inverse Park transform
5398    * @param[in]       Id        input coordinate of rotor reference frame d
5399    * @param[in]       Iq        input coordinate of rotor reference frame q
5400    * @param[out]      *pIalpha  points to output two-phase orthogonal vector axis alpha
5401    * @param[out]      *pIbeta   points to output two-phase orthogonal vector axis beta
5402    * @param[in]       sinVal    sine value of rotation angle theta
5403    * @param[in]       cosVal    cosine value of rotation angle theta
5404    * @return none.
5405    */
5406
5407   static __INLINE void arm_inv_park_f32(
5408   float32_t Id,
5409   float32_t Iq,
5410   float32_t * pIalpha,
5411   float32_t * pIbeta,
5412   float32_t sinVal,
5413   float32_t cosVal)
5414   {
5415     /* Calculate pIalpha using the equation, pIalpha = Id * cosVal - Iq * sinVal */
5416     *pIalpha = Id * cosVal - Iq * sinVal;
5417
5418     /* Calculate pIbeta using the equation, pIbeta = Id * sinVal + Iq * cosVal */
5419     *pIbeta = Id * sinVal + Iq * cosVal;
5420
5421   }
5422
5423
5424   /**
5425    * @brief  Inverse Park transform for Q31 version
5426    * @param[in]       Id        input coordinate of rotor reference frame d
5427    * @param[in]       Iq        input coordinate of rotor reference frame q
5428    * @param[out]      *pIalpha  points to output two-phase orthogonal vector axis alpha
5429    * @param[out]      *pIbeta   points to output two-phase orthogonal vector axis beta
5430    * @param[in]       sinVal    sine value of rotation angle theta
5431    * @param[in]       cosVal    cosine value of rotation angle theta
5432    * @return none.
5433    *
5434    * <b>Scaling and Overflow Behavior:</b>
5435    * \par
5436    * The function is implemented using an internal 32-bit accumulator.
5437    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5438    * There is saturation on the addition, hence there is no risk of overflow.
5439    */
5440
5441
5442   static __INLINE void arm_inv_park_q31(
5443   q31_t Id,
5444   q31_t Iq,
5445   q31_t * pIalpha,
5446   q31_t * pIbeta,
5447   q31_t sinVal,
5448   q31_t cosVal)
5449   {
5450     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5451     q31_t product3, product4;                    /* Temporary variables used to store intermediate results */
5452
5453     /* Intermediate product is calculated by (Id * cosVal) */
5454     product1 = (q31_t) (((q63_t) (Id) * (cosVal)) >> 31);
5455
5456     /* Intermediate product is calculated by (Iq * sinVal) */
5457     product2 = (q31_t) (((q63_t) (Iq) * (sinVal)) >> 31);
5458
5459
5460     /* Intermediate product is calculated by (Id * sinVal) */
5461     product3 = (q31_t) (((q63_t) (Id) * (sinVal)) >> 31);
5462
5463     /* Intermediate product is calculated by (Iq * cosVal) */
5464     product4 = (q31_t) (((q63_t) (Iq) * (cosVal)) >> 31);
5465
5466     /* Calculate pIalpha by using the two intermediate products 1 and 2 */
5467     *pIalpha = __QSUB(product1, product2);
5468
5469     /* Calculate pIbeta by using the two intermediate products 3 and 4 */
5470     *pIbeta = __QADD(product4, product3);
5471
5472   }
5473
5474   /**
5475    * @} end of Inverse park group
5476    */
5477
5478
5479   /**
5480    * @brief  Converts the elements of the Q31 vector to floating-point vector.
5481    * @param[in]  *pSrc is input pointer
5482    * @param[out]  *pDst is output pointer
5483    * @param[in]  blockSize is the number of samples to process
5484    * @return none.
5485    */
5486   void arm_q31_to_float(
5487   q31_t * pSrc,
5488   float32_t * pDst,
5489   uint32_t blockSize);
5490
5491   /**
5492    * @ingroup groupInterpolation
5493    */
5494
5495   /**
5496    * @defgroup LinearInterpolate Linear Interpolation
5497    *
5498    * Linear interpolation is a method of curve fitting using linear polynomials.
5499    * Linear interpolation works by effectively drawing a straight line between two neighboring samples and returning the appropriate point along that line
5500    *
5501    * \par
5502    * \image html LinearInterp.gif "Linear interpolation"
5503    *
5504    * \par
5505    * A  Linear Interpolate function calculates an output value(y), for the input(x)
5506    * using linear interpolation of the input values x0, x1( nearest input values) and the output values y0 and y1(nearest output values)
5507    *
5508    * \par Algorithm:
5509    * <pre>
5510    *       y = y0 + (x - x0) * ((y1 - y0)/(x1-x0))
5511    *       where x0, x1 are nearest values of input x
5512    *             y0, y1 are nearest values to output y
5513    * </pre>
5514    *
5515    * \par
5516    * This set of functions implements Linear interpolation process
5517    * for Q7, Q15, Q31, and floating-point data types.  The functions operate on a single
5518    * sample of data and each call to the function returns a single processed value.
5519    * <code>S</code> points to an instance of the Linear Interpolate function data structure.
5520    * <code>x</code> is the input sample value. The functions returns the output value.
5521    *
5522    * \par
5523    * if x is outside of the table boundary, Linear interpolation returns first value of the table
5524    * if x is below input range and returns last value of table if x is above range.
5525    */
5526
5527   /**
5528    * @addtogroup LinearInterpolate
5529    * @{
5530    */
5531
5532   /**
5533    * @brief  Process function for the floating-point Linear Interpolation Function.
5534    * @param[in,out] *S is an instance of the floating-point Linear Interpolation structure
5535    * @param[in] x input sample to process
5536    * @return y processed output sample.
5537    *
5538    */
5539
5540   static __INLINE float32_t arm_linear_interp_f32(
5541   arm_linear_interp_instance_f32 * S,
5542   float32_t x)
5543   {
5544
5545     float32_t y;
5546     float32_t x0, x1;                            /* Nearest input values */
5547     float32_t y0, y1;                            /* Nearest output values */
5548     float32_t xSpacing = S->xSpacing;            /* spacing between input values */
5549     int32_t i;                                   /* Index variable */
5550     float32_t *pYData = S->pYData;               /* pointer to output table */
5551
5552     /* Calculation of index */
5553     i = (int32_t) ((x - S->x1) / xSpacing);
5554
5555     if(i < 0)
5556     {
5557       /* Iniatilize output for below specified range as least output value of table */
5558       y = pYData[0];
5559     }
5560     else if((uint32_t)i >= S->nValues)
5561     {
5562       /* Iniatilize output for above specified range as last output value of table */
5563       y = pYData[S->nValues - 1];
5564     }
5565     else
5566     {
5567       /* Calculation of nearest input values */
5568       x0 = S->x1 + i * xSpacing;
5569       x1 = S->x1 + (i + 1) * xSpacing;
5570
5571       /* Read of nearest output values */
5572       y0 = pYData[i];
5573       y1 = pYData[i + 1];
5574
5575       /* Calculation of output */
5576       y = y0 + (x - x0) * ((y1 - y0) / (x1 - x0));
5577
5578     }
5579
5580     /* returns output value */
5581     return (y);
5582   }
5583
5584    /**
5585    *
5586    * @brief  Process function for the Q31 Linear Interpolation Function.
5587    * @param[in] *pYData  pointer to Q31 Linear Interpolation table
5588    * @param[in] x input sample to process
5589    * @param[in] nValues number of table values
5590    * @return y processed output sample.
5591    *
5592    * \par
5593    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
5594    * This function can support maximum of table size 2^12.
5595    *
5596    */
5597
5598
5599   static __INLINE q31_t arm_linear_interp_q31(
5600   q31_t * pYData,
5601   q31_t x,
5602   uint32_t nValues)
5603   {
5604     q31_t y;                                     /* output */
5605     q31_t y0, y1;                                /* Nearest output values */
5606     q31_t fract;                                 /* fractional part */
5607     int32_t index;                               /* Index to read nearest output values */
5608
5609     /* Input is in 12.20 format */
5610     /* 12 bits for the table index */
5611     /* Index value calculation */
5612     index = ((x & 0xFFF00000) >> 20);
5613
5614     if(index >= (int32_t)(nValues - 1))
5615     {
5616       return (pYData[nValues - 1]);
5617     }
5618     else if(index < 0)
5619     {
5620       return (pYData[0]);
5621     }
5622     else
5623     {
5624
5625       /* 20 bits for the fractional part */
5626       /* shift left by 11 to keep fract in 1.31 format */
5627       fract = (x & 0x000FFFFF) << 11;
5628
5629       /* Read two nearest output values from the index in 1.31(q31) format */
5630       y0 = pYData[index];
5631       y1 = pYData[index + 1u];
5632
5633       /* Calculation of y0 * (1-fract) and y is in 2.30 format */
5634       y = ((q31_t) ((q63_t) y0 * (0x7FFFFFFF - fract) >> 32));
5635
5636       /* Calculation of y0 * (1-fract) + y1 *fract and y is in 2.30 format */
5637       y += ((q31_t) (((q63_t) y1 * fract) >> 32));
5638
5639       /* Convert y to 1.31 format */
5640       return (y << 1u);
5641
5642     }
5643
5644   }
5645
5646   /**
5647    *
5648    * @brief  Process function for the Q15 Linear Interpolation Function.
5649    * @param[in] *pYData  pointer to Q15 Linear Interpolation table
5650    * @param[in] x input sample to process
5651    * @param[in] nValues number of table values
5652    * @return y processed output sample.
5653    *
5654    * \par
5655    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
5656    * This function can support maximum of table size 2^12.
5657    *
5658    */
5659
5660
5661   static __INLINE q15_t arm_linear_interp_q15(
5662   q15_t * pYData,
5663   q31_t x,
5664   uint32_t nValues)
5665   {
5666     q63_t y;                                     /* output */
5667     q15_t y0, y1;                                /* Nearest output values */
5668     q31_t fract;                                 /* fractional part */
5669     int32_t index;                               /* Index to read nearest output values */
5670
5671     /* Input is in 12.20 format */
5672     /* 12 bits for the table index */
5673     /* Index value calculation */
5674     index = ((x & 0xFFF00000) >> 20u);
5675
5676     if(index >= (int32_t)(nValues - 1))
5677     {
5678       return (pYData[nValues - 1]);
5679     }
5680     else if(index < 0)
5681     {
5682       return (pYData[0]);
5683     }
5684     else
5685     {
5686       /* 20 bits for the fractional part */
5687       /* fract is in 12.20 format */
5688       fract = (x & 0x000FFFFF);
5689
5690       /* Read two nearest output values from the index */
5691       y0 = pYData[index];
5692       y1 = pYData[index + 1u];
5693
5694       /* Calculation of y0 * (1-fract) and y is in 13.35 format */
5695       y = ((q63_t) y0 * (0xFFFFF - fract));
5696
5697       /* Calculation of (y0 * (1-fract) + y1 * fract) and y is in 13.35 format */
5698       y += ((q63_t) y1 * (fract));
5699
5700       /* convert y to 1.15 format */
5701       return (y >> 20);
5702     }
5703
5704
5705   }
5706
5707   /**
5708    *
5709    * @brief  Process function for the Q7 Linear Interpolation Function.
5710    * @param[in] *pYData  pointer to Q7 Linear Interpolation table
5711    * @param[in] x input sample to process
5712    * @param[in] nValues number of table values
5713    * @return y processed output sample.
5714    *
5715    * \par
5716    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
5717    * This function can support maximum of table size 2^12.
5718    */
5719
5720
5721   static __INLINE q7_t arm_linear_interp_q7(
5722   q7_t * pYData,
5723   q31_t x,
5724   uint32_t nValues)
5725   {
5726     q31_t y;                                     /* output */
5727     q7_t y0, y1;                                 /* Nearest output values */
5728     q31_t fract;                                 /* fractional part */
5729     uint32_t index;                              /* Index to read nearest output values */
5730
5731     /* Input is in 12.20 format */
5732     /* 12 bits for the table index */
5733     /* Index value calculation */
5734     if (x < 0)
5735     {
5736       return (pYData[0]);
5737     }
5738     index = (x >> 20) & 0xfff;
5739
5740
5741     if(index >= (nValues - 1))
5742     {
5743       return (pYData[nValues - 1]);
5744     }
5745     else
5746     {
5747
5748       /* 20 bits for the fractional part */
5749       /* fract is in 12.20 format */
5750       fract = (x & 0x000FFFFF);
5751
5752       /* Read two nearest output values from the index and are in 1.7(q7) format */
5753       y0 = pYData[index];
5754       y1 = pYData[index + 1u];
5755
5756       /* Calculation of y0 * (1-fract ) and y is in 13.27(q27) format */
5757       y = ((y0 * (0xFFFFF - fract)));
5758
5759       /* Calculation of y1 * fract + y0 * (1-fract) and y is in 13.27(q27) format */
5760       y += (y1 * fract);
5761
5762       /* convert y to 1.7(q7) format */
5763       return (y >> 20u);
5764
5765     }
5766
5767   }
5768   /**
5769    * @} end of LinearInterpolate group
5770    */
5771
5772   /**
5773    * @brief  Fast approximation to the trigonometric sine function for floating-point data.
5774    * @param[in] x input value in radians.
5775    * @return  sin(x).
5776    */
5777
5778   float32_t arm_sin_f32(
5779   float32_t x);
5780
5781   /**
5782    * @brief  Fast approximation to the trigonometric sine function for Q31 data.
5783    * @param[in] x Scaled input value in radians.
5784    * @return  sin(x).
5785    */
5786
5787   q31_t arm_sin_q31(
5788   q31_t x);
5789
5790   /**
5791    * @brief  Fast approximation to the trigonometric sine function for Q15 data.
5792    * @param[in] x Scaled input value in radians.
5793    * @return  sin(x).
5794    */
5795
5796   q15_t arm_sin_q15(
5797   q15_t x);
5798
5799   /**
5800    * @brief  Fast approximation to the trigonometric cosine function for floating-point data.
5801    * @param[in] x input value in radians.
5802    * @return  cos(x).
5803    */
5804
5805   float32_t arm_cos_f32(
5806   float32_t x);
5807
5808   /**
5809    * @brief Fast approximation to the trigonometric cosine function for Q31 data.
5810    * @param[in] x Scaled input value in radians.
5811    * @return  cos(x).
5812    */
5813
5814   q31_t arm_cos_q31(
5815   q31_t x);
5816
5817   /**
5818    * @brief  Fast approximation to the trigonometric cosine function for Q15 data.
5819    * @param[in] x Scaled input value in radians.
5820    * @return  cos(x).
5821    */
5822
5823   q15_t arm_cos_q15(
5824   q15_t x);
5825
5826
5827   /**
5828    * @ingroup groupFastMath
5829    */
5830
5831
5832   /**
5833    * @defgroup SQRT Square Root
5834    *
5835    * Computes the square root of a number.
5836    * There are separate functions for Q15, Q31, and floating-point data types.
5837    * The square root function is computed using the Newton-Raphson algorithm.
5838    * This is an iterative algorithm of the form:
5839    * <pre>
5840    *      x1 = x0 - f(x0)/f'(x0)
5841    * </pre>
5842    * where <code>x1</code> is the current estimate,
5843    * <code>x0</code> is the previous estimate, and
5844    * <code>f'(x0)</code> is the derivative of <code>f()</code> evaluated at <code>x0</code>.
5845    * For the square root function, the algorithm reduces to:
5846    * <pre>
5847    *     x0 = in/2                         [initial guess]
5848    *     x1 = 1/2 * ( x0 + in / x0)        [each iteration]
5849    * </pre>
5850    */
5851
5852
5853   /**
5854    * @addtogroup SQRT
5855    * @{
5856    */
5857
5858   /**
5859    * @brief  Floating-point square root function.
5860    * @param[in]  in     input value.
5861    * @param[out] *pOut  square root of input value.
5862    * @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
5863    * <code>in</code> is negative value and returns zero output for negative values.
5864    */
5865
5866   static __INLINE arm_status arm_sqrt_f32(
5867   float32_t in,
5868   float32_t * pOut)
5869   {
5870     if(in > 0)
5871     {
5872
5873 //      #if __FPU_USED
5874 #if (__FPU_USED == 1) && defined ( __CC_ARM   )
5875       *pOut = __sqrtf(in);
5876 #else
5877       *pOut = sqrtf(in);
5878 #endif
5879
5880       return (ARM_MATH_SUCCESS);
5881     }
5882     else
5883     {
5884       *pOut = 0.0f;
5885       return (ARM_MATH_ARGUMENT_ERROR);
5886     }
5887
5888   }
5889
5890
5891   /**
5892    * @brief Q31 square root function.
5893    * @param[in]   in    input value.  The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF.
5894    * @param[out]  *pOut square root of input value.
5895    * @return The function returns ARM_MATH_SUCCESS if the input value is positive, or ARM_MATH_ARGUMENT_ERROR if
5896    * <code>in</code> is negative; for negative inputs the output is set to zero.
5897    */
5898   arm_status arm_sqrt_q31(
5899   q31_t in,
5900   q31_t * pOut);
5901
5902   /**
5903    * @brief  Q15 square root function.
5904    * @param[in]   in     input value.  The range of the input value is [0 +1) or 0x0000 to 0x7FFF.
5905    * @param[out]  *pOut  square root of input value.
5906    * @return The function returns ARM_MATH_SUCCESS if the input value is positive, or ARM_MATH_ARGUMENT_ERROR if
5907    * <code>in</code> is negative; for negative inputs the output is set to zero.
5908    */
5909   arm_status arm_sqrt_q15(
5910   q15_t in,
5911   q15_t * pOut);
5912
5913   /**
5914    * @} end of SQRT group
5915    */
5916
5917
5918
5919
5920
5921
5922   /**
5923    * @brief floating-point Circular write function.
5924    */
5925
5926   static __INLINE void arm_circularWrite_f32(
5927   int32_t * circBuffer,
5928   int32_t L,
5929   uint16_t * writeOffset,
5930   int32_t bufferInc,
5931   const int32_t * src,
5932   int32_t srcInc,
5933   uint32_t blockSize)
5934   {
5935     uint32_t i = 0u;
5936     int32_t wOffset;
5937
5938     /* Copy the value of Index pointer that points
5939      * to the current location where the input samples to be copied */
5940     wOffset = *writeOffset;
5941
5942     /* Loop over the blockSize */
5943     i = blockSize;
5944
5945     while(i > 0u)
5946     {
5947       /* copy the input sample to the circular buffer */
5948       circBuffer[wOffset] = *src;
5949
5950       /* Update the input pointer */
5951       src += srcInc;
5952
5953       /* Circularly update wOffset.  Watch out for positive and negative value */
5954       wOffset += bufferInc;
5955       if(wOffset >= L)
5956         wOffset -= L;
5957
5958       /* Decrement the loop counter */
5959       i--;
5960     }
5961
5962     /* Update the index pointer */
5963     *writeOffset = wOffset;
5964   }
5965
5966
5967
5968   /**
5969    * @brief floating-point Circular Read function.
5970    */
5971   static __INLINE void arm_circularRead_f32(
5972   int32_t * circBuffer,
5973   int32_t L,
5974   int32_t * readOffset,
5975   int32_t bufferInc,
5976   int32_t * dst,
5977   int32_t * dst_base,
5978   int32_t dst_length,
5979   int32_t dstInc,
5980   uint32_t blockSize)
5981   {
5982     uint32_t i = 0u;
5983     int32_t rOffset, dst_end;
5984
5985     /* Copy the value of Index pointer that points
5986      * to the current location from where the input samples to be read */
5987     rOffset = *readOffset;
5988     dst_end = (int32_t) (dst_base + dst_length);
5989
5990     /* Loop over the blockSize */
5991     i = blockSize;
5992
5993     while(i > 0u)
5994     {
5995       /* copy the sample from the circular buffer to the destination buffer */
5996       *dst = circBuffer[rOffset];
5997
5998       /* Update the input pointer */
5999       dst += dstInc;
6000
6001       if(dst == (int32_t *) dst_end)
6002       {
6003         dst = dst_base;
6004       }
6005
6006       /* Circularly update rOffset.  Watch out for positive and negative value  */
6007       rOffset += bufferInc;
6008
6009       if(rOffset >= L)
6010       {
6011         rOffset -= L;
6012       }
6013
6014       /* Decrement the loop counter */
6015       i--;
6016     }
6017
6018     /* Update the index pointer */
6019     *readOffset = rOffset;
6020   }
6021
6022   /**
6023    * @brief Q15 Circular write function.
6024    */
6025
6026   static __INLINE void arm_circularWrite_q15(
6027   q15_t * circBuffer,
6028   int32_t L,
6029   uint16_t * writeOffset,
6030   int32_t bufferInc,
6031   const q15_t * src,
6032   int32_t srcInc,
6033   uint32_t blockSize)
6034   {
6035     uint32_t i = 0u;
6036     int32_t wOffset;
6037
6038     /* Copy the value of Index pointer that points
6039      * to the current location where the input samples to be copied */
6040     wOffset = *writeOffset;
6041
6042     /* Loop over the blockSize */
6043     i = blockSize;
6044
6045     while(i > 0u)
6046     {
6047       /* copy the input sample to the circular buffer */
6048       circBuffer[wOffset] = *src;
6049
6050       /* Update the input pointer */
6051       src += srcInc;
6052
6053       /* Circularly update wOffset.  Watch out for positive and negative value */
6054       wOffset += bufferInc;
6055       if(wOffset >= L)
6056         wOffset -= L;
6057
6058       /* Decrement the loop counter */
6059       i--;
6060     }
6061
6062     /* Update the index pointer */
6063     *writeOffset = wOffset;
6064   }
6065
6066
6067
6068   /**
6069    * @brief Q15 Circular Read function.
6070    */
6071   static __INLINE void arm_circularRead_q15(
6072   q15_t * circBuffer,
6073   int32_t L,
6074   int32_t * readOffset,
6075   int32_t bufferInc,
6076   q15_t * dst,
6077   q15_t * dst_base,
6078   int32_t dst_length,
6079   int32_t dstInc,
6080   uint32_t blockSize)
6081   {
6082     uint32_t i = 0;
6083     int32_t rOffset, dst_end;
6084
6085     /* Copy the value of Index pointer that points
6086      * to the current location from where the input samples to be read */
6087     rOffset = *readOffset;
6088
6089     dst_end = (int32_t) (dst_base + dst_length);
6090
6091     /* Loop over the blockSize */
6092     i = blockSize;
6093
6094     while(i > 0u)
6095     {
6096       /* copy the sample from the circular buffer to the destination buffer */
6097       *dst = circBuffer[rOffset];
6098
6099       /* Update the input pointer */
6100       dst += dstInc;
6101
6102       if(dst == (q15_t *) dst_end)
6103       {
6104         dst = dst_base;
6105       }
6106
6107       /* Circularly update wOffset.  Watch out for positive and negative value */
6108       rOffset += bufferInc;
6109
6110       if(rOffset >= L)
6111       {
6112         rOffset -= L;
6113       }
6114
6115       /* Decrement the loop counter */
6116       i--;
6117     }
6118
6119     /* Update the index pointer */
6120     *readOffset = rOffset;
6121   }
6122
6123
6124   /**
6125    * @brief Q7 Circular write function.
6126    */
6127
6128   static __INLINE void arm_circularWrite_q7(
6129   q7_t * circBuffer,
6130   int32_t L,
6131   uint16_t * writeOffset,
6132   int32_t bufferInc,
6133   const q7_t * src,
6134   int32_t srcInc,
6135   uint32_t blockSize)
6136   {
6137     uint32_t i = 0u;
6138     int32_t wOffset;
6139
6140     /* Copy the value of Index pointer that points
6141      * to the current location where the input samples to be copied */
6142     wOffset = *writeOffset;
6143
6144     /* Loop over the blockSize */
6145     i = blockSize;
6146
6147     while(i > 0u)
6148     {
6149       /* copy the input sample to the circular buffer */
6150       circBuffer[wOffset] = *src;
6151
6152       /* Update the input pointer */
6153       src += srcInc;
6154
6155       /* Circularly update wOffset.  Watch out for positive and negative value */
6156       wOffset += bufferInc;
6157       if(wOffset >= L)
6158         wOffset -= L;
6159
6160       /* Decrement the loop counter */
6161       i--;
6162     }
6163
6164     /* Update the index pointer */
6165     *writeOffset = wOffset;
6166   }
6167
6168
6169
6170   /**
6171    * @brief Q7 Circular Read function.
6172    */
6173   static __INLINE void arm_circularRead_q7(
6174   q7_t * circBuffer,
6175   int32_t L,
6176   int32_t * readOffset,
6177   int32_t bufferInc,
6178   q7_t * dst,
6179   q7_t * dst_base,
6180   int32_t dst_length,
6181   int32_t dstInc,
6182   uint32_t blockSize)
6183   {
6184     uint32_t i = 0;
6185     int32_t rOffset, dst_end;
6186
6187     /* Copy the value of Index pointer that points
6188      * to the current location from where the input samples to be read */
6189     rOffset = *readOffset;
6190
6191     dst_end = (int32_t) (dst_base + dst_length);
6192
6193     /* Loop over the blockSize */
6194     i = blockSize;
6195
6196     while(i > 0u)
6197     {
6198       /* copy the sample from the circular buffer to the destination buffer */
6199       *dst = circBuffer[rOffset];
6200
6201       /* Update the input pointer */
6202       dst += dstInc;
6203
6204       if(dst == (q7_t *) dst_end)
6205       {
6206         dst = dst_base;
6207       }
6208
6209       /* Circularly update rOffset.  Watch out for positive and negative value */
6210       rOffset += bufferInc;
6211
6212       if(rOffset >= L)
6213       {
6214         rOffset -= L;
6215       }
6216
6217       /* Decrement the loop counter */
6218       i--;
6219     }
6220
6221     /* Update the index pointer */
6222     *readOffset = rOffset;
6223   }
6224
6225
6226   /**
6227    * @brief  Sum of the squares of the elements of a Q31 vector.
6228    * @param[in]  *pSrc is input pointer
6229    * @param[in]  blockSize is the number of samples to process
6230    * @param[out]  *pResult is output value.
6231    * @return none.
6232    */
6233
6234   void arm_power_q31(
6235   q31_t * pSrc,
6236   uint32_t blockSize,
6237   q63_t * pResult);
6238
6239   /**
6240    * @brief  Sum of the squares of the elements of a floating-point vector.
6241    * @param[in]  *pSrc is input pointer
6242    * @param[in]  blockSize is the number of samples to process
6243    * @param[out]  *pResult is output value.
6244    * @return none.
6245    */
6246
6247   void arm_power_f32(
6248   float32_t * pSrc,
6249   uint32_t blockSize,
6250   float32_t * pResult);
6251
6252   /**
6253    * @brief  Sum of the squares of the elements of a Q15 vector.
6254    * @param[in]  *pSrc is input pointer
6255    * @param[in]  blockSize is the number of samples to process
6256    * @param[out]  *pResult is output value.
6257    * @return none.
6258    */
6259
6260   void arm_power_q15(
6261   q15_t * pSrc,
6262   uint32_t blockSize,
6263   q63_t * pResult);
6264
6265   /**
6266    * @brief  Sum of the squares of the elements of a Q7 vector.
6267    * @param[in]  *pSrc is input pointer
6268    * @param[in]  blockSize is the number of samples to process
6269    * @param[out]  *pResult is output value.
6270    * @return none.
6271    */
6272
6273   void arm_power_q7(
6274   q7_t * pSrc,
6275   uint32_t blockSize,
6276   q31_t * pResult);
6277
6278   /**
6279    * @brief  Mean value of a Q7 vector.
6280    * @param[in]  *pSrc is input pointer
6281    * @param[in]  blockSize is the number of samples to process
6282    * @param[out]  *pResult is output value.
6283    * @return none.
6284    */
6285
6286   void arm_mean_q7(
6287   q7_t * pSrc,
6288   uint32_t blockSize,
6289   q7_t * pResult);
6290
6291   /**
6292    * @brief  Mean value of a Q15 vector.
6293    * @param[in]  *pSrc is input pointer
6294    * @param[in]  blockSize is the number of samples to process
6295    * @param[out]  *pResult is output value.
6296    * @return none.
6297    */
6298   void arm_mean_q15(
6299   q15_t * pSrc,
6300   uint32_t blockSize,
6301   q15_t * pResult);
6302
6303   /**
6304    * @brief  Mean value of a Q31 vector.
6305    * @param[in]  *pSrc is input pointer
6306    * @param[in]  blockSize is the number of samples to process
6307    * @param[out]  *pResult is output value.
6308    * @return none.
6309    */
6310   void arm_mean_q31(
6311   q31_t * pSrc,
6312   uint32_t blockSize,
6313   q31_t * pResult);
6314
6315   /**
6316    * @brief  Mean value of a floating-point vector.
6317    * @param[in]  *pSrc is input pointer
6318    * @param[in]  blockSize is the number of samples to process
6319    * @param[out]  *pResult is output value.
6320    * @return none.
6321    */
6322   void arm_mean_f32(
6323   float32_t * pSrc,
6324   uint32_t blockSize,
6325   float32_t * pResult);
6326
6327   /**
6328    * @brief  Variance of the elements of a floating-point vector.
6329    * @param[in]  *pSrc is input pointer
6330    * @param[in]  blockSize is the number of samples to process
6331    * @param[out]  *pResult is output value.
6332    * @return none.
6333    */
6334
6335   void arm_var_f32(
6336   float32_t * pSrc,
6337   uint32_t blockSize,
6338   float32_t * pResult);
6339
6340   /**
6341    * @brief  Variance of the elements of a Q31 vector.
6342    * @param[in]  *pSrc is input pointer
6343    * @param[in]  blockSize is the number of samples to process
6344    * @param[out]  *pResult is output value.
6345    * @return none.
6346    */
6347
6348   void arm_var_q31(
6349   q31_t * pSrc,
6350   uint32_t blockSize,
6351   q63_t * pResult);
6352
6353   /**
6354    * @brief  Variance of the elements of a Q15 vector.
6355    * @param[in]  *pSrc is input pointer
6356    * @param[in]  blockSize is the number of samples to process
6357    * @param[out]  *pResult is output value.
6358    * @return none.
6359    */
6360
6361   void arm_var_q15(
6362   q15_t * pSrc,
6363   uint32_t blockSize,
6364   q31_t * pResult);
6365
6366   /**
6367    * @brief  Root Mean Square of the elements of a floating-point vector.
6368    * @param[in]  *pSrc is input pointer
6369    * @param[in]  blockSize is the number of samples to process
6370    * @param[out]  *pResult is output value.
6371    * @return none.
6372    */
6373
6374   void arm_rms_f32(
6375   float32_t * pSrc,
6376   uint32_t blockSize,
6377   float32_t * pResult);
6378
6379   /**
6380    * @brief  Root Mean Square of the elements of a Q31 vector.
6381    * @param[in]  *pSrc is input pointer
6382    * @param[in]  blockSize is the number of samples to process
6383    * @param[out]  *pResult is output value.
6384    * @return none.
6385    */
6386
6387   void arm_rms_q31(
6388   q31_t * pSrc,
6389   uint32_t blockSize,
6390   q31_t * pResult);
6391
6392   /**
6393    * @brief  Root Mean Square of the elements of a Q15 vector.
6394    * @param[in]  *pSrc is input pointer
6395    * @param[in]  blockSize is the number of samples to process
6396    * @param[out]  *pResult is output value.
6397    * @return none.
6398    */
6399
6400   void arm_rms_q15(
6401   q15_t * pSrc,
6402   uint32_t blockSize,
6403   q15_t * pResult);
6404
6405   /**
6406    * @brief  Standard deviation of the elements of a floating-point vector.
6407    * @param[in]  *pSrc is input pointer
6408    * @param[in]  blockSize is the number of samples to process
6409    * @param[out]  *pResult is output value.
6410    * @return none.
6411    */
6412
6413   void arm_std_f32(
6414   float32_t * pSrc,
6415   uint32_t blockSize,
6416   float32_t * pResult);
6417
6418   /**
6419    * @brief  Standard deviation of the elements of a Q31 vector.
6420    * @param[in]  *pSrc is input pointer
6421    * @param[in]  blockSize is the number of samples to process
6422    * @param[out]  *pResult is output value.
6423    * @return none.
6424    */
6425
6426   void arm_std_q31(
6427   q31_t * pSrc,
6428   uint32_t blockSize,
6429   q31_t * pResult);
6430
6431   /**
6432    * @brief  Standard deviation of the elements of a Q15 vector.
6433    * @param[in]  *pSrc is input pointer
6434    * @param[in]  blockSize is the number of samples to process
6435    * @param[out]  *pResult is output value.
6436    * @return none.
6437    */
6438
6439   void arm_std_q15(
6440   q15_t * pSrc,
6441   uint32_t blockSize,
6442   q15_t * pResult);
6443
6444   /**
6445    * @brief  Floating-point complex magnitude
6446    * @param[in]  *pSrc points to the complex input vector
6447    * @param[out]  *pDst points to the real output vector
6448    * @param[in]  numSamples number of complex samples in the input vector
6449    * @return none.
6450    */
6451
6452   void arm_cmplx_mag_f32(
6453   float32_t * pSrc,
6454   float32_t * pDst,
6455   uint32_t numSamples);
6456
6457   /**
6458    * @brief  Q31 complex magnitude
6459    * @param[in]  *pSrc points to the complex input vector
6460    * @param[out]  *pDst points to the real output vector
6461    * @param[in]  numSamples number of complex samples in the input vector
6462    * @return none.
6463    */
6464
6465   void arm_cmplx_mag_q31(
6466   q31_t * pSrc,
6467   q31_t * pDst,
6468   uint32_t numSamples);
6469
6470   /**
6471    * @brief  Q15 complex magnitude
6472    * @param[in]  *pSrc points to the complex input vector
6473    * @param[out]  *pDst points to the real output vector
6474    * @param[in]  numSamples number of complex samples in the input vector
6475    * @return none.
6476    */
6477
6478   void arm_cmplx_mag_q15(
6479   q15_t * pSrc,
6480   q15_t * pDst,
6481   uint32_t numSamples);
6482
6483   /**
6484    * @brief  Q15 complex dot product
6485    * @param[in]  *pSrcA points to the first input vector
6486    * @param[in]  *pSrcB points to the second input vector
6487    * @param[in]  numSamples number of complex samples in each vector
6488    * @param[out]  *realResult real part of the result returned here
6489    * @param[out]  *imagResult imaginary part of the result returned here
6490    * @return none.
6491    */
6492
6493   void arm_cmplx_dot_prod_q15(
6494   q15_t * pSrcA,
6495   q15_t * pSrcB,
6496   uint32_t numSamples,
6497   q31_t * realResult,
6498   q31_t * imagResult);
6499
6500   /**
6501    * @brief  Q31 complex dot product
6502    * @param[in]  *pSrcA points to the first input vector
6503    * @param[in]  *pSrcB points to the second input vector
6504    * @param[in]  numSamples number of complex samples in each vector
6505    * @param[out]  *realResult real part of the result returned here
6506    * @param[out]  *imagResult imaginary part of the result returned here
6507    * @return none.
6508    */
6509
6510   void arm_cmplx_dot_prod_q31(
6511   q31_t * pSrcA,
6512   q31_t * pSrcB,
6513   uint32_t numSamples,
6514   q63_t * realResult,
6515   q63_t * imagResult);
6516
6517   /**
6518    * @brief  Floating-point complex dot product
6519    * @param[in]  *pSrcA points to the first input vector
6520    * @param[in]  *pSrcB points to the second input vector
6521    * @param[in]  numSamples number of complex samples in each vector
6522    * @param[out]  *realResult real part of the result returned here
6523    * @param[out]  *imagResult imaginary part of the result returned here
6524    * @return none.
6525    */
6526
6527   void arm_cmplx_dot_prod_f32(
6528   float32_t * pSrcA,
6529   float32_t * pSrcB,
6530   uint32_t numSamples,
6531   float32_t * realResult,
6532   float32_t * imagResult);
6533
6534   /**
6535    * @brief  Q15 complex-by-real multiplication
6536    * @param[in]  *pSrcCmplx points to the complex input vector
6537    * @param[in]  *pSrcReal points to the real input vector
6538    * @param[out]  *pCmplxDst points to the complex output vector
6539    * @param[in]  numSamples number of samples in each vector
6540    * @return none.
6541    */
6542
6543   void arm_cmplx_mult_real_q15(
6544   q15_t * pSrcCmplx,
6545   q15_t * pSrcReal,
6546   q15_t * pCmplxDst,
6547   uint32_t numSamples);
6548
6549   /**
6550    * @brief  Q31 complex-by-real multiplication
6551    * @param[in]  *pSrcCmplx points to the complex input vector
6552    * @param[in]  *pSrcReal points to the real input vector
6553    * @param[out]  *pCmplxDst points to the complex output vector
6554    * @param[in]  numSamples number of samples in each vector
6555    * @return none.
6556    */
6557
6558   void arm_cmplx_mult_real_q31(
6559   q31_t * pSrcCmplx,
6560   q31_t * pSrcReal,
6561   q31_t * pCmplxDst,
6562   uint32_t numSamples);
6563
6564   /**
6565    * @brief  Floating-point complex-by-real multiplication
6566    * @param[in]  *pSrcCmplx points to the complex input vector
6567    * @param[in]  *pSrcReal points to the real input vector
6568    * @param[out]  *pCmplxDst points to the complex output vector
6569    * @param[in]  numSamples number of samples in each vector
6570    * @return none.
6571    */
6572
6573   void arm_cmplx_mult_real_f32(
6574   float32_t * pSrcCmplx,
6575   float32_t * pSrcReal,
6576   float32_t * pCmplxDst,
6577   uint32_t numSamples);
6578
6579   /**
6580    * @brief  Minimum value of a Q7 vector.
6581    * @param[in]  *pSrc is input pointer
6582    * @param[in]  blockSize is the number of samples to process
6583    * @param[out]  *result is output pointer
6584    * @param[out]  *index is the array index of the minimum value in the input buffer.
6585    * @return none.
6586    */
6587
6588   void arm_min_q7(
6589   q7_t * pSrc,
6590   uint32_t blockSize,
6591   q7_t * result,
6592   uint32_t * index);
6593
6594   /**
6595    * @brief  Minimum value of a Q15 vector.
6596    * @param[in]  *pSrc is input pointer
6597    * @param[in]  blockSize is the number of samples to process
6598    * @param[out]  *pResult is output pointer
6599    * @param[out]  *pIndex is the array index of the minimum value in the input buffer.
6600    * @return none.
6601    */
6602
6603   void arm_min_q15(
6604   q15_t * pSrc,
6605   uint32_t blockSize,
6606   q15_t * pResult,
6607   uint32_t * pIndex);
6608
6609   /**
6610    * @brief  Minimum value of a Q31 vector.
6611    * @param[in]  *pSrc is input pointer
6612    * @param[in]  blockSize is the number of samples to process
6613    * @param[out]  *pResult is output pointer
6614    * @param[out]  *pIndex is the array index of the minimum value in the input buffer.
6615    * @return none.
6616    */
6617   void arm_min_q31(
6618   q31_t * pSrc,
6619   uint32_t blockSize,
6620   q31_t * pResult,
6621   uint32_t * pIndex);
6622
6623   /**
6624    * @brief  Minimum value of a floating-point vector.
6625    * @param[in]  *pSrc is input pointer
6626    * @param[in]  blockSize is the number of samples to process
6627    * @param[out]  *pResult is output pointer
6628    * @param[out]  *pIndex is the array index of the minimum value in the input buffer.
6629    * @return none.
6630    */
6631
6632   void arm_min_f32(
6633   float32_t * pSrc,
6634   uint32_t blockSize,
6635   float32_t * pResult,
6636   uint32_t * pIndex);
6637
6638 /**
6639  * @brief Maximum value of a Q7 vector.
6640  * @param[in]       *pSrc points to the input buffer
6641  * @param[in]       blockSize length of the input vector
6642  * @param[out]      *pResult maximum value returned here
6643  * @param[out]      *pIndex index of maximum value returned here
6644  * @return none.
6645  */
6646
6647   void arm_max_q7(
6648   q7_t * pSrc,
6649   uint32_t blockSize,
6650   q7_t * pResult,
6651   uint32_t * pIndex);
6652
6653 /**
6654  * @brief Maximum value of a Q15 vector.
6655  * @param[in]       *pSrc points to the input buffer
6656  * @param[in]       blockSize length of the input vector
6657  * @param[out]      *pResult maximum value returned here
6658  * @param[out]      *pIndex index of maximum value returned here
6659  * @return none.
6660  */
6661
6662   void arm_max_q15(
6663   q15_t * pSrc,
6664   uint32_t blockSize,
6665   q15_t * pResult,
6666   uint32_t * pIndex);
6667
6668 /**
6669  * @brief Maximum value of a Q31 vector.
6670  * @param[in]       *pSrc points to the input buffer
6671  * @param[in]       blockSize length of the input vector
6672  * @param[out]      *pResult maximum value returned here
6673  * @param[out]      *pIndex index of maximum value returned here
6674  * @return none.
6675  */
6676
6677   void arm_max_q31(
6678   q31_t * pSrc,
6679   uint32_t blockSize,
6680   q31_t * pResult,
6681   uint32_t * pIndex);
6682
6683 /**
6684  * @brief Maximum value of a floating-point vector.
6685  * @param[in]       *pSrc points to the input buffer
6686  * @param[in]       blockSize length of the input vector
6687  * @param[out]      *pResult maximum value returned here
6688  * @param[out]      *pIndex index of maximum value returned here
6689  * @return none.
6690  */
6691
6692   void arm_max_f32(
6693   float32_t * pSrc,
6694   uint32_t blockSize,
6695   float32_t * pResult,
6696   uint32_t * pIndex);
6697
6698   /**
6699    * @brief  Q15 complex-by-complex multiplication
6700    * @param[in]  *pSrcA points to the first input vector
6701    * @param[in]  *pSrcB points to the second input vector
6702    * @param[out]  *pDst  points to the output vector
6703    * @param[in]  numSamples number of complex samples in each vector
6704    * @return none.
6705    */
6706
6707   void arm_cmplx_mult_cmplx_q15(
6708   q15_t * pSrcA,
6709   q15_t * pSrcB,
6710   q15_t * pDst,
6711   uint32_t numSamples);
6712
6713   /**
6714    * @brief  Q31 complex-by-complex multiplication
6715    * @param[in]  *pSrcA points to the first input vector
6716    * @param[in]  *pSrcB points to the second input vector
6717    * @param[out]  *pDst  points to the output vector
6718    * @param[in]  numSamples number of complex samples in each vector
6719    * @return none.
6720    */
6721
6722   void arm_cmplx_mult_cmplx_q31(
6723   q31_t * pSrcA,
6724   q31_t * pSrcB,
6725   q31_t * pDst,
6726   uint32_t numSamples);
6727
6728   /**
6729    * @brief  Floating-point complex-by-complex multiplication
6730    * @param[in]  *pSrcA points to the first input vector
6731    * @param[in]  *pSrcB points to the second input vector
6732    * @param[out]  *pDst  points to the output vector
6733    * @param[in]  numSamples number of complex samples in each vector
6734    * @return none.
6735    */
6736
6737   void arm_cmplx_mult_cmplx_f32(
6738   float32_t * pSrcA,
6739   float32_t * pSrcB,
6740   float32_t * pDst,
6741   uint32_t numSamples);
6742
6743   /**
6744    * @brief Converts the elements of the floating-point vector to Q31 vector.
6745    * @param[in]       *pSrc points to the floating-point input vector
6746    * @param[out]      *pDst points to the Q31 output vector
6747    * @param[in]       blockSize length of the input vector
6748    * @return none.
6749    */
6750   void arm_float_to_q31(
6751   float32_t * pSrc,
6752   q31_t * pDst,
6753   uint32_t blockSize);
6754
6755   /**
6756    * @brief Converts the elements of the floating-point vector to Q15 vector.
6757    * @param[in]       *pSrc points to the floating-point input vector
6758    * @param[out]      *pDst points to the Q15 output vector
6759    * @param[in]       blockSize length of the input vector
6760    * @return          none
6761    */
6762   void arm_float_to_q15(
6763   float32_t * pSrc,
6764   q15_t * pDst,
6765   uint32_t blockSize);
6766
6767   /**
6768    * @brief Converts the elements of the floating-point vector to Q7 vector.
6769    * @param[in]       *pSrc points to the floating-point input vector
6770    * @param[out]      *pDst points to the Q7 output vector
6771    * @param[in]       blockSize length of the input vector
6772    * @return          none
6773    */
6774   void arm_float_to_q7(
6775   float32_t * pSrc,
6776   q7_t * pDst,
6777   uint32_t blockSize);
6778
6779
6780   /**
6781    * @brief  Converts the elements of the Q31 vector to Q15 vector.
6782    * @param[in]  *pSrc is input pointer
6783    * @param[out]  *pDst is output pointer
6784    * @param[in]  blockSize is the number of samples to process
6785    * @return none.
6786    */
6787   void arm_q31_to_q15(
6788   q31_t * pSrc,
6789   q15_t * pDst,
6790   uint32_t blockSize);
6791
6792   /**
6793    * @brief  Converts the elements of the Q31 vector to Q7 vector.
6794    * @param[in]  *pSrc is input pointer
6795    * @param[out]  *pDst is output pointer
6796    * @param[in]  blockSize is the number of samples to process
6797    * @return none.
6798    */
6799   void arm_q31_to_q7(
6800   q31_t * pSrc,
6801   q7_t * pDst,
6802   uint32_t blockSize);
6803
6804   /**
6805    * @brief  Converts the elements of the Q15 vector to floating-point vector.
6806    * @param[in]  *pSrc is input pointer
6807    * @param[out]  *pDst is output pointer
6808    * @param[in]  blockSize is the number of samples to process
6809    * @return none.
6810    */
6811   void arm_q15_to_float(
6812   q15_t * pSrc,
6813   float32_t * pDst,
6814   uint32_t blockSize);
6815
6816
6817   /**
6818    * @brief  Converts the elements of the Q15 vector to Q31 vector.
6819    * @param[in]  *pSrc is input pointer
6820    * @param[out]  *pDst is output pointer
6821    * @param[in]  blockSize is the number of samples to process
6822    * @return none.
6823    */
6824   void arm_q15_to_q31(
6825   q15_t * pSrc,
6826   q31_t * pDst,
6827   uint32_t blockSize);
6828
6829
6830   /**
6831    * @brief  Converts the elements of the Q15 vector to Q7 vector.
6832    * @param[in]  *pSrc is input pointer
6833    * @param[out]  *pDst is output pointer
6834    * @param[in]  blockSize is the number of samples to process
6835    * @return none.
6836    */
6837   void arm_q15_to_q7(
6838   q15_t * pSrc,
6839   q7_t * pDst,
6840   uint32_t blockSize);
6841
6842
6843   /**
6844    * @ingroup groupInterpolation
6845    */
6846
6847   /**
6848    * @defgroup BilinearInterpolate Bilinear Interpolation
6849    *
6850    * Bilinear interpolation is an extension of linear interpolation applied to a two dimensional grid.
6851    * The underlying function <code>f(x, y)</code> is sampled on a regular grid and the interpolation process
6852    * determines values between the grid points.
6853    * Bilinear interpolation is equivalent to two step linear interpolation, first in the x-dimension and then in the y-dimension.
6854    * Bilinear interpolation is often used in image processing to rescale images.
6855    * The CMSIS DSP library provides bilinear interpolation functions for Q7, Q15, Q31, and floating-point data types.
6856    *
6857    * <b>Algorithm</b>
6858    * \par
6859    * The instance structure used by the bilinear interpolation functions describes a two dimensional data table.
6860    * For floating-point, the instance structure is defined as:
6861    * <pre>
6862    *   typedef struct
6863    *   {
6864    *     uint16_t numRows;
6865    *     uint16_t numCols;
6866    *     float32_t *pData;
6867    * } arm_bilinear_interp_instance_f32;
6868    * </pre>
6869    *
6870    * \par
6871    * where <code>numRows</code> specifies the number of rows in the table;
6872    * <code>numCols</code> specifies the number of columns in the table;
6873    * and <code>pData</code> points to an array of size <code>numRows*numCols</code> values.
6874    * The data table <code>pData</code> is organized in row order and the supplied data values fall on integer indexes.
6875    * That is, table element (x,y) is located at <code>pData[x + y*numCols]</code> where x and y are integers.
6876    *
6877    * \par
6878    * Let <code>(x, y)</code> specify the desired interpolation point.  Then define:
6879    * <pre>
6880    *     XF = floor(x)
6881    *     YF = floor(y)
6882    * </pre>
6883    * \par
6884    * The interpolated output point is computed as:
6885    * <pre>
6886    *  f(x, y) = f(XF, YF) * (1-(x-XF)) * (1-(y-YF))
6887    *           + f(XF+1, YF) * (x-XF)*(1-(y-YF))
6888    *           + f(XF, YF+1) * (1-(x-XF))*(y-YF)
6889    *           + f(XF+1, YF+1) * (x-XF)*(y-YF)
6890    * </pre>
6891    * Note that the coordinates (x, y) contain integer and fractional components.
6892    * The integer components specify which portion of the table to use while the
6893    * fractional components control the interpolation processor.
6894    *
6895    * \par
6896    * If (x,y) is outside of the table boundary, bilinear interpolation returns zero output.
6897    */
6898
6899   /**
6900    * @addtogroup BilinearInterpolate
6901    * @{
6902    */
6903
6904   /**
6905   *
6906   * @brief  Floating-point bilinear interpolation.
6907   * @param[in,out] *S points to an instance of the interpolation structure.
6908   * @param[in] X interpolation coordinate.
6909   * @param[in] Y interpolation coordinate.
6910   * @return out interpolated value.
6911   */
6912
6913
6914   static __INLINE float32_t arm_bilinear_interp_f32(
6915   const arm_bilinear_interp_instance_f32 * S,
6916   float32_t X,
6917   float32_t Y)
6918   {
6919     float32_t out;
6920     float32_t f00, f01, f10, f11;
6921     float32_t *pData = S->pData;
6922     int32_t xIndex, yIndex, index;
6923     float32_t xdiff, ydiff;
6924     float32_t b1, b2, b3, b4;
6925
6926     xIndex = (int32_t) X;
6927     yIndex = (int32_t) Y;
6928
6929     /* Care taken for table outside boundary */
6930     /* Returns zero output when values are outside table boundary */
6931     if(xIndex < 0 || xIndex > (S->numRows - 1) || yIndex < 0
6932        || yIndex > (S->numCols - 1))
6933     {
6934       return (0);
6935     }
6936
6937     /* Calculation of index for two nearest points in X-direction */
6938     index = (xIndex - 1) + (yIndex - 1) * S->numCols;
6939
6940
6941     /* Read two nearest points in X-direction */
6942     f00 = pData[index];
6943     f01 = pData[index + 1];
6944
6945     /* Calculation of index for two nearest points in Y-direction */
6946     index = (xIndex - 1) + (yIndex) * S->numCols;
6947
6948
6949     /* Read two nearest points in Y-direction */
6950     f10 = pData[index];
6951     f11 = pData[index + 1];
6952
6953     /* Calculation of intermediate values */
6954     b1 = f00;
6955     b2 = f01 - f00;
6956     b3 = f10 - f00;
6957     b4 = f00 - f01 - f10 + f11;
6958
6959     /* Calculation of fractional part in X */
6960     xdiff = X - xIndex;
6961
6962     /* Calculation of fractional part in Y */
6963     ydiff = Y - yIndex;
6964
6965     /* Calculation of bi-linear interpolated output */
6966     out = b1 + b2 * xdiff + b3 * ydiff + b4 * xdiff * ydiff;
6967
6968     /* return to application */
6969     return (out);
6970
6971   }
6972
6973   /**
6974   *
6975   * @brief  Q31 bilinear interpolation.
6976   * @param[in,out] *S points to an instance of the interpolation structure.
6977   * @param[in] X interpolation coordinate in 12.20 format.
6978   * @param[in] Y interpolation coordinate in 12.20 format.
6979   * @return out interpolated value.
6980   */
6981
6982   static __INLINE q31_t arm_bilinear_interp_q31(
6983   arm_bilinear_interp_instance_q31 * S,
6984   q31_t X,
6985   q31_t Y)
6986   {
6987     q31_t out;                                   /* Temporary output */
6988     q31_t acc = 0;                               /* output */
6989     q31_t xfract, yfract;                        /* X, Y fractional parts */
6990     q31_t x1, x2, y1, y2;                        /* Nearest output values */
6991     int32_t rI, cI;                              /* Row and column indices */
6992     q31_t *pYData = S->pData;                    /* pointer to output table values */
6993     uint32_t nCols = S->numCols;                 /* num of rows */
6994
6995
6996     /* Input is in 12.20 format */
6997     /* 12 bits for the table index */
6998     /* Index value calculation */
6999     rI = ((X & 0xFFF00000) >> 20u);
7000
7001     /* Input is in 12.20 format */
7002     /* 12 bits for the table index */
7003     /* Index value calculation */
7004     cI = ((Y & 0xFFF00000) >> 20u);
7005
7006     /* Care taken for table outside boundary */
7007     /* Returns zero output when values are outside table boundary */
7008     if(rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1))
7009     {
7010       return (0);
7011     }
7012
7013     /* 20 bits for the fractional part */
7014     /* shift left xfract by 11 to keep 1.31 format */
7015     xfract = (X & 0x000FFFFF) << 11u;
7016
7017     /* Read two nearest output values from the index */
7018     x1 = pYData[(rI) + nCols * (cI)];
7019     x2 = pYData[(rI) + nCols * (cI) + 1u];
7020
7021     /* 20 bits for the fractional part */
7022     /* shift left yfract by 11 to keep 1.31 format */
7023     yfract = (Y & 0x000FFFFF) << 11u;
7024
7025     /* Read two nearest output values from the index */
7026     y1 = pYData[(rI) + nCols * (cI + 1)];
7027     y2 = pYData[(rI) + nCols * (cI + 1) + 1u];
7028
7029     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 3.29(q29) format */
7030     out = ((q31_t) (((q63_t) x1 * (0x7FFFFFFF - xfract)) >> 32));
7031     acc = ((q31_t) (((q63_t) out * (0x7FFFFFFF - yfract)) >> 32));
7032
7033     /* x2 * (xfract) * (1-yfract)  in 3.29(q29) and adding to acc */
7034     out = ((q31_t) ((q63_t) x2 * (0x7FFFFFFF - yfract) >> 32));
7035     acc += ((q31_t) ((q63_t) out * (xfract) >> 32));
7036
7037     /* y1 * (1 - xfract) * (yfract)  in 3.29(q29) and adding to acc */
7038     out = ((q31_t) ((q63_t) y1 * (0x7FFFFFFF - xfract) >> 32));
7039     acc += ((q31_t) ((q63_t) out * (yfract) >> 32));
7040
7041     /* y2 * (xfract) * (yfract)  in 3.29(q29) and adding to acc */
7042     out = ((q31_t) ((q63_t) y2 * (xfract) >> 32));
7043     acc += ((q31_t) ((q63_t) out * (yfract) >> 32));
7044
7045     /* Convert acc to 1.31(q31) format */
7046     return (acc << 2u);
7047
7048   }
7049
7050   /**
7051   * @brief  Q15 bilinear interpolation.
7052   * @param[in,out] *S points to an instance of the interpolation structure.
7053   * @param[in] X interpolation coordinate in 12.20 format.
7054   * @param[in] Y interpolation coordinate in 12.20 format.
7055   * @return out interpolated value.
7056   */
7057
7058   static __INLINE q15_t arm_bilinear_interp_q15(
7059   arm_bilinear_interp_instance_q15 * S,
7060   q31_t X,
7061   q31_t Y)
7062   {
7063     q63_t acc = 0;                               /* output */
7064     q31_t out;                                   /* Temporary output */
7065     q15_t x1, x2, y1, y2;                        /* Nearest output values */
7066     q31_t xfract, yfract;                        /* X, Y fractional parts */
7067     int32_t rI, cI;                              /* Row and column indices */
7068     q15_t *pYData = S->pData;                    /* pointer to output table values */
7069     uint32_t nCols = S->numCols;                 /* num of rows */
7070
7071     /* Input is in 12.20 format */
7072     /* 12 bits for the table index */
7073     /* Index value calculation */
7074     rI = ((X & 0xFFF00000) >> 20);
7075
7076     /* Input is in 12.20 format */
7077     /* 12 bits for the table index */
7078     /* Index value calculation */
7079     cI = ((Y & 0xFFF00000) >> 20);
7080
7081     /* Care taken for table outside boundary */
7082     /* Returns zero output when values are outside table boundary */
7083     if(rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1))
7084     {
7085       return (0);
7086     }
7087
7088     /* 20 bits for the fractional part */
7089     /* xfract should be in 12.20 format */
7090     xfract = (X & 0x000FFFFF);
7091
7092     /* Read two nearest output values from the index */
7093     x1 = pYData[(rI) + nCols * (cI)];
7094     x2 = pYData[(rI) + nCols * (cI) + 1u];
7095
7096
7097     /* 20 bits for the fractional part */
7098     /* yfract should be in 12.20 format */
7099     yfract = (Y & 0x000FFFFF);
7100
7101     /* Read two nearest output values from the index */
7102     y1 = pYData[(rI) + nCols * (cI + 1)];
7103     y2 = pYData[(rI) + nCols * (cI + 1) + 1u];
7104
7105     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 13.51 format */
7106
7107     /* x1 is in 1.15(q15), xfract in 12.20 format and out is in 13.35 format */
7108     /* convert 13.35 to 13.31 by right shifting  and out is in 1.31 */
7109     out = (q31_t) (((q63_t) x1 * (0xFFFFF - xfract)) >> 4u);
7110     acc = ((q63_t) out * (0xFFFFF - yfract));
7111
7112     /* x2 * (xfract) * (1-yfract)  in 1.51 and adding to acc */
7113     out = (q31_t) (((q63_t) x2 * (0xFFFFF - yfract)) >> 4u);
7114     acc += ((q63_t) out * (xfract));
7115
7116     /* y1 * (1 - xfract) * (yfract)  in 1.51 and adding to acc */
7117     out = (q31_t) (((q63_t) y1 * (0xFFFFF - xfract)) >> 4u);
7118     acc += ((q63_t) out * (yfract));
7119
7120     /* y2 * (xfract) * (yfract)  in 1.51 and adding to acc */
7121     out = (q31_t) (((q63_t) y2 * (xfract)) >> 4u);
7122     acc += ((q63_t) out * (yfract));
7123
7124     /* acc is in 13.51 format and down shift acc by 36 times */
7125     /* Convert out to 1.15 format */
7126     return (acc >> 36);
7127
7128   }
7129
7130   /**
7131   * @brief  Q7 bilinear interpolation.
7132   * @param[in,out] *S points to an instance of the interpolation structure.
7133   * @param[in] X interpolation coordinate in 12.20 format.
7134   * @param[in] Y interpolation coordinate in 12.20 format.
7135   * @return out interpolated value.
7136   */
7137
7138   static __INLINE q7_t arm_bilinear_interp_q7(
7139   arm_bilinear_interp_instance_q7 * S,
7140   q31_t X,
7141   q31_t Y)
7142   {
7143     q63_t acc = 0;                               /* output */
7144     q31_t out;                                   /* Temporary output */
7145     q31_t xfract, yfract;                        /* X, Y fractional parts */
7146     q7_t x1, x2, y1, y2;                         /* Nearest output values */
7147     int32_t rI, cI;                              /* Row and column indices */
7148     q7_t *pYData = S->pData;                     /* pointer to output table values */
7149     uint32_t nCols = S->numCols;                 /* num of rows */
7150
7151     /* Input is in 12.20 format */
7152     /* 12 bits for the table index */
7153     /* Index value calculation */
7154     rI = ((X & 0xFFF00000) >> 20);
7155
7156     /* Input is in 12.20 format */
7157     /* 12 bits for the table index */
7158     /* Index value calculation */
7159     cI = ((Y & 0xFFF00000) >> 20);
7160
7161     /* Care taken for table outside boundary */
7162     /* Returns zero output when values are outside table boundary */
7163     if(rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1))
7164     {
7165       return (0);
7166     }
7167
7168     /* 20 bits for the fractional part */
7169     /* xfract should be in 12.20 format */
7170     xfract = (X & 0x000FFFFF);
7171
7172     /* Read two nearest output values from the index */
7173     x1 = pYData[(rI) + nCols * (cI)];
7174     x2 = pYData[(rI) + nCols * (cI) + 1u];
7175
7176
7177     /* 20 bits for the fractional part */
7178     /* yfract should be in 12.20 format */
7179     yfract = (Y & 0x000FFFFF);
7180
7181     /* Read two nearest output values from the index */
7182     y1 = pYData[(rI) + nCols * (cI + 1)];
7183     y2 = pYData[(rI) + nCols * (cI + 1) + 1u];
7184
7185     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 16.47 format */
7186     out = ((x1 * (0xFFFFF - xfract)));
7187     acc = (((q63_t) out * (0xFFFFF - yfract)));
7188
7189     /* x2 * (xfract) * (1-yfract)  in 2.22 and adding to acc */
7190     out = ((x2 * (0xFFFFF - yfract)));
7191     acc += (((q63_t) out * (xfract)));
7192
7193     /* y1 * (1 - xfract) * (yfract)  in 2.22 and adding to acc */
7194     out = ((y1 * (0xFFFFF - xfract)));
7195     acc += (((q63_t) out * (yfract)));
7196
7197     /* y2 * (xfract) * (yfract)  in 2.22 and adding to acc */
7198     out = ((y2 * (yfract)));
7199     acc += (((q63_t) out * (xfract)));
7200
7201     /* acc in 16.47 format and down shift by 40 to convert to 1.7 format */
7202     return (acc >> 40);
7203
7204   }
7205
7206   /**
7207    * @} end of BilinearInterpolate group
7208    */
7209
7210
/*
 * Compiler-specific helpers.
 *
 * multAcc_32x32_keep32_R / multSub_32x32_keep32_R / mult_32x32_keep32_R
 * compute a 32x32 -> 64-bit product and keep its upper 32 bits, either
 * added to, subtracted from, or assigned to the accumulator 'a'.
 * The Keil variants add 0x80000000 before the shift (rounding); the IAR
 * and GCC variants truncate.
 *
 * All macro arguments are parenthesized in the expansions so that
 * expression arguments such as 'p + q' keep their intended precedence.
 * 'a' must be a side-effect-free lvalue: the Keil variants of multAcc and
 * multSub evaluate it twice.
 *
 * LOW_OPTIMIZATION_ENTER / LOW_OPTIMIZATION_EXIT and the IAR_ONLY_
 * counterparts expand to the pragma or attribute that lowers the
 * optimization level for one function definition on the given compiler,
 * or to nothing.
 */
#if   defined ( __CC_ARM ) //Keil
//SMMLAR
  #define multAcc_32x32_keep32_R(a, x, y) \
  (a) = (q31_t) (((((q63_t) (a)) << 32) + ((q63_t) (x) * (y)) + 0x80000000LL ) >> 32)

//SMMLSR
  #define multSub_32x32_keep32_R(a, x, y) \
  (a) = (q31_t) (((((q63_t) (a)) << 32) - ((q63_t) (x) * (y)) + 0x80000000LL ) >> 32)

//SMMULR
  #define mult_32x32_keep32_R(a, x, y) \
  (a) = (q31_t) (((q63_t) (x) * (y) + 0x80000000LL ) >> 32)

//Enter low optimization region - place directly above function definition
  #define LOW_OPTIMIZATION_ENTER \
     _Pragma ("push")         \
     _Pragma ("O1")

//Exit low optimization region - place directly after end of function definition
  #define LOW_OPTIMIZATION_EXIT \
     _Pragma ("pop")

//Enter low optimization region - place directly above function definition
  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER

//Exit low optimization region - place directly after end of function definition
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#elif defined(__ICCARM__) //IAR
 //SMMLA
  #define multAcc_32x32_keep32_R(a, x, y) \
  (a) += (q31_t) (((q63_t) (x) * (y)) >> 32)

 //SMMLS
  #define multSub_32x32_keep32_R(a, x, y) \
  (a) -= (q31_t) (((q63_t) (x) * (y)) >> 32)

//SMMUL
  #define mult_32x32_keep32_R(a, x, y) \
  (a) = (q31_t) (((q63_t) (x) * (y)) >> 32)

//Enter low optimization region - place directly above function definition
  #define LOW_OPTIMIZATION_ENTER \
     _Pragma ("optimize=low")

//Exit low optimization region - place directly after end of function definition
  #define LOW_OPTIMIZATION_EXIT

//Enter low optimization region - place directly above function definition
  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER \
     _Pragma ("optimize=low")

//Exit low optimization region - place directly after end of function definition
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#elif defined(__GNUC__)
 //SMMLA
  #define multAcc_32x32_keep32_R(a, x, y) \
  (a) += (q31_t) (((q63_t) (x) * (y)) >> 32)

 //SMMLS
  #define multSub_32x32_keep32_R(a, x, y) \
  (a) -= (q31_t) (((q63_t) (x) * (y)) >> 32)

//SMMUL
  #define mult_32x32_keep32_R(a, x, y) \
  (a) = (q31_t) (((q63_t) (x) * (y)) >> 32)

//Enter low optimization region - place directly above function definition
  #define LOW_OPTIMIZATION_ENTER __attribute__(( optimize("-O1") ))

//Exit low optimization region - place directly after end of function definition
  #define LOW_OPTIMIZATION_EXIT

  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER

  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#endif
7288
7289
7290
7291
7292
7293 #ifdef  __cplusplus
7294 }
7295 #endif
7296
7297
7298 #endif /* _ARM_MATH_H */
7299
7300
7301 /**
7302  *
7303  * End of file.
7304  */