tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/FilteringFunctions/arm_fir_lattice_f32.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        17. January 2013
   5 * $Revision:    V1.4.1
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_fir_lattice_f32.c
   9 *
  10 * Description:  Processing function for the floating-point FIR Lattice filter.
  11 *
  12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13 *
  14 * Redistribution and use in source and binary forms, with or without
  15 * modification, are permitted provided that the following conditions
  16 * are met:
  17 *   - Redistributions of source code must retain the above copyright
  18 *     notice, this list of conditions and the following disclaimer.
  19 *   - Redistributions in binary form must reproduce the above copyright
  20 *     notice, this list of conditions and the following disclaimer in
  21 *     the documentation and/or other materials provided with the
  22 *     distribution.
  23 *   - Neither the name of ARM LIMITED nor the names of its contributors
  24 *     may be used to endorse or promote products derived from this
  25 *     software without specific prior written permission.
  26 *
  27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38 * POSSIBILITY OF SUCH DAMAGE.
  39 * -------------------------------------------------------------------- */
  40
  41 #include "arm_math.h"
  42
  43 /**
  44  * @ingroup groupFilters
  45  */
  46
  47 /**
  48  * @defgroup FIR_Lattice Finite Impulse Response (FIR) Lattice Filters
  49  *
  50  * This set of functions implements Finite Impulse Response (FIR) lattice filters
  51  * for Q15, Q31 and floating-point data types.  Lattice filters are used in a
  52  * variety of adaptive filter applications.  The filter structure is feedforward and
  53  * the net impulse response is finite length.
  54  * The functions operate on blocks
  55  * of input and output data and each call to the function processes
  56  * <code>blockSize</code> samples through the filter.  <code>pSrc</code> and
  57  * <code>pDst</code> point to input and output arrays containing <code>blockSize</code> values.
  58  *
  59  * \par Algorithm:
  60  * \image html FIRLattice.gif "Finite Impulse Response Lattice filter"
  61  * The following difference equation is implemented:
  62  * <pre>
  63  *    f0[n] = g0[n] = x[n]
  64  *    fm[n] = fm-1[n] + km * gm-1[n-1] for m = 1, 2, ...M
  65  *    gm[n] = km * fm-1[n] + gm-1[n-1] for m = 1, 2, ...M
  66  *    y[n] = fM[n]
  67  * </pre>
  68  * \par
  69  * <code>pCoeffs</code> points to the array of reflection coefficients of size <code>numStages</code>.
  70  * Reflection Coefficients are stored in the following order.
  71  * \par
  72  * <pre>
  73  *    {k1, k2, ..., kM}
  74  * </pre>
  75  * where M is number of stages
  76  * \par
  77  * <code>pState</code> points to a state array of size <code>numStages</code>.
  78  * The state variables (g values) hold previous inputs and are stored in the following order.
  79  * <pre>
  80  *    {g0[n], g1[n], g2[n] ...gM-1[n]}
  81  * </pre>
  82  * The state variables are updated after each block of data is processed; the coefficients are untouched.
  83  * \par Instance Structure
  84  * The coefficients and state variables for a filter are stored together in an instance data structure.
  85  * A separate instance structure must be defined for each filter.
  86  * Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.
  87  * There are separate instance structure declarations for each of the 3 supported data types.
  88  *
  89  * \par Initialization Functions
  90  * There is also an associated initialization function for each data type.
  91  * The initialization function performs the following operations:
  92  * - Sets the values of the internal structure fields.
  93  * - Zeros out the values in the state buffer.
  94  * To do this manually without calling the init function, assign the following subfields of the instance structure:
  95  * numStages, pCoeffs, pState. Also set all of the values in pState to zero.
  96  *
  97  * \par
  98  * Use of the initialization function is optional.
  99  * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
 100  * To place an instance structure into a const data section, the instance structure must be manually initialized.
 101  * Set the values in the state buffer to zeros and then manually initialize the instance structure as follows:
 102  * <pre>
 103  *arm_fir_lattice_instance_f32 S = {numStages, pState, pCoeffs};
 104  *arm_fir_lattice_instance_q31 S = {numStages, pState, pCoeffs};
 105  *arm_fir_lattice_instance_q15 S = {numStages, pState, pCoeffs};
 106  * </pre>
 107  * \par
 108  * where <code>numStages</code> is the number of stages in the filter; <code>pState</code> is the address of the state buffer;
 109  * <code>pCoeffs</code> is the address of the coefficient buffer.
 110  * \par Fixed-Point Behavior
 111  * Care must be taken when using the fixed-point versions of the FIR Lattice filter functions.
 112  * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
 113  * Refer to the function specific documentation below for usage guidelines.
 114  */
 115
 116 /**
 117  * @addtogroup FIR_Lattice
 118  * @{
 119  */
 120
 121
 122   /**
 123    * @brief Processing function for the floating-point FIR lattice filter.
 124    * @param[in]  *S        points to an instance of the floating-point FIR lattice structure.
 125    * @param[in]  *pSrc     points to the block of input data.
 126    * @param[out] *pDst     points to the block of output data
 127    * @param[in]  blockSize number of samples to process.
 128    * @return none.
 129    */
 130
 131 void arm_fir_lattice_f32(
 132   const arm_fir_lattice_instance_f32 * S,
 133   float32_t * pSrc,
 134   float32_t * pDst,
 135   uint32_t blockSize)
 136 {
 137   float32_t *pState;                             /* State pointer */
 138   float32_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
 139   float32_t *px;                                 /* temporary state pointer */
 140   float32_t *pk;                                 /* temporary coefficient pointer */
 141
 142
 143 #ifndef ARM_MATH_CM0_FAMILY
 144
 145   /* Run the below code for Cortex-M4 and Cortex-M3 */
 146
 147   float32_t fcurr1, fnext1, gcurr1, gnext1;      /* temporary variables for first sample in loop unrolling */
 148   float32_t fcurr2, fnext2, gnext2;              /* temporary variables for second sample in loop unrolling */
 149   float32_t fcurr3, fnext3, gnext3;              /* temporary variables for third sample in loop unrolling */
 150   float32_t fcurr4, fnext4, gnext4;              /* temporary variables for fourth sample in loop unrolling */
 151   uint32_t numStages = S->numStages;             /* Number of stages in the filter */
 152   uint32_t blkCnt, stageCnt;                     /* temporary variables for counts */
 153
 154   gcurr1 = 0.0f;
 155   pState = &S->pState[0];
 156
 157   blkCnt = blockSize >> 2;
 158
 159   /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
 160      a second loop below computes the remaining 1 to 3 samples. */
 161   while(blkCnt > 0u)
 162   {
 163
 164     /* Read two samples from input buffer */
 165     /* f0(n) = x(n) */
 166     fcurr1 = *pSrc++;
 167     fcurr2 = *pSrc++;
 168
 169     /* Initialize coeff pointer */
 170     pk = (pCoeffs);
 171
 172     /* Initialize state pointer */
 173     px = pState;
 174
 175     /* Read g0(n-1) from state */
 176     gcurr1 = *px;
 177
 178     /* Process first sample for first tap */
 179     /* f1(n) = f0(n) +  K1 * g0(n-1) */
 180     fnext1 = fcurr1 + ((*pk) * gcurr1);
 181     /* g1(n) = f0(n) * K1  +  g0(n-1) */
 182     gnext1 = (fcurr1 * (*pk)) + gcurr1;
 183
 184     /* Process second sample for first tap */
 185     /* for sample 2 processing */
 186     fnext2 = fcurr2 + ((*pk) * fcurr1);
 187     gnext2 = (fcurr2 * (*pk)) + fcurr1;
 188
 189     /* Read next two samples from input buffer */
 190     /* f0(n+2) = x(n+2) */
 191     fcurr3 = *pSrc++;
 192     fcurr4 = *pSrc++;
 193
 194     /* Copy only last input samples into the state buffer
 195        which will be used for next four samples processing */
 196     *px++ = fcurr4;
 197
 198     /* Process third sample for first tap */
 199     fnext3 = fcurr3 + ((*pk) * fcurr2);
 200     gnext3 = (fcurr3 * (*pk)) + fcurr2;
 201
 202     /* Process fourth sample for first tap */
 203     fnext4 = fcurr4 + ((*pk) * fcurr3);
 204     gnext4 = (fcurr4 * (*pk++)) + fcurr3;
 205
 206     /* Update of f values for next coefficient set processing */
 207     fcurr1 = fnext1;
 208     fcurr2 = fnext2;
 209     fcurr3 = fnext3;
 210     fcurr4 = fnext4;
 211
 212     /* Loop unrolling.  Process 4 taps at a time . */
 213     stageCnt = (numStages - 1u) >> 2u;
 214
 215     /* Loop over the number of taps.  Unroll by a factor of 4.
 216      ** Repeat until we've computed numStages-3 coefficients. */
 217
 218     /* Process 2nd, 3rd, 4th and 5th taps ... here */
 219     while(stageCnt > 0u)
 220     {
 221       /* Read g1(n-1), g3(n-1) .... from state */
 222       gcurr1 = *px;
 223
 224       /* save g1(n) in state buffer */
 225       *px++ = gnext4;
 226
 227       /* Process first sample for 2nd, 6th .. tap */
 228       /* Sample processing for K2, K6.... */
 229       /* f2(n) = f1(n) +  K2 * g1(n-1) */
 230       fnext1 = fcurr1 + ((*pk) * gcurr1);
 231       /* Process second sample for 2nd, 6th .. tap */
 232       /* for sample 2 processing */
 233       fnext2 = fcurr2 + ((*pk) * gnext1);
 234       /* Process third sample for 2nd, 6th .. tap */
 235       fnext3 = fcurr3 + ((*pk) * gnext2);
 236       /* Process fourth sample for 2nd, 6th .. tap */
 237       fnext4 = fcurr4 + ((*pk) * gnext3);
 238
 239       /* g2(n) = f1(n) * K2  +  g1(n-1) */
 240       /* Calculation of state values for next stage */
 241       gnext4 = (fcurr4 * (*pk)) + gnext3;
 242       gnext3 = (fcurr3 * (*pk)) + gnext2;
 243       gnext2 = (fcurr2 * (*pk)) + gnext1;
 244       gnext1 = (fcurr1 * (*pk++)) + gcurr1;
 245
 246
 247       /* Read g2(n-1), g4(n-1) .... from state */
 248       gcurr1 = *px;
 249
 250       /* save g2(n) in state buffer */
 251       *px++ = gnext4;
 252
 253       /* Sample processing for K3, K7.... */
 254       /* Process first sample for 3rd, 7th .. tap */
 255       /* f3(n) = f2(n) +  K3 * g2(n-1) */
 256       fcurr1 = fnext1 + ((*pk) * gcurr1);
 257       /* Process second sample for 3rd, 7th .. tap */
 258       fcurr2 = fnext2 + ((*pk) * gnext1);
 259       /* Process third sample for 3rd, 7th .. tap */
 260       fcurr3 = fnext3 + ((*pk) * gnext2);
 261       /* Process fourth sample for 3rd, 7th .. tap */
 262       fcurr4 = fnext4 + ((*pk) * gnext3);
 263
 264       /* Calculation of state values for next stage */
 265       /* g3(n) = f2(n) * K3  +  g2(n-1) */
 266       gnext4 = (fnext4 * (*pk)) + gnext3;
 267       gnext3 = (fnext3 * (*pk)) + gnext2;
 268       gnext2 = (fnext2 * (*pk)) + gnext1;
 269       gnext1 = (fnext1 * (*pk++)) + gcurr1;
 270
 271
 272       /* Read g1(n-1), g3(n-1) .... from state */
 273       gcurr1 = *px;
 274
 275       /* save g3(n) in state buffer */
 276       *px++ = gnext4;
 277
 278       /* Sample processing for K4, K8.... */
 279       /* Process first sample for 4th, 8th .. tap */
 280       /* f4(n) = f3(n) +  K4 * g3(n-1) */
 281       fnext1 = fcurr1 + ((*pk) * gcurr1);
 282       /* Process second sample for 4th, 8th .. tap */
 283       /* for sample 2 processing */
 284       fnext2 = fcurr2 + ((*pk) * gnext1);
 285       /* Process third sample for 4th, 8th .. tap */
 286       fnext3 = fcurr3 + ((*pk) * gnext2);
 287       /* Process fourth sample for 4th, 8th .. tap */
 288       fnext4 = fcurr4 + ((*pk) * gnext3);
 289
 290       /* g4(n) = f3(n) * K4  +  g3(n-1) */
 291       /* Calculation of state values for next stage */
 292       gnext4 = (fcurr4 * (*pk)) + gnext3;
 293       gnext3 = (fcurr3 * (*pk)) + gnext2;
 294       gnext2 = (fcurr2 * (*pk)) + gnext1;
 295       gnext1 = (fcurr1 * (*pk++)) + gcurr1;
 296
 297       /* Read g2(n-1), g4(n-1) .... from state */
 298       gcurr1 = *px;
 299
 300       /* save g4(n) in state buffer */
 301       *px++ = gnext4;
 302
 303       /* Sample processing for K5, K9.... */
 304       /* Process first sample for 5th, 9th .. tap */
 305       /* f5(n) = f4(n) +  K5 * g4(n-1) */
 306       fcurr1 = fnext1 + ((*pk) * gcurr1);
 307       /* Process second sample for 5th, 9th .. tap */
 308       fcurr2 = fnext2 + ((*pk) * gnext1);
 309       /* Process third sample for 5th, 9th .. tap */
 310       fcurr3 = fnext3 + ((*pk) * gnext2);
 311       /* Process fourth sample for 5th, 9th .. tap */
 312       fcurr4 = fnext4 + ((*pk) * gnext3);
 313
 314       /* Calculation of state values for next stage */
 315       /* g5(n) = f4(n) * K5  +  g4(n-1) */
 316       gnext4 = (fnext4 * (*pk)) + gnext3;
 317       gnext3 = (fnext3 * (*pk)) + gnext2;
 318       gnext2 = (fnext2 * (*pk)) + gnext1;
 319       gnext1 = (fnext1 * (*pk++)) + gcurr1;
 320
 321       stageCnt--;
 322     }
 323
 324     /* If the (filter length -1) is not a multiple of 4, compute the remaining filter taps */
 325     stageCnt = (numStages - 1u) % 0x4u;
 326
 327     while(stageCnt > 0u)
 328     {
 329       gcurr1 = *px;
 330
 331       /* save g value in state buffer */
 332       *px++ = gnext4;
 333
 334       /* Process four samples for last three taps here */
 335       fnext1 = fcurr1 + ((*pk) * gcurr1);
 336       fnext2 = fcurr2 + ((*pk) * gnext1);
 337       fnext3 = fcurr3 + ((*pk) * gnext2);
 338       fnext4 = fcurr4 + ((*pk) * gnext3);
 339
 340       /* g1(n) = f0(n) * K1  +  g0(n-1) */
 341       gnext4 = (fcurr4 * (*pk)) + gnext3;
 342       gnext3 = (fcurr3 * (*pk)) + gnext2;
 343       gnext2 = (fcurr2 * (*pk)) + gnext1;
 344       gnext1 = (fcurr1 * (*pk++)) + gcurr1;
 345
 346       /* Update of f values for next coefficient set processing */
 347       fcurr1 = fnext1;
 348       fcurr2 = fnext2;
 349       fcurr3 = fnext3;
 350       fcurr4 = fnext4;
 351
 352       stageCnt--;
 353
 354     }
 355
 356     /* The results in the 4 accumulators, store in the destination buffer. */
 357     /* y(n) = fN(n) */
 358     *pDst++ = fcurr1;
 359     *pDst++ = fcurr2;
 360     *pDst++ = fcurr3;
 361     *pDst++ = fcurr4;
 362
 363     blkCnt--;
 364   }
 365
 366   /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
 367    ** No loop unrolling is used. */
 368   blkCnt = blockSize % 0x4u;
 369
 370   while(blkCnt > 0u)
 371   {
 372     /* f0(n) = x(n) */
 373     fcurr1 = *pSrc++;
 374
 375     /* Initialize coeff pointer */
 376     pk = (pCoeffs);
 377
 378     /* Initialize state pointer */
 379     px = pState;
 380
 381     /* read g2(n) from state buffer */
 382     gcurr1 = *px;
 383
 384     /* for sample 1 processing */
 385     /* f1(n) = f0(n) +  K1 * g0(n-1) */
 386     fnext1 = fcurr1 + ((*pk) * gcurr1);
 387     /* g1(n) = f0(n) * K1  +  g0(n-1) */
 388     gnext1 = (fcurr1 * (*pk++)) + gcurr1;
 389
 390     /* save g1(n) in state buffer */
 391     *px++ = fcurr1;
 392
 393     /* f1(n) is saved in fcurr1
 394        for next stage processing */
 395     fcurr1 = fnext1;
 396
 397     stageCnt = (numStages - 1u);
 398
 399     /* stage loop */
 400     while(stageCnt > 0u)
 401     {
 402       /* read g2(n) from state buffer */
 403       gcurr1 = *px;
 404
 405       /* save g1(n) in state buffer */
 406       *px++ = gnext1;
 407
 408       /* Sample processing for K2, K3.... */
 409       /* f2(n) = f1(n) +  K2 * g1(n-1) */
 410       fnext1 = fcurr1 + ((*pk) * gcurr1);
 411       /* g2(n) = f1(n) * K2  +  g1(n-1) */
 412       gnext1 = (fcurr1 * (*pk++)) + gcurr1;
 413
 414       /* f1(n) is saved in fcurr1
 415          for next stage processing */
 416       fcurr1 = fnext1;
 417
 418       stageCnt--;
 419
 420     }
 421
 422     /* y(n) = fN(n) */
 423     *pDst++ = fcurr1;
 424
 425     blkCnt--;
 426
 427   }
 428
 429 #else
 430
 431   /* Run the below code for Cortex-M0 */
 432
 433   float32_t fcurr, fnext, gcurr, gnext;          /* temporary variables */
 434   uint32_t numStages = S->numStages;             /* Length of the filter */
 435   uint32_t blkCnt, stageCnt;                     /* temporary variables for counts */
 436
 437   pState = &S->pState[0];
 438
 439   blkCnt = blockSize;
 440
 441   while(blkCnt > 0u)
 442   {
 443     /* f0(n) = x(n) */
 444     fcurr = *pSrc++;
 445
 446     /* Initialize coeff pointer */
 447     pk = pCoeffs;
 448
 449     /* Initialize state pointer */
 450     px = pState;
 451
 452     /* read g0(n-1) from state buffer */
 453     gcurr = *px;
 454
 455     /* for sample 1 processing */
 456     /* f1(n) = f0(n) +  K1 * g0(n-1) */
 457     fnext = fcurr + ((*pk) * gcurr);
 458     /* g1(n) = f0(n) * K1  +  g0(n-1) */
 459     gnext = (fcurr * (*pk++)) + gcurr;
 460
 461     /* save f0(n) in state buffer */
 462     *px++ = fcurr;
 463
 464     /* f1(n) is saved in fcurr
 465        for next stage processing */
 466     fcurr = fnext;
 467
 468     stageCnt = (numStages - 1u);
 469
 470     /* stage loop */
 471     while(stageCnt > 0u)
 472     {
 473       /* read g2(n) from state buffer */
 474       gcurr = *px;
 475
 476       /* save g1(n) in state buffer */
 477       *px++ = gnext;
 478
 479       /* Sample processing for K2, K3.... */
 480       /* f2(n) = f1(n) +  K2 * g1(n-1) */
 481       fnext = fcurr + ((*pk) * gcurr);
 482       /* g2(n) = f1(n) * K2  +  g1(n-1) */
 483       gnext = (fcurr * (*pk++)) + gcurr;
 484
 485       /* f1(n) is saved in fcurr1
 486          for next stage processing */
 487       fcurr = fnext;
 488
 489       stageCnt--;
 490
 491     }
 492
 493     /* y(n) = fN(n) */
 494     *pDst++ = fcurr;
 495
 496     blkCnt--;
 497
 498   }
 499
 500 #endif /*   #ifndef ARM_MATH_CM0_FAMILY */
 501
 502 }
 503
 504 /**
 505  * @} end of FIR_Lattice group
 506  */