tmk_core/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/FilteringFunctions/arm_fir_interpolate_f32.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        17. January 2013
   5 * $Revision:    V1.4.1
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_fir_interpolate_f32.c
   9 *
  10 * Description:  FIR interpolation for floating-point sequences.
  11 *
  12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13 *
  14 * Redistribution and use in source and binary forms, with or without
  15 * modification, are permitted provided that the following conditions
  16 * are met:
  17 *   - Redistributions of source code must retain the above copyright
  18 *     notice, this list of conditions and the following disclaimer.
  19 *   - Redistributions in binary form must reproduce the above copyright
  20 *     notice, this list of conditions and the following disclaimer in
  21 *     the documentation and/or other materials provided with the
  22 *     distribution.
  23 *   - Neither the name of ARM LIMITED nor the names of its contributors
  24 *     may be used to endorse or promote products derived from this
  25 *     software without specific prior written permission.
  26 *
  27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38 * POSSIBILITY OF SUCH DAMAGE.
  39 * -------------------------------------------------------------------- */
  40
  41 #include "arm_math.h"
  42
  43 /**
  44  * @defgroup FIR_Interpolate Finite Impulse Response (FIR) Interpolator
  45  *
  46  * These functions combine an upsampler (zero stuffer) and an FIR filter.
  47  * They are used in multirate systems for increasing the sample rate of a signal without introducing high frequency images.
  48  * Conceptually, the functions are equivalent to the block diagram below:
  49  * \image html FIRInterpolator.gif "Components included in the FIR Interpolator functions"
  50  * After upsampling by a factor of <code>L</code>, the signal should be filtered by a lowpass filter with a normalized
  51  * cutoff frequency of <code>1/L</code> in order to eliminate high frequency copies of the spectrum.
  52  * The user of the function is responsible for providing the filter coefficients.
  53  *
  54  * The FIR interpolator functions provided in the CMSIS DSP Library combine the upsampler and FIR filter in an efficient manner.
  55  * The upsampler inserts <code>L-1</code> zeros between each sample.
  56  * Instead of multiplying by these zero values, the FIR filter is designed to skip them.
  57  * This leads to an efficient implementation without any wasted effort.
  58  * The functions operate on blocks of input and output data.
  59  * <code>pSrc</code> points to an array of <code>blockSize</code> input values and
  60  * <code>pDst</code> points to an array of <code>blockSize*L</code> output values.
  61  *
  62  * The library provides separate functions for Q15, Q31, and floating-point data types.
  63  *
  64  * \par Algorithm:
  65  * The functions use a polyphase filter structure:
  66  * <pre>
  67  *    y[n] = b[0] * x[n] + b[L]   * x[n-1] + ... + b[L*(phaseLength-1)] * x[n-phaseLength+1]
  68  *    y[n+1] = b[1] * x[n] + b[L+1] * x[n-1] + ... + b[L*(phaseLength-1)+1] * x[n-phaseLength+1]
  69  *    ...
  70  *    y[n+(L-1)] = b[L-1] * x[n] + b[2*L-1] * x[n-1] + ....+ b[L*(phaseLength-1)+(L-1)] * x[n-phaseLength+1]
  71  * </pre>
  72  * This approach is more efficient than straightforward upsample-then-filter algorithms.
  73  * With this method the computation is reduced by a factor of <code>1/L</code> when compared to using a standard FIR filter.
  74  * \par
  75  * <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>.
  76  * <code>numTaps</code> must be a multiple of the interpolation factor <code>L</code> and this is checked by the
  77  * initialization functions.
  78  * Internally, the function divides the FIR filter's impulse response into shorter filters of length
  79  * <code>phaseLength=numTaps/L</code>.
  80  * Coefficients are stored in time reversed order.
  81  * \par
  82  * <pre>
  83  *    {b[numTaps-1], b[numTaps-2], b[N-2], ..., b[1], b[0]}
  84  * </pre>
  85  * \par
  86  * <code>pState</code> points to a state array of size <code>blockSize + phaseLength - 1</code>.
  87  * Samples in the state buffer are stored in the order:
  88  * \par
  89  * <pre>
  90  *    {x[n-phaseLength+1], x[n-phaseLength], x[n-phaseLength-1], x[n-phaseLength-2]....x[0], x[1], ..., x[blockSize-1]}
  91  * </pre>
  92  * The state variables are updated after each block of data is processed, the coefficients are untouched.
  93  *
  94  * \par Instance Structure
  95  * The coefficients and state variables for a filter are stored together in an instance data structure.
  96  * A separate instance structure must be defined for each filter.
  97  * Coefficient arrays may be shared among several instances while state variable array should be allocated separately.
  98  * There are separate instance structure declarations for each of the 3 supported data types.
  99  *
 100  * \par Initialization Functions
 101  * There is also an associated initialization function for each data type.
 102  * The initialization function performs the following operations:
 103  * - Sets the values of the internal structure fields.
 104  * - Zeros out the values in the state buffer.
 105  * - Checks to make sure that the length of the filter is a multiple of the interpolation factor.
 106  * To do this manually without calling the init function, assign the follow subfields of the instance structure:
 107  * L (interpolation factor), pCoeffs, phaseLength (numTaps / L), pState. Also set all of the values in pState to zero.
 108  *
 109  * \par
 110  * Use of the initialization function is optional.
 111  * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
 112  * To place an instance structure into a const data section, the instance structure must be manually initialized.
 113  * The code below statically initializes each of the 3 different data type filter instance structures
 114  * <pre>
 115  * arm_fir_interpolate_instance_f32 S = {L, phaseLength, pCoeffs, pState};
 116  * arm_fir_interpolate_instance_q31 S = {L, phaseLength, pCoeffs, pState};
 117  * arm_fir_interpolate_instance_q15 S = {L, phaseLength, pCoeffs, pState};
 118  * </pre>
 119  * where <code>L</code> is the interpolation factor; <code>phaseLength=numTaps/L</code> is the
 120  * length of each of the shorter FIR filters used internally,
 121  * <code>pCoeffs</code> is the address of the coefficient buffer;
 122  * <code>pState</code> is the address of the state buffer.
 123  * Be sure to set the values in the state buffer to zeros when doing static initialization.
 124  *
 125  * \par Fixed-Point Behavior
 126  * Care must be taken when using the fixed-point versions of the FIR interpolate filter functions.
 127  * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
 128  * Refer to the function specific documentation below for usage guidelines.
 129  */
 130
 131 /**
 132  * @addtogroup FIR_Interpolate
 133  * @{
 134  */
 135
 136 /**
 137  * @brief Processing function for the floating-point FIR interpolator.
 138  * @param[in] *S        points to an instance of the floating-point FIR interpolator structure.
 139  * @param[in] *pSrc     points to the block of input data.
 140  * @param[out] *pDst    points to the block of output data.
 141  * @param[in] blockSize number of input samples to process per call.
 142  * @return none.
 143  */
 144 #ifndef ARM_MATH_CM0_FAMILY
 145
 146   /* Run the below code for Cortex-M4 and Cortex-M3 */
 147
 148 void arm_fir_interpolate_f32(
 149   const arm_fir_interpolate_instance_f32 * S,
 150   float32_t * pSrc,
 151   float32_t * pDst,
 152   uint32_t blockSize)
 153 {
 154   float32_t *pState = S->pState;                 /* State pointer */
 155   float32_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
 156   float32_t *pStateCurnt;                        /* Points to the current sample of the state */
 157   float32_t *ptr1, *ptr2;                        /* Temporary pointers for state and coefficient buffers */
 158   float32_t sum0;                                /* Accumulators */
 159   float32_t x0, c0;                              /* Temporary variables to hold state and coefficient values */
 160   uint32_t i, blkCnt, j;                         /* Loop counters */
 161   uint16_t phaseLen = S->phaseLength, tapCnt;    /* Length of each polyphase filter component */
 162   float32_t acc0, acc1, acc2, acc3;
 163   float32_t x1, x2, x3;
 164   uint32_t blkCntN4;
 165   float32_t c1, c2, c3;
 166
 167   /* S->pState buffer contains previous frame (phaseLen - 1) samples */
 168   /* pStateCurnt points to the location where the new input data should be written */
 169   pStateCurnt = S->pState + (phaseLen - 1u);
 170
 171   /* Initialise  blkCnt */
 172   blkCnt = blockSize / 4;
 173   blkCntN4 = blockSize - (4 * blkCnt);
 174
 175   /* Samples loop unrolled by 4 */
 176   while(blkCnt > 0u)
 177   {
 178     /* Copy new input sample into the state buffer */
 179     *pStateCurnt++ = *pSrc++;
 180     *pStateCurnt++ = *pSrc++;
 181     *pStateCurnt++ = *pSrc++;
 182     *pStateCurnt++ = *pSrc++;
 183
 184     /* Address modifier index of coefficient buffer */
 185     j = 1u;
 186
 187     /* Loop over the Interpolation factor. */
 188     i = (S->L);
 189
 190     while(i > 0u)
 191     {
 192       /* Set accumulator to zero */
 193       acc0 = 0.0f;
 194       acc1 = 0.0f;
 195       acc2 = 0.0f;
 196       acc3 = 0.0f;
 197
 198       /* Initialize state pointer */
 199       ptr1 = pState;
 200
 201       /* Initialize coefficient pointer */
 202       ptr2 = pCoeffs + (S->L - j);
 203
 204       /* Loop over the polyPhase length. Unroll by a factor of 4.
 205        ** Repeat until we've computed numTaps-(4*S->L) coefficients. */
 206       tapCnt = phaseLen >> 2u;
 207
 208       x0 = *(ptr1++);
 209       x1 = *(ptr1++);
 210       x2 = *(ptr1++);
 211
 212       while(tapCnt > 0u)
 213       {
 214
 215         /* Read the input sample */
 216         x3 = *(ptr1++);
 217
 218         /* Read the coefficient */
 219         c0 = *(ptr2);
 220
 221         /* Perform the multiply-accumulate */
 222         acc0 += x0 * c0;
 223         acc1 += x1 * c0;
 224         acc2 += x2 * c0;
 225         acc3 += x3 * c0;
 226
 227         /* Read the coefficient */
 228         c1 = *(ptr2 + S->L);
 229
 230         /* Read the input sample */
 231         x0 = *(ptr1++);
 232
 233         /* Perform the multiply-accumulate */
 234         acc0 += x1 * c1;
 235         acc1 += x2 * c1;
 236         acc2 += x3 * c1;
 237         acc3 += x0 * c1;
 238
 239         /* Read the coefficient */
 240         c2 = *(ptr2 + S->L * 2);
 241
 242         /* Read the input sample */
 243         x1 = *(ptr1++);
 244
 245         /* Perform the multiply-accumulate */
 246         acc0 += x2 * c2;
 247         acc1 += x3 * c2;
 248         acc2 += x0 * c2;
 249         acc3 += x1 * c2;
 250
 251         /* Read the coefficient */
 252         c3 = *(ptr2 + S->L * 3);
 253
 254         /* Read the input sample */
 255         x2 = *(ptr1++);
 256
 257         /* Perform the multiply-accumulate */
 258         acc0 += x3 * c3;
 259         acc1 += x0 * c3;
 260         acc2 += x1 * c3;
 261         acc3 += x2 * c3;
 262
 263
 264         /* Upsampling is done by stuffing L-1 zeros between each sample.
 265          * So instead of multiplying zeros with coefficients,
 266          * Increment the coefficient pointer by interpolation factor times. */
 267         ptr2 += 4 * S->L;
 268
 269         /* Decrement the loop counter */
 270         tapCnt--;
 271       }
 272
 273       /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */
 274       tapCnt = phaseLen % 0x4u;
 275
 276       while(tapCnt > 0u)
 277       {
 278
 279         /* Read the input sample */
 280         x3 = *(ptr1++);
 281
 282         /* Read the coefficient */
 283         c0 = *(ptr2);
 284
 285         /* Perform the multiply-accumulate */
 286         acc0 += x0 * c0;
 287         acc1 += x1 * c0;
 288         acc2 += x2 * c0;
 289         acc3 += x3 * c0;
 290
 291         /* Increment the coefficient pointer by interpolation factor times. */
 292         ptr2 += S->L;
 293
 294         /* update states for next sample processing */
 295         x0 = x1;
 296         x1 = x2;
 297         x2 = x3;
 298
 299         /* Decrement the loop counter */
 300         tapCnt--;
 301       }
 302
 303       /* The result is in the accumulator, store in the destination buffer. */
 304       *pDst = acc0;
 305       *(pDst + S->L) = acc1;
 306       *(pDst + 2 * S->L) = acc2;
 307       *(pDst + 3 * S->L) = acc3;
 308
 309       pDst++;
 310
 311       /* Increment the address modifier index of coefficient buffer */
 312       j++;
 313
 314       /* Decrement the loop counter */
 315       i--;
 316     }
 317
 318     /* Advance the state pointer by 1
 319      * to process the next group of interpolation factor number samples */
 320     pState = pState + 4;
 321
 322     pDst += S->L * 3;
 323
 324     /* Decrement the loop counter */
 325     blkCnt--;
 326   }
 327
 328   /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
 329    ** No loop unrolling is used. */
 330
 331   while(blkCntN4 > 0u)
 332   {
 333     /* Copy new input sample into the state buffer */
 334     *pStateCurnt++ = *pSrc++;
 335
 336     /* Address modifier index of coefficient buffer */
 337     j = 1u;
 338
 339     /* Loop over the Interpolation factor. */
 340     i = S->L;
 341     while(i > 0u)
 342     {
 343       /* Set accumulator to zero */
 344       sum0 = 0.0f;
 345
 346       /* Initialize state pointer */
 347       ptr1 = pState;
 348
 349       /* Initialize coefficient pointer */
 350       ptr2 = pCoeffs + (S->L - j);
 351
 352       /* Loop over the polyPhase length. Unroll by a factor of 4.
 353        ** Repeat until we've computed numTaps-(4*S->L) coefficients. */
 354       tapCnt = phaseLen >> 2u;
 355       while(tapCnt > 0u)
 356       {
 357
 358         /* Read the coefficient */
 359         c0 = *(ptr2);
 360
 361         /* Upsampling is done by stuffing L-1 zeros between each sample.
 362          * So instead of multiplying zeros with coefficients,
 363          * Increment the coefficient pointer by interpolation factor times. */
 364         ptr2 += S->L;
 365
 366         /* Read the input sample */
 367         x0 = *(ptr1++);
 368
 369         /* Perform the multiply-accumulate */
 370         sum0 += x0 * c0;
 371
 372         /* Read the coefficient */
 373         c0 = *(ptr2);
 374
 375         /* Increment the coefficient pointer by interpolation factor times. */
 376         ptr2 += S->L;
 377
 378         /* Read the input sample */
 379         x0 = *(ptr1++);
 380
 381         /* Perform the multiply-accumulate */
 382         sum0 += x0 * c0;
 383
 384         /* Read the coefficient */
 385         c0 = *(ptr2);
 386
 387         /* Increment the coefficient pointer by interpolation factor times. */
 388         ptr2 += S->L;
 389
 390         /* Read the input sample */
 391         x0 = *(ptr1++);
 392
 393         /* Perform the multiply-accumulate */
 394         sum0 += x0 * c0;
 395
 396         /* Read the coefficient */
 397         c0 = *(ptr2);
 398
 399         /* Increment the coefficient pointer by interpolation factor times. */
 400         ptr2 += S->L;
 401
 402         /* Read the input sample */
 403         x0 = *(ptr1++);
 404
 405         /* Perform the multiply-accumulate */
 406         sum0 += x0 * c0;
 407
 408         /* Decrement the loop counter */
 409         tapCnt--;
 410       }
 411
 412       /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */
 413       tapCnt = phaseLen % 0x4u;
 414
 415       while(tapCnt > 0u)
 416       {
 417         /* Perform the multiply-accumulate */
 418         sum0 += *(ptr1++) * (*ptr2);
 419
 420         /* Increment the coefficient pointer by interpolation factor times. */
 421         ptr2 += S->L;
 422
 423         /* Decrement the loop counter */
 424         tapCnt--;
 425       }
 426
 427       /* The result is in the accumulator, store in the destination buffer. */
 428       *pDst++ = sum0;
 429
 430       /* Increment the address modifier index of coefficient buffer */
 431       j++;
 432
 433       /* Decrement the loop counter */
 434       i--;
 435     }
 436
 437     /* Advance the state pointer by 1
 438      * to process the next group of interpolation factor number samples */
 439     pState = pState + 1;
 440
 441     /* Decrement the loop counter */
 442     blkCntN4--;
 443   }
 444
 445   /* Processing is complete.
 446    ** Now copy the last phaseLen - 1 samples to the satrt of the state buffer.
 447    ** This prepares the state buffer for the next function call. */
 448
 449   /* Points to the start of the state buffer */
 450   pStateCurnt = S->pState;
 451
 452   tapCnt = (phaseLen - 1u) >> 2u;
 453
 454   /* copy data */
 455   while(tapCnt > 0u)
 456   {
 457     *pStateCurnt++ = *pState++;
 458     *pStateCurnt++ = *pState++;
 459     *pStateCurnt++ = *pState++;
 460     *pStateCurnt++ = *pState++;
 461
 462     /* Decrement the loop counter */
 463     tapCnt--;
 464   }
 465
 466   tapCnt = (phaseLen - 1u) % 0x04u;
 467
 468   /* copy data */
 469   while(tapCnt > 0u)
 470   {
 471     *pStateCurnt++ = *pState++;
 472
 473     /* Decrement the loop counter */
 474     tapCnt--;
 475   }
 476 }
 477
 478 #else
 479
 480   /* Run the below code for Cortex-M0 */
 481
 482 void arm_fir_interpolate_f32(
 483   const arm_fir_interpolate_instance_f32 * S,
 484   float32_t * pSrc,
 485   float32_t * pDst,
 486   uint32_t blockSize)
 487 {
 488   float32_t *pState = S->pState;                 /* State pointer */
 489   float32_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
 490   float32_t *pStateCurnt;                        /* Points to the current sample of the state */
 491   float32_t *ptr1, *ptr2;                        /* Temporary pointers for state and coefficient buffers */
 492
 493
 494   float32_t sum;                                 /* Accumulator */
 495   uint32_t i, blkCnt;                            /* Loop counters */
 496   uint16_t phaseLen = S->phaseLength, tapCnt;    /* Length of each polyphase filter component */
 497
 498
 499   /* S->pState buffer contains previous frame (phaseLen - 1) samples */
 500   /* pStateCurnt points to the location where the new input data should be written */
 501   pStateCurnt = S->pState + (phaseLen - 1u);
 502
 503   /* Total number of intput samples */
 504   blkCnt = blockSize;
 505
 506   /* Loop over the blockSize. */
 507   while(blkCnt > 0u)
 508   {
 509     /* Copy new input sample into the state buffer */
 510     *pStateCurnt++ = *pSrc++;
 511
 512     /* Loop over the Interpolation factor. */
 513     i = S->L;
 514
 515     while(i > 0u)
 516     {
 517       /* Set accumulator to zero */
 518       sum = 0.0f;
 519
 520       /* Initialize state pointer */
 521       ptr1 = pState;
 522
 523       /* Initialize coefficient pointer */
 524       ptr2 = pCoeffs + (i - 1u);
 525
 526       /* Loop over the polyPhase length */
 527       tapCnt = phaseLen;
 528
 529       while(tapCnt > 0u)
 530       {
 531         /* Perform the multiply-accumulate */
 532         sum += *ptr1++ * *ptr2;
 533
 534         /* Increment the coefficient pointer by interpolation factor times. */
 535         ptr2 += S->L;
 536
 537         /* Decrement the loop counter */
 538         tapCnt--;
 539       }
 540
 541       /* The result is in the accumulator, store in the destination buffer. */
 542       *pDst++ = sum;
 543
 544       /* Decrement the loop counter */
 545       i--;
 546     }
 547
 548     /* Advance the state pointer by 1
 549      * to process the next group of interpolation factor number samples */
 550     pState = pState + 1;
 551
 552     /* Decrement the loop counter */
 553     blkCnt--;
 554   }
 555
 556   /* Processing is complete.
 557    ** Now copy the last phaseLen - 1 samples to the start of the state buffer.
 558    ** This prepares the state buffer for the next function call. */
 559
 560   /* Points to the start of the state buffer */
 561   pStateCurnt = S->pState;
 562
 563   tapCnt = phaseLen - 1u;
 564
 565   while(tapCnt > 0u)
 566   {
 567     *pStateCurnt++ = *pState++;
 568
 569     /* Decrement the loop counter */
 570     tapCnt--;
 571   }
 572
 573 }
 574
 575 #endif /*   #ifndef ARM_MATH_CM0_FAMILY */
 576
 577
 578
 579  /**
 580   * @} end of FIR_Interpolate group
 581   */