tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/FilteringFunctions/arm_biquad_cascade_df1_q31.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        17. January 2013
   5 * $Revision:    V1.4.1
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_biquad_cascade_df1_q31.c
   9 *
  10 * Description:  Processing function for the
  11 *                               Q31 Biquad cascade filter
  12 *
  13 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  14 *
  15 * Redistribution and use in source and binary forms, with or without
  16 * modification, are permitted provided that the following conditions
  17 * are met:
  18 *   - Redistributions of source code must retain the above copyright
  19 *     notice, this list of conditions and the following disclaimer.
  20 *   - Redistributions in binary form must reproduce the above copyright
  21 *     notice, this list of conditions and the following disclaimer in
  22 *     the documentation and/or other materials provided with the
  23 *     distribution.
  24 *   - Neither the name of ARM LIMITED nor the names of its contributors
  25 *     may be used to endorse or promote products derived from this
  26 *     software without specific prior written permission.
  27 *
  28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  31 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  32 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  33 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  34 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  35 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  36 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  38 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  39 * POSSIBILITY OF SUCH DAMAGE.
  40 * -------------------------------------------------------------------- */
  41
  42 #include "arm_math.h"
  43
  44 /**
  45  * @ingroup groupFilters
  46  */
  47
  48 /**
  49  * @addtogroup BiquadCascadeDF1
  50  * @{
  51  */
  52
  53 /**
  54  * @brief Processing function for the Q31 Biquad cascade filter.
  55  * @param[in]  *S         points to an instance of the Q31 Biquad cascade structure.
  56  * @param[in]  *pSrc      points to the block of input data.
  57  * @param[out] *pDst      points to the block of output data.
  58  * @param[in]  blockSize  number of samples to process per call.
  59  * @return none.
  60  *
  61  * <b>Scaling and Overflow Behavior:</b>
  62  * \par
  63  * The function is implemented using an internal 64-bit accumulator.
  64  * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
  65  * Thus, if the accumulator result overflows it wraps around rather than clip.
  66  * In order to avoid overflows completely the input signal must be scaled down by 2 bits and lie in the range [-0.25 +0.25).
  67  * After all 5 multiply-accumulates are performed, the 2.62 accumulator is shifted by <code>postShift</code> bits and the result truncated to
  68  * 1.31 format by discarding the low 32 bits.
  69  *
  70  * \par
  71  * Refer to the function <code>arm_biquad_cascade_df1_fast_q31()</code> for a faster but less precise implementation of this filter for Cortex-M3 and Cortex-M4.
  72  */
  73
  74 void arm_biquad_cascade_df1_q31(
  75   const arm_biquad_casd_df1_inst_q31 * S,
  76   q31_t * pSrc,
  77   q31_t * pDst,
  78   uint32_t blockSize)
  79 {
  80   q63_t acc;                                     /*  accumulator                   */
  81   uint32_t uShift = ((uint32_t) S->postShift + 1u);
  82   uint32_t lShift = 32u - uShift;                /*  Shift to be applied to the output */
  83   q31_t *pIn = pSrc;                             /*  input pointer initialization  */
  84   q31_t *pOut = pDst;                            /*  output pointer initialization */
  85   q31_t *pState = S->pState;                     /*  pState pointer initialization */
  86   q31_t *pCoeffs = S->pCoeffs;                   /*  coeff pointer initialization  */
  87   q31_t Xn1, Xn2, Yn1, Yn2;                      /*  Filter state variables        */
  88   q31_t b0, b1, b2, a1, a2;                      /*  Filter coefficients           */
  89   q31_t Xn;                                      /*  temporary input               */
  90   uint32_t sample, stage = S->numStages;         /*  loop counters                     */
  91
  92
  93 #ifndef ARM_MATH_CM0_FAMILY
  94
  95   q31_t acc_l, acc_h;                            /*  temporary output variables    */
  96
  97   /* Run the below code for Cortex-M4 and Cortex-M3 */
  98
  99   do
 100   {
 101     /* Reading the coefficients */
 102     b0 = *pCoeffs++;
 103     b1 = *pCoeffs++;
 104     b2 = *pCoeffs++;
 105     a1 = *pCoeffs++;
 106     a2 = *pCoeffs++;
 107
 108     /* Reading the state values */
 109     Xn1 = pState[0];
 110     Xn2 = pState[1];
 111     Yn1 = pState[2];
 112     Yn2 = pState[3];
 113
 114     /* Apply loop unrolling and compute 4 output values simultaneously. */
 115     /*      The variable acc hold output values that are being computed:
 116      *
 117      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
 118      */
 119
 120     sample = blockSize >> 2u;
 121
 122     /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
 123      ** a second loop below computes the remaining 1 to 3 samples. */
 124     while(sample > 0u)
 125     {
 126       /* Read the input */
 127       Xn = *pIn++;
 128
 129       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
 130
 131       /* acc =  b0 * x[n] */
 132       acc = (q63_t) b0 *Xn;
 133       /* acc +=  b1 * x[n-1] */
 134       acc += (q63_t) b1 *Xn1;
 135       /* acc +=  b[2] * x[n-2] */
 136       acc += (q63_t) b2 *Xn2;
 137       /* acc +=  a1 * y[n-1] */
 138       acc += (q63_t) a1 *Yn1;
 139       /* acc +=  a2 * y[n-2] */
 140       acc += (q63_t) a2 *Yn2;
 141
 142       /* The result is converted to 1.31 , Yn2 variable is reused */
 143
 144       /* Calc lower part of acc */
 145       acc_l = acc & 0xffffffff;
 146
 147       /* Calc upper part of acc */
 148       acc_h = (acc >> 32) & 0xffffffff;
 149
 150       /* Apply shift for lower part of acc and upper part of acc */
 151       Yn2 = (uint32_t) acc_l >> lShift | acc_h << uShift;
 152
 153       /* Store the output in the destination buffer. */
 154       *pOut++ = Yn2;
 155
 156       /* Read the second input */
 157       Xn2 = *pIn++;
 158
 159       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
 160
 161       /* acc =  b0 * x[n] */
 162       acc = (q63_t) b0 *Xn2;
 163       /* acc +=  b1 * x[n-1] */
 164       acc += (q63_t) b1 *Xn;
 165       /* acc +=  b[2] * x[n-2] */
 166       acc += (q63_t) b2 *Xn1;
 167       /* acc +=  a1 * y[n-1] */
 168       acc += (q63_t) a1 *Yn2;
 169       /* acc +=  a2 * y[n-2] */
 170       acc += (q63_t) a2 *Yn1;
 171
 172
 173       /* The result is converted to 1.31, Yn1 variable is reused  */
 174
 175       /* Calc lower part of acc */
 176       acc_l = acc & 0xffffffff;
 177
 178       /* Calc upper part of acc */
 179       acc_h = (acc >> 32) & 0xffffffff;
 180
 181
 182       /* Apply shift for lower part of acc and upper part of acc */
 183       Yn1 = (uint32_t) acc_l >> lShift | acc_h << uShift;
 184
 185       /* Store the output in the destination buffer. */
 186       *pOut++ = Yn1;
 187
 188       /* Read the third input  */
 189       Xn1 = *pIn++;
 190
 191       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
 192
 193       /* acc =  b0 * x[n] */
 194       acc = (q63_t) b0 *Xn1;
 195       /* acc +=  b1 * x[n-1] */
 196       acc += (q63_t) b1 *Xn2;
 197       /* acc +=  b[2] * x[n-2] */
 198       acc += (q63_t) b2 *Xn;
 199       /* acc +=  a1 * y[n-1] */
 200       acc += (q63_t) a1 *Yn1;
 201       /* acc +=  a2 * y[n-2] */
 202       acc += (q63_t) a2 *Yn2;
 203
 204       /* The result is converted to 1.31, Yn2 variable is reused  */
 205       /* Calc lower part of acc */
 206       acc_l = acc & 0xffffffff;
 207
 208       /* Calc upper part of acc */
 209       acc_h = (acc >> 32) & 0xffffffff;
 210
 211
 212       /* Apply shift for lower part of acc and upper part of acc */
 213       Yn2 = (uint32_t) acc_l >> lShift | acc_h << uShift;
 214
 215       /* Store the output in the destination buffer. */
 216       *pOut++ = Yn2;
 217
 218       /* Read the forth input */
 219       Xn = *pIn++;
 220
 221       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
 222
 223       /* acc =  b0 * x[n] */
 224       acc = (q63_t) b0 *Xn;
 225       /* acc +=  b1 * x[n-1] */
 226       acc += (q63_t) b1 *Xn1;
 227       /* acc +=  b[2] * x[n-2] */
 228       acc += (q63_t) b2 *Xn2;
 229       /* acc +=  a1 * y[n-1] */
 230       acc += (q63_t) a1 *Yn2;
 231       /* acc +=  a2 * y[n-2] */
 232       acc += (q63_t) a2 *Yn1;
 233
 234       /* The result is converted to 1.31, Yn1 variable is reused  */
 235       /* Calc lower part of acc */
 236       acc_l = acc & 0xffffffff;
 237
 238       /* Calc upper part of acc */
 239       acc_h = (acc >> 32) & 0xffffffff;
 240
 241       /* Apply shift for lower part of acc and upper part of acc */
 242       Yn1 = (uint32_t) acc_l >> lShift | acc_h << uShift;
 243
 244       /* Every time after the output is computed state should be updated. */
 245       /* The states should be updated as:  */
 246       /* Xn2 = Xn1    */
 247       /* Xn1 = Xn     */
 248       /* Yn2 = Yn1    */
 249       /* Yn1 = acc    */
 250       Xn2 = Xn1;
 251       Xn1 = Xn;
 252
 253       /* Store the output in the destination buffer. */
 254       *pOut++ = Yn1;
 255
 256       /* decrement the loop counter */
 257       sample--;
 258     }
 259
 260     /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
 261      ** No loop unrolling is used. */
 262     sample = (blockSize & 0x3u);
 263
 264     while(sample > 0u)
 265     {
 266       /* Read the input */
 267       Xn = *pIn++;
 268
 269       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
 270
 271       /* acc =  b0 * x[n] */
 272       acc = (q63_t) b0 *Xn;
 273       /* acc +=  b1 * x[n-1] */
 274       acc += (q63_t) b1 *Xn1;
 275       /* acc +=  b[2] * x[n-2] */
 276       acc += (q63_t) b2 *Xn2;
 277       /* acc +=  a1 * y[n-1] */
 278       acc += (q63_t) a1 *Yn1;
 279       /* acc +=  a2 * y[n-2] */
 280       acc += (q63_t) a2 *Yn2;
 281
 282       /* The result is converted to 1.31  */
 283       acc = acc >> lShift;
 284
 285       /* Every time after the output is computed state should be updated. */
 286       /* The states should be updated as:  */
 287       /* Xn2 = Xn1    */
 288       /* Xn1 = Xn     */
 289       /* Yn2 = Yn1    */
 290       /* Yn1 = acc    */
 291       Xn2 = Xn1;
 292       Xn1 = Xn;
 293       Yn2 = Yn1;
 294       Yn1 = (q31_t) acc;
 295
 296       /* Store the output in the destination buffer. */
 297       *pOut++ = (q31_t) acc;
 298
 299       /* decrement the loop counter */
 300       sample--;
 301     }
 302
 303     /*  The first stage goes from the input buffer to the output buffer. */
 304     /*  Subsequent stages occur in-place in the output buffer */
 305     pIn = pDst;
 306
 307     /* Reset to destination pointer */
 308     pOut = pDst;
 309
 310     /*  Store the updated state variables back into the pState array */
 311     *pState++ = Xn1;
 312     *pState++ = Xn2;
 313     *pState++ = Yn1;
 314     *pState++ = Yn2;
 315
 316   } while(--stage);
 317
 318 #else
 319
 320   /* Run the below code for Cortex-M0 */
 321
 322   do
 323   {
 324     /* Reading the coefficients */
 325     b0 = *pCoeffs++;
 326     b1 = *pCoeffs++;
 327     b2 = *pCoeffs++;
 328     a1 = *pCoeffs++;
 329     a2 = *pCoeffs++;
 330
 331     /* Reading the state values */
 332     Xn1 = pState[0];
 333     Xn2 = pState[1];
 334     Yn1 = pState[2];
 335     Yn2 = pState[3];
 336
 337     /*      The variables acc holds the output value that is computed:
 338      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
 339      */
 340
 341     sample = blockSize;
 342
 343     while(sample > 0u)
 344     {
 345       /* Read the input */
 346       Xn = *pIn++;
 347
 348       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
 349       /* acc =  b0 * x[n] */
 350       acc = (q63_t) b0 *Xn;
 351
 352       /* acc +=  b1 * x[n-1] */
 353       acc += (q63_t) b1 *Xn1;
 354       /* acc +=  b[2] * x[n-2] */
 355       acc += (q63_t) b2 *Xn2;
 356       /* acc +=  a1 * y[n-1] */
 357       acc += (q63_t) a1 *Yn1;
 358       /* acc +=  a2 * y[n-2] */
 359       acc += (q63_t) a2 *Yn2;
 360
 361       /* The result is converted to 1.31  */
 362       acc = acc >> lShift;
 363
 364       /* Every time after the output is computed state should be updated. */
 365       /* The states should be updated as:  */
 366       /* Xn2 = Xn1    */
 367       /* Xn1 = Xn     */
 368       /* Yn2 = Yn1    */
 369       /* Yn1 = acc    */
 370       Xn2 = Xn1;
 371       Xn1 = Xn;
 372       Yn2 = Yn1;
 373       Yn1 = (q31_t) acc;
 374
 375       /* Store the output in the destination buffer. */
 376       *pOut++ = (q31_t) acc;
 377
 378       /* decrement the loop counter */
 379       sample--;
 380     }
 381
 382     /*  The first stage goes from the input buffer to the output buffer. */
 383     /*  Subsequent stages occur in-place in the output buffer */
 384     pIn = pDst;
 385
 386     /* Reset to destination pointer */
 387     pOut = pDst;
 388
 389     /*  Store the updated state variables back into the pState array */
 390     *pState++ = Xn1;
 391     *pState++ = Xn2;
 392     *pState++ = Yn1;
 393     *pState++ = Yn2;
 394
 395   } while(--stage);
 396
 397 #endif /*  #ifndef ARM_MATH_CM0_FAMILY */
 398 }
 399
 400 /**
 401   * @} end of BiquadCascadeDF1 group
 402   */