tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/FilteringFunctions/arm_biquad_cascade_df1_q15.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        17. January 2013
   5 * $Revision:    V1.4.1
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_biquad_cascade_df1_q15.c
   9 *
  10 * Description:  Processing function for the
  11 *                               Q15 Biquad cascade DirectFormI(DF1) filter.
  12 *
  13 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  14 *
  15 * Redistribution and use in source and binary forms, with or without
  16 * modification, are permitted provided that the following conditions
  17 * are met:
  18 *   - Redistributions of source code must retain the above copyright
  19 *     notice, this list of conditions and the following disclaimer.
  20 *   - Redistributions in binary form must reproduce the above copyright
  21 *     notice, this list of conditions and the following disclaimer in
  22 *     the documentation and/or other materials provided with the
  23 *     distribution.
  24 *   - Neither the name of ARM LIMITED nor the names of its contributors
  25 *     may be used to endorse or promote products derived from this
  26 *     software without specific prior written permission.
  27 *
  28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  31 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  32 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  33 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  34 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  35 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  36 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  38 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  39 * POSSIBILITY OF SUCH DAMAGE.
  40 * -------------------------------------------------------------------- */
  41
  42 #include "arm_math.h"
  43
  44 /**
  45  * @ingroup groupFilters
  46  */
  47
  48 /**
  49  * @addtogroup BiquadCascadeDF1
  50  * @{
  51  */
  52
  53 /**
  54  * @brief Processing function for the Q15 Biquad cascade filter.
  55  * @param[in]  *S points to an instance of the Q15 Biquad cascade structure; the state buffer it references (S->pState) is updated by this call.
  56  * @param[in]  *pSrc points to the block of input data.
  57  * @param[out] *pDst points to the location where the output result is written; every stage after the first filters this buffer in place.
  58  * @param[in]  blockSize number of samples to process per call.
  59  * @return none.
  60  *
  61  * Per stage the code consumes six coefficient slots {b0, 0, b1, b2, a1, a2} and four state values {x[n-1], x[n-2], y[n-1], y[n-2]}.
  62  * <b>Scaling and Overflow Behavior:</b>
  63  * \par
  64  * The function is implemented using a 64-bit internal accumulator.
  65  * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
  66  * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
  67  * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
  68  * The accumulator is then shifted right by <code>(15 - postShift)</code> bits to bring the result back towards 1.15 format.
  69  * Finally, the result is saturated to 1.15 format.
  70  *
  71  * \par
  72  * Refer to the function <code>arm_biquad_cascade_df1_fast_q15()</code> for a faster but less precise implementation of this filter for Cortex-M3 and Cortex-M4.
  73  */
  74
  75 void arm_biquad_cascade_df1_q15(
  76   const arm_biquad_casd_df1_inst_q15 * S,
  77   q15_t * pSrc,
  78   q15_t * pDst,
  79   uint32_t blockSize)
  80 {
  81
  82
  83 #ifndef ARM_MATH_CM0_FAMILY
  84
  85   /* Cortex-M4 / Cortex-M3 build: packed 16-bit path that produces two output samples per inner-loop iteration */
  86
  87   q15_t *pIn = pSrc;                             /*  Source pointer                               */
  88   q15_t *pOut = pDst;                            /*  Destination pointer                          */
  89   q31_t in;                                      /*  Input word (two packed q15 samples, or one)  */
  90   q31_t out;                                     /*  Partial sum, then the shifted/saturated output */
  91   q31_t b0;                                      /*  Packed word holding b0 and the 0 slot        */
  92   q31_t b1, a1;                                  /*  Packed pairs (b1, b2) and (a1, a2)           */
  93   q31_t state_in, state_out;                     /*  Packed x history and packed y history        */
  94   q31_t acc_l, acc_h;                            /*  Low and high 32-bit words of acc             */
  95   q63_t acc;                                     /*  Accumulator                                  */
  96   int32_t lShift = (15 - (int32_t) S->postShift);       /*  Right shift applied to acc: 15 - postShift   */
  97   q15_t *pState = S->pState;                     /*  State pointer                                */
  98   q15_t *pCoeffs = S->pCoeffs;                   /*  Coefficient pointer                          */
  99   uint32_t sample, stage = (uint32_t) S->numStages;     /*  Sample-pair counter and stage loop counter   */
 100   int32_t uShift = (32 - lShift);                /*  Left shift that aligns acc_h above acc_l >> lShift */
 101
 102   do
 103   {
 104     /* Read b0 and the 0 slot that follows it as one packed word */
 105     b0 = *__SIMD32(pCoeffs)++;
 106
 107     /* Read the b1 and b2 coefficients using SIMD */
 108     b1 = *__SIMD32(pCoeffs)++;
 109
 110     /* Read the a1 and a2 coefficients using SIMD */
 111     a1 = *__SIMD32(pCoeffs)++;
 112
 113     /* Read the input state values from the state buffer:  x[n-1], x[n-2] */
 114     state_in = *__SIMD32(pState)++;
 115
 116     /* Read the output state values from the state buffer:  y[n-1], y[n-2]; the post-decrement undoes the increment above */
 117     state_out = *__SIMD32(pState)--;
 118     /* NOTE(review): presumably __SIMD32 makes ++/-- step one 32-bit word, leaving pState at this stage's first state word for the write-back below - confirm in arm_math.h */
 119     /* Apply loop unrolling and compute 2 output values per iteration. */
 120     /*      The variable acc holds each output value in turn while it is being computed:
 121      *
 122      *    acc =  b0 * x[n]   + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]   (first output)
 123      *    acc =  b0 * x[n+1] + b1 * x[n]   + b2 * x[n-1] + a1 * y[n]   + a2 * y[n-1]   (second output)
 124      */
 125     sample = blockSize >> 1u;
 126
 127     /* First part of the processing with loop unrolling.  Compute 2 outputs at a time.
 128      ** the if-block after this loop computes the remaining 1 sample when blockSize is odd. */
 129     while(sample > 0u)
 130     {
 131
 132       /* Read two input samples as one packed word */
 133       in = *__SIMD32(pIn)++;
 134
 135       /* out =  b0 * x[n]; the other product of the dual multiply involves the 0 slot */
 136       out = __SMUAD(b0, in);
 137
 138       /* acc =  b1 * x[n-1] +  b2 * x[n-2] + out */
 139       acc = __SMLALD(b1, state_in, out);
 140       /* acc +=  a1 * y[n-1] +  a2 * y[n-2] */
 141       acc = __SMLALD(a1, state_out, acc);
 142
 143       /* Shift the 64-bit accumulator right by lShift (15 - postShift) using its two 32-bit halves, then saturate to 16 bits */
 144       /* Calc lower part of acc */
 145       acc_l = acc & 0xffffffff;
 146
 147       /* Calc upper part of acc */
 148       acc_h = (acc >> 32) & 0xffffffff;
 149
 150       /* Combine: low word shifted right by lShift, high word shifted left by uShift = 32 - lShift */
 151       out = (uint32_t) acc_l >> lShift | acc_h << uShift;
 152       /* NOTE(review): postShift == 15 would give lShift == 0 and uShift == 32, a shift by the full width of a 32-bit value - confirm callers never use it */
 153       out = __SSAT(out, 16);
 154
 155       /* Every time after the output is computed state should be updated. */
 156       /* The states should be updated as:  */
 157       /* Xn2 = Xn1    */
 158       /* Xn1 = Xn     */
 159       /* Yn2 = Yn1    */
 160       /* Yn1 = acc   */
 161       /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
 162       /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
 163
 164 #ifndef  ARM_MATH_BIG_ENDIAN
 165
 166       state_in = __PKHBT(in, state_in, 16);
 167       state_out = __PKHBT(out, state_out, 16);
 168
 169 #else
 170
 171       state_in = __PKHBT(state_in >> 16, (in >> 16), 16);
 172       state_out = __PKHBT(state_out >> 16, (out), 16);
 173
 174 #endif /*      #ifndef  ARM_MATH_BIG_ENDIAN    */
 175
 176       /* Second output of the pair: out =  b0 * x[n+1], using the exchanging variant of the dual multiply on the same input word */
 177       out = __SMUADX(b0, in);
 178       /* acc =  b1 * x[n] +  b2 * x[n-1] + out   (state_in was already advanced by one sample above) */
 179       acc = __SMLALD(b1, state_in, out);
 180       /* acc +=  a1 * y[n] + a2 * y[n-1] */
 181       acc = __SMLALD(a1, state_out, acc);
 182
 183       /* Same shift-by-lShift and 16-bit saturation as for the first output */
 184       /* Calc lower part of acc */
 185       acc_l = acc & 0xffffffff;
 186
 187       /* Calc upper part of acc */
 188       acc_h = (acc >> 32) & 0xffffffff;
 189
 190       /* Apply shift for lower part of acc and upper part of acc */
 191       out = (uint32_t) acc_l >> lShift | acc_h << uShift;
 192
 193       out = __SSAT(out, 16);
 194
 195       /* Store both outputs of this iteration as one packed word: the first one is taken from state_out, the second one is out */
 196
 197 #ifndef  ARM_MATH_BIG_ENDIAN
 198
 199       *__SIMD32(pOut)++ = __PKHBT(state_out, out, 16);
 200
 201 #else
 202
 203       *__SIMD32(pOut)++ = __PKHBT(out, state_out >> 16, 16);
 204
 205 #endif /*      #ifndef  ARM_MATH_BIG_ENDIAN    */
 206
 207       /* Advance the packed state by the second sample of the pair. */
 208       /* The states should be updated as:  */
 209       /* Xn2 = Xn1    */
 210       /* Xn1 = Xn     */
 211       /* Yn2 = Yn1    */
 212       /* Yn1 = acc   */
 213       /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
 214       /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
 215 #ifndef  ARM_MATH_BIG_ENDIAN
 216
 217       state_in = __PKHBT(in >> 16, state_in, 16);
 218       state_out = __PKHBT(out, state_out, 16);
 219
 220 #else
 221
 222       state_in = __PKHBT(state_in >> 16, in, 16);
 223       state_out = __PKHBT(state_out >> 16, out, 16);
 224
 225 #endif /*      #ifndef  ARM_MATH_BIG_ENDIAN    */
 226
 227
 228       /* Decrement the loop counter */
 229       sample--;
 230
 231     }
 232
 233     /* If the blockSize is not a multiple of 2, compute the one remaining output sample here.
 234      ** No loop unrolling is used. */
 235
 236     if((blockSize & 0x1u) != 0u)
 237     {
 238       /* Read a single q15 input sample (widened into in) */
 239       in = *pIn++;
 240
 241       /* out =  b0 * x[n]; the multiply variant is chosen per endianness because b0 was loaded as a packed word */
 242
 243 #ifndef  ARM_MATH_BIG_ENDIAN
 244
 245       out = __SMUAD(b0, in);
 246
 247 #else
 248
 249       out = __SMUADX(b0, in);
 250
 251 #endif /*      #ifndef  ARM_MATH_BIG_ENDIAN    */
 252
 253       /* acc =  b1 * x[n-1] + b2 * x[n-2] + out */
 254       acc = __SMLALD(b1, state_in, out);
 255       /* acc +=  a1 * y[n-1] + a2 * y[n-2] */
 256       acc = __SMLALD(a1, state_out, acc);
 257
 258       /* Shift the 64-bit accumulator right by lShift (15 - postShift) using its two 32-bit halves, then saturate to 16 bits */
 259       /* Calc lower part of acc */
 260       acc_l = acc & 0xffffffff;
 261
 262       /* Calc upper part of acc */
 263       acc_h = (acc >> 32) & 0xffffffff;
 264
 265       /* Apply shift for lower part of acc and upper part of acc */
 266       out = (uint32_t) acc_l >> lShift | acc_h << uShift;
 267
 268       out = __SSAT(out, 16);
 269
 270       /* Store the output in the destination buffer. */
 271       *pOut++ = (q15_t) out;
 272
 273       /* Every time after the output is computed state should be updated. */
 274       /* The states should be updated as:  */
 275       /* Xn2 = Xn1    */
 276       /* Xn1 = Xn     */
 277       /* Yn2 = Yn1    */
 278       /* Yn1 = acc   */
 279       /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
 280       /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
 281
 282 #ifndef  ARM_MATH_BIG_ENDIAN
 283
 284       state_in = __PKHBT(in, state_in, 16);
 285       state_out = __PKHBT(out, state_out, 16);
 286
 287 #else
 288
 289       state_in = __PKHBT(state_in >> 16, in, 16);
 290       state_out = __PKHBT(state_out >> 16, out, 16);
 291
 292 #endif /*   #ifndef  ARM_MATH_BIG_ENDIAN    */
 293
 294     }
 295
 296     /*  The first stage goes from the input buffer (pSrc) to the output buffer (pDst).  */
 297     /*  Every subsequent stage reads and writes pDst in place  */
 298     pIn = pDst;
 299
 300     /* Reset the output pointer */
 301     pOut = pDst;
 302
 303     /*  Store the updated state variables back into the state array; pState then points at the next stage's state */
 304     *__SIMD32(pState)++ = state_in;
 305     *__SIMD32(pState)++ = state_out;
 306
 307
 308     /* Decrement the loop counter */
 309     stage--;
 310     /* NOTE(review): this is a do/while, so the body runs once even if numStages is 0, after which stage-- wraps - confirm numStages >= 1 is a caller precondition */
 311   } while(stage > 0u);
 312
 313 #else
 314
 315   /* Cortex-M0 build: scalar path, one output sample per inner-loop iteration */
 316
 317   q15_t *pIn = pSrc;                             /*  Source pointer                               */
 318   q15_t *pOut = pDst;                            /*  Destination pointer                          */
 319   q15_t b0, b1, b2, a1, a2;                      /*  Coefficients of the current stage            */
 320   q15_t Xn1, Xn2, Yn1, Yn2;                      /*  x[n-1], x[n-2], y[n-1], y[n-2] of the current stage */
 321   q15_t Xn;                                      /*  Current input sample x[n]                    */
 322   q63_t acc;                                     /*  Accumulator                                  */
 323   int32_t shift = (15 - (int32_t) S->postShift); /*  Right shift applied to acc: 15 - postShift   */
 324   q15_t *pState = S->pState;                     /*  State pointer                                */
 325   q15_t *pCoeffs = S->pCoeffs;                   /*  Coefficient pointer                          */
 326   uint32_t sample, stage = (uint32_t) S->numStages;     /*  Sample loop counter and stage loop counter   */
 327
 328   do
 329   {
 330     /* Reading the six coefficient slots of this stage */
 331     b0 = *pCoeffs++;
 332     pCoeffs++;  // skip the 0 coefficient
 333     b1 = *pCoeffs++;
 334     b2 = *pCoeffs++;
 335     a1 = *pCoeffs++;
 336     a2 = *pCoeffs++;
 337
 338     /* Reading the four state values of this stage */
 339     Xn1 = pState[0];
 340     Xn2 = pState[1];
 341     Yn1 = pState[2];
 342     Yn2 = pState[3];
 343
 344     /*      The variables acc holds the output value that is computed:
 345      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
 346      */
 347
 348     sample = blockSize;
 349
 350     while(sample > 0u)
 351     {
 352       /* Read the input */
 353       Xn = *pIn++;
 354
 355       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
 356       /* acc =  b0 * x[n]; each product is formed as q31_t and then added into the 64-bit acc */
 357       acc = (q31_t) b0 *Xn;
 358
 359       /* acc +=  b1 * x[n-1] */
 360       acc += (q31_t) b1 *Xn1;
 361       /* acc +=  b[2] * x[n-2] */
 362       acc += (q31_t) b2 *Xn2;
 363       /* acc +=  a1 * y[n-1] */
 364       acc += (q31_t) a1 *Yn1;
 365       /* acc +=  a2 * y[n-2] */
 366       acc += (q31_t) a2 *Yn2;
 367
 368       /* Shift right by (15 - postShift) and saturate to 16 bits  */
 369       acc = __SSAT((acc >> shift), 16);
 370       /* NOTE(review): acc >> shift is a 64-bit value; whether __SSAT sees all of it depends on its definition in arm_math.h - confirm for large postShift */
 371       /* Every time after the output is computed state should be updated. */
 372       /* The states should be updated as:  */
 373       /* Xn2 = Xn1    */
 374       /* Xn1 = Xn     */
 375       /* Yn2 = Yn1    */
 376       /* Yn1 = acc    */
 377       Xn2 = Xn1;
 378       Xn1 = Xn;
 379       Yn2 = Yn1;
 380       Yn1 = (q15_t) acc;
 381
 382       /* Store the output in the destination buffer. */
 383       *pOut++ = (q15_t) acc;
 384
 385       /* decrement the loop counter */
 386       sample--;
 387     }
 388
 389     /*  The first stage goes from the input buffer to the output buffer. */
 390     /*  Subsequent stages occur in-place in the output buffer */
 391     pIn = pDst;
 392
 393     /* Reset to destination pointer */
 394     pOut = pDst;
 395
 396     /*  Store the updated state variables back into the pState array; pState then points at the next stage's state */
 397     *pState++ = Xn1;
 398     *pState++ = Xn2;
 399     *pState++ = Yn1;
 400     *pState++ = Yn2;
 401     /* NOTE(review): do/while with --stage: a numStages of 0 would run the body once and then wrap the counter - confirm numStages >= 1 is a caller precondition */
 402   } while(--stage);
 403
 404 #endif /*     #ifndef ARM_MATH_CM0_FAMILY */
 405
 406 }
 407
 408
 409 /**
 410  * @} end of BiquadCascadeDF1 group
 411  */