tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/FilteringFunctions/arm_biquad_cascade_df1_fast_q15.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        17. January 2013
   5 * $Revision:    V1.4.1
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_biquad_cascade_df1_fast_q15.c
   9 *
  10 * Description:  Fast processing function for the
  11 *                               Q15 Biquad cascade filter.
  12 *
  13 * Target Processor: Cortex-M4/Cortex-M3
  14 *
  15 * Redistribution and use in source and binary forms, with or without
  16 * modification, are permitted provided that the following conditions
  17 * are met:
  18 *   - Redistributions of source code must retain the above copyright
  19 *     notice, this list of conditions and the following disclaimer.
  20 *   - Redistributions in binary form must reproduce the above copyright
  21 *     notice, this list of conditions and the following disclaimer in
  22 *     the documentation and/or other materials provided with the
  23 *     distribution.
  24 *   - Neither the name of ARM LIMITED nor the names of its contributors
  25 *     may be used to endorse or promote products derived from this
  26 *     software without specific prior written permission.
  27 *
  28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  31 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  32 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  33 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  34 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  35 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  36 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  38 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  39 * POSSIBILITY OF SUCH DAMAGE.
  40 * -------------------------------------------------------------------- */
  41
  42 #include "arm_math.h"
  43
  44 /**
  45  * @ingroup groupFilters
  46  */
  47
  48 /**
  49  * @addtogroup BiquadCascadeDF1
  50  * @{
  51  */
  52
  53 /**
  54  * @details Fast processing function for the Q15 Biquad cascade filter.
  55  * @param[in]  *S points to an instance of the Q15 Biquad cascade structure.
  56  * @param[in]  *pSrc points to the block of input data.
  57  * @param[out] *pDst points to the block of output data; stages after the first are processed in place in this buffer.
  58  * @param[in]  blockSize number of samples to process per call.
  59  * @return none.
  60  *
  61  * <b>Scaling and Overflow Behavior:</b>
  62  * \par
  63  * This fast version uses a 32-bit accumulator with 2.30 format.
  64  * The accumulator maintains full precision of the intermediate multiplication results but provides only a single guard bit.
  65  * Thus, if the accumulator result overflows it wraps around and distorts the result.
  66  * In order to avoid overflows completely the input signal must be scaled down by two bits and lie in the range [-0.25 +0.25).
  67  * The accumulator is then shifted right by <code>(15 - postShift)</code> bits and the result is saturated to 16 bits (1.15 format).
  68  *
  69  * \par
  70  * Refer to the function <code>arm_biquad_cascade_df1_q15()</code> for a slower implementation of this function which uses 64-bit accumulation to avoid wrap around distortion.  Both the slow and the fast versions use the same instance structure.
  71  * Use the function <code>arm_biquad_cascade_df1_init_q15()</code> to initialize the filter structure.
  72  *
  73  */
  74
  75 void arm_biquad_cascade_df1_fast_q15(
  76   const arm_biquad_casd_df1_inst_q15 * S,
  77   q15_t * pSrc,
  78   q15_t * pDst,
  79   uint32_t blockSize)
  80 {
  81   q15_t *pIn = pSrc;                             /*  Source pointer (pSrc for the first stage, pDst afterwards) */
  82   q15_t *pOut = pDst;                            /*  Destination pointer                          */
  83   q31_t in;                                      /*  Two packed 16-bit input samples (a single sample in the odd-tail path) */
  84   q31_t out;                                     /*  Partial sum, then the shifted and saturated output */
  85   q31_t b0;                                      /*  Packed coefficient pair {b0, 0}              */
  86   q31_t b1, a1;                                  /*  Packed coefficient pairs {b1, b2} and {a1, a2} */
  87   q31_t state_in, state_out;                     /*  Packed states x[n-1], x[n-2] and y[n-1], y[n-2] */
  88   q31_t acc;                                     /*  Accumulator                                  */
  89   int32_t shift = (int32_t) (15 - S->postShift); /*  Right-shift applied to the accumulator       */
  90   q15_t *pState = S->pState;                     /*  State pointer                                */
  91   q15_t *pCoeffs = S->pCoeffs;                   /*  Coefficient pointer                          */
  92   uint32_t sample, stage = S->numStages;         /*  Sample-pair counter and stage loop counter   */
  93
  94
  95
  96   do
  97   {
  98     /* One iteration per biquad stage.  NOTE(review): the body runs before stage is tested, so numStages is presumably expected to be >= 1 - confirm against the init function. */
  99     /* Read the b0 and 0 coefficients using SIMD  */
 100     b0 = *__SIMD32(pCoeffs)++;
 101
 102     /* Read the b1 and b2 coefficients using SIMD */
 103     b1 = *__SIMD32(pCoeffs)++;
 104
 105     /* Read the a1 and a2 coefficients using SIMD */
 106     a1 = *__SIMD32(pCoeffs)++;
 107
 108     /* Read the input state values from the state buffer:  x[n-1], x[n-2] */
 109     state_in = *__SIMD32(pState)++;
 110
 111     /* Read the output state values from the state buffer:  y[n-1], y[n-2].  The post-decrement undoes the post-increment above, so pState is back at this stage's first state word for the write-back at the end of the stage (assumes __SIMD32 steps the pointer in 32-bit units - it is defined outside this file, TODO confirm). */
 112     state_out = *__SIMD32(pState)--;
 113
 114     /* Apply loop unrolling and compute 2 output values per iteration. */
 115     /*      The variable acc holds the output value being computed, for each of the two samples in turn:
 116      *
 117      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
 118      *    (the same expression is evaluated a second time for the next sample of the pair)
 119      */
 120     sample = blockSize >> 1u;
 121
 122     /* First part of the processing with loop unrolling.  Compute 2 outputs at a time.
 123      ** The conditional block below computes the remaining 1 sample when blockSize is odd. */
 124     while(sample > 0u)
 125     {
 126
 127       /* Read two input samples as one packed 32-bit word */
 128       in = *__SIMD32(pIn)++;
 129
 130       /* First output of the pair: out =  b0 * x[n] + 0 * 0 */
 131       out = __SMUAD(b0, in);
 132       /* acc =  b1 * x[n-1] + b2 * x[n-2] + out */
 133       acc = __SMLAD(b1, state_in, out);
 134       /* acc +=  a1 * y[n-1] + a2 * y[n-2] */
 135       acc = __SMLAD(a1, state_out, acc);
 136
 137       /* Shift the accumulator right by (15 - postShift) and saturate the result to 16 bits */
 138       out = __SSAT((acc >> shift), 16);
 139
 140       /* Every time after the output is computed state should be updated. */
 141       /* The states should be updated as:  */
 142       /* Xn2 = Xn1    */
 143       /* Xn1 = Xn     */
 144       /* Yn2 = Yn1    */
 145       /* Yn1 = out (the shifted and saturated result)   */
 146       /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
 147       /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
 148
 149 #ifndef  ARM_MATH_BIG_ENDIAN
 150
 151       state_in = __PKHBT(in, state_in, 16);
 152       state_out = __PKHBT(out, state_out, 16);
 153
 154 #else
 155
 156       state_in = __PKHBT(state_in >> 16, (in >> 16), 16);
 157       state_out = __PKHBT(state_out >> 16, (out), 16);
 158
 159 #endif /*      #ifndef  ARM_MATH_BIG_ENDIAN    */
 160
 161       /* Second output of the pair: out =  b0 * x[n] + 0 * 0  (presumably __SMUADX pairs b0 with the other half-word of in - see the intrinsic's documentation) */
 162       out = __SMUADX(b0, in);
 163       /* acc =  b1 * x[n-1] + b2 * x[n-2] + out */
 164       acc = __SMLAD(b1, state_in, out);
 165       /* acc +=  a1 * y[n-1] + a2 * y[n-2] */
 166       acc = __SMLAD(a1, state_out, acc);
 167
 168       /* Shift the accumulator right by (15 - postShift) and saturate the result to 16 bits */
 169       out = __SSAT((acc >> shift), 16);
 170
 171
 172       /* Store both outputs in the destination buffer as one packed 32-bit word: the first output is taken from state_out, the second from out. */
 173
 174 #ifndef  ARM_MATH_BIG_ENDIAN
 175
 176       *__SIMD32(pOut)++ = __PKHBT(state_out, out, 16);
 177
 178 #else
 179
 180       *__SIMD32(pOut)++ = __PKHBT(out, state_out >> 16, 16);
 181
 182 #endif /*      #ifndef  ARM_MATH_BIG_ENDIAN    */
 183
 184       /* Every time after the output is computed state should be updated. */
 185       /* The states should be updated as:  */
 186       /* Xn2 = Xn1    */
 187       /* Xn1 = Xn     */
 188       /* Yn2 = Yn1    */
 189       /* Yn1 = out (the shifted and saturated result)   */
 190       /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
 191       /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
 192
 193 #ifndef  ARM_MATH_BIG_ENDIAN
 194
 195       state_in = __PKHBT(in >> 16, state_in, 16);
 196       state_out = __PKHBT(out, state_out, 16);
 197
 198 #else
 199
 200       state_in = __PKHBT(state_in >> 16, in, 16);
 201       state_out = __PKHBT(state_out >> 16, out, 16);
 202
 203 #endif /*      #ifndef  ARM_MATH_BIG_ENDIAN    */
 204
 205
 206       /* Decrement the loop counter */
 207       sample--;
 208
 209     }
 210
 211     /* If the blockSize is not a multiple of 2, compute the one remaining output sample here.
 212      ** No loop unrolling is used. */
 213
 214     if((blockSize & 0x1u) != 0u)
 215     {
 216       /* Read the single remaining input sample */
 217       in = *pIn++;
 218
 219       /* out =  b0 * x[n] + 0 * 0 */
 220
 221 #ifndef  ARM_MATH_BIG_ENDIAN
 222
 223       out = __SMUAD(b0, in);
 224
 225 #else
 226
 227       out = __SMUADX(b0, in);
 228
 229 #endif /*      #ifndef  ARM_MATH_BIG_ENDIAN    */
 230
 231       /* acc =  b1 * x[n-1] + b2 * x[n-2] + out */
 232       acc = __SMLAD(b1, state_in, out);
 233       /* acc +=  a1 * y[n-1] + a2 * y[n-2] */
 234       acc = __SMLAD(a1, state_out, acc);
 235
 236       /* Shift the accumulator right by (15 - postShift) and saturate the result to 16 bits */
 237       out = __SSAT((acc >> shift), 16);
 238
 239       /* Store the output in the destination buffer. */
 240       *pOut++ = (q15_t) out;
 241
 242       /* Every time after the output is computed state should be updated. */
 243       /* The states should be updated as:  */
 244       /* Xn2 = Xn1    */
 245       /* Xn1 = Xn     */
 246       /* Yn2 = Yn1    */
 247       /* Yn1 = out (the shifted and saturated result)   */
 248       /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
 249       /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
 250
 251 #ifndef  ARM_MATH_BIG_ENDIAN
 252
 253       state_in = __PKHBT(in, state_in, 16);
 254       state_out = __PKHBT(out, state_out, 16);
 255
 256 #else
 257
 258       state_in = __PKHBT(state_in >> 16, in, 16);
 259       state_out = __PKHBT(state_out >> 16, out, 16);
 260
 261 #endif /*   #ifndef  ARM_MATH_BIG_ENDIAN    */
 262
 263     }
 264
 265     /*  The first stage goes from the input buffer to the output buffer.  */
 266     /*  Subsequent (numStages - 1) stages occur in-place in the output buffer  */
 267     pIn = pDst;
 268
 269     /* Reset the output pointer */
 270     pOut = pDst;
 271
 272     /*  Store the updated state variables back into the state array; the two post-increments move pState on past this stage's state words */
 273     *__SIMD32(pState)++ = state_in;
 274     *__SIMD32(pState)++ = state_out;
 275
 276
 277     /* Decrement the stage loop counter */
 278     stage--;
 279
 280   } while(stage > 0u);
 281 }
 282
 283
 284 /**
 285  * @} end of BiquadCascadeDF1 group
 286  */