tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/FilteringFunctions/arm_lms_norm_q31.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        17. January 2013
   5 * $Revision:    V1.4.1
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_lms_norm_q31.c
   9 *
  10 * Description:  Processing function for the Q31 NLMS filter.
  11 *
  12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13 *
  14 * Redistribution and use in source and binary forms, with or without
  15 * modification, are permitted provided that the following conditions
  16 * are met:
  17 *   - Redistributions of source code must retain the above copyright
  18 *     notice, this list of conditions and the following disclaimer.
  19 *   - Redistributions in binary form must reproduce the above copyright
  20 *     notice, this list of conditions and the following disclaimer in
  21 *     the documentation and/or other materials provided with the
  22 *     distribution.
  23 *   - Neither the name of ARM LIMITED nor the names of its contributors
  24 *     may be used to endorse or promote products derived from this
  25 *     software without specific prior written permission.
  26 *
  27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38 * POSSIBILITY OF SUCH DAMAGE.
  39 * -------------------------------------------------------------------- */
  40
  41 #include "arm_math.h"
  42
  43 /**
  44  * @ingroup groupFilters
  45  */
  46
  47 /**
  48  * @addtogroup LMS_NORM
  49  * @{
  50  */
  51
  52 /**
  53 * @brief Processing function for Q31 normalized LMS filter.
  54 * @param[in] *S points to an instance of the Q31 normalized LMS filter structure.
  55 * @param[in] *pSrc points to the block of input data.
  56 * @param[in] *pRef points to the block of reference data.
  57 * @param[out] *pOut points to the block of output data.
  58 * @param[out] *pErr points to the block of error data.
  59 * @param[in] blockSize number of samples to process.
  60 * @return none.
  61 *
  62 * <b>Scaling and Overflow Behavior:</b>
  63 * \par
  64 * The function is implemented using an internal 64-bit accumulator.
  65 * The accumulator has a 2.62 format and maintains full precision of the intermediate
  66 * multiplication results but provides only a single guard bit.
  67 * Thus, if the accumulator result overflows it wraps around rather than clip.
  68 * In order to avoid overflows completely the input signal must be scaled down by
  69 * log2(numTaps) bits. The reference signal should not be scaled down.
  70 * After all multiply-accumulates are performed, the 2.62 accumulator is shifted
  71 * and saturated to 1.31 format to yield the final result.
  72 * The output signal and error signal are in 1.31 format.
  73 *
  74 * \par
  75 *       In this filter, filter coefficients are updated for each sample and the
  76 * updation of filter cofficients are saturted.
  77 *
  78 */
  79
  80 void arm_lms_norm_q31(
  81   arm_lms_norm_instance_q31 * S,
  82   q31_t * pSrc,
  83   q31_t * pRef,
  84   q31_t * pOut,
  85   q31_t * pErr,
  86   uint32_t blockSize)
  87 {
  88   q31_t *pState = S->pState;                     /* State pointer */
  89   q31_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
  90   q31_t *pStateCurnt;                            /* Points to the current sample of the state */
  91   q31_t *px, *pb;                                /* Temporary pointers for state and coefficient buffers */
  92   q31_t mu = S->mu;                              /* Adaptive factor */
  93   uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
  94   uint32_t tapCnt, blkCnt;                       /* Loop counters */
  95   q63_t energy;                                  /* Energy of the input */
  96   q63_t acc;                                     /* Accumulator */
  97   q31_t e = 0, d = 0;                            /* error, reference data sample */
  98   q31_t w = 0, in;                               /* weight factor and state */
  99   q31_t x0;                                      /* temporary variable to hold input sample */
 100 //  uint32_t shift = 32u - ((uint32_t) S->postShift + 1u);        /* Shift to be applied to the output */
 101   q31_t errorXmu, oneByEnergy;                   /* Temporary variables to store error and mu product and reciprocal of energy */
 102   q31_t postShift;                               /* Post shift to be applied to weight after reciprocal calculation */
 103   q31_t coef;                                    /* Temporary variable for coef */
 104   q31_t acc_l, acc_h;                            /*  temporary input */
 105   uint32_t uShift = ((uint32_t) S->postShift + 1u);
 106   uint32_t lShift = 32u - uShift;                /*  Shift to be applied to the output */
 107
 108   energy = S->energy;
 109   x0 = S->x0;
 110
 111   /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
 112   /* pStateCurnt points to the location where the new input data should be written */
 113   pStateCurnt = &(S->pState[(numTaps - 1u)]);
 114
 115   /* Loop over blockSize number of values */
 116   blkCnt = blockSize;
 117
 118
 119 #ifndef ARM_MATH_CM0_FAMILY
 120
 121   /* Run the below code for Cortex-M4 and Cortex-M3 */
 122
 123   while(blkCnt > 0u)
 124   {
 125
 126     /* Copy the new input sample into the state buffer */
 127     *pStateCurnt++ = *pSrc;
 128
 129     /* Initialize pState pointer */
 130     px = pState;
 131
 132     /* Initialize coeff pointer */
 133     pb = (pCoeffs);
 134
 135     /* Read the sample from input buffer */
 136     in = *pSrc++;
 137
 138     /* Update the energy calculation */
 139     energy = (q31_t) ((((q63_t) energy << 32) -
 140                        (((q63_t) x0 * x0) << 1)) >> 32);
 141     energy = (q31_t) (((((q63_t) in * in) << 1) + (energy << 32)) >> 32);
 142
 143     /* Set the accumulator to zero */
 144     acc = 0;
 145
 146     /* Loop unrolling.  Process 4 taps at a time. */
 147     tapCnt = numTaps >> 2;
 148
 149     while(tapCnt > 0u)
 150     {
 151       /* Perform the multiply-accumulate */
 152       acc += ((q63_t) (*px++)) * (*pb++);
 153       acc += ((q63_t) (*px++)) * (*pb++);
 154       acc += ((q63_t) (*px++)) * (*pb++);
 155       acc += ((q63_t) (*px++)) * (*pb++);
 156
 157       /* Decrement the loop counter */
 158       tapCnt--;
 159     }
 160
 161     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
 162     tapCnt = numTaps % 0x4u;
 163
 164     while(tapCnt > 0u)
 165     {
 166       /* Perform the multiply-accumulate */
 167       acc += ((q63_t) (*px++)) * (*pb++);
 168
 169       /* Decrement the loop counter */
 170       tapCnt--;
 171     }
 172
 173     /* Converting the result to 1.31 format */
 174     /* Calc lower part of acc */
 175     acc_l = acc & 0xffffffff;
 176
 177     /* Calc upper part of acc */
 178     acc_h = (acc >> 32) & 0xffffffff;
 179
 180     acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
 181
 182     /* Store the result from accumulator into the destination buffer. */
 183     *pOut++ = (q31_t) acc;
 184
 185     /* Compute and store error */
 186     d = *pRef++;
 187     e = d - (q31_t) acc;
 188     *pErr++ = e;
 189
 190     /* Calculates the reciprocal of energy */
 191     postShift = arm_recip_q31(energy + DELTA_Q31,
 192                               &oneByEnergy, &S->recipTable[0]);
 193
 194     /* Calculation of product of (e * mu) */
 195     errorXmu = (q31_t) (((q63_t) e * mu) >> 31);
 196
 197     /* Weighting factor for the normalized version */
 198     w = clip_q63_to_q31(((q63_t) errorXmu * oneByEnergy) >> (31 - postShift));
 199
 200     /* Initialize pState pointer */
 201     px = pState;
 202
 203     /* Initialize coeff pointer */
 204     pb = (pCoeffs);
 205
 206     /* Loop unrolling.  Process 4 taps at a time. */
 207     tapCnt = numTaps >> 2;
 208
 209     /* Update filter coefficients */
 210     while(tapCnt > 0u)
 211     {
 212       /* Perform the multiply-accumulate */
 213
 214       /* coef is in 2.30 format */
 215       coef = (q31_t) (((q63_t) w * (*px++)) >> (32));
 216       /* get coef in 1.31 format by left shifting */
 217       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
 218       /* update coefficient buffer to next coefficient */
 219       pb++;
 220
 221       coef = (q31_t) (((q63_t) w * (*px++)) >> (32));
 222       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
 223       pb++;
 224
 225       coef = (q31_t) (((q63_t) w * (*px++)) >> (32));
 226       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
 227       pb++;
 228
 229       coef = (q31_t) (((q63_t) w * (*px++)) >> (32));
 230       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
 231       pb++;
 232
 233       /* Decrement the loop counter */
 234       tapCnt--;
 235     }
 236
 237     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
 238     tapCnt = numTaps % 0x4u;
 239
 240     while(tapCnt > 0u)
 241     {
 242       /* Perform the multiply-accumulate */
 243       coef = (q31_t) (((q63_t) w * (*px++)) >> (32));
 244       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
 245       pb++;
 246
 247       /* Decrement the loop counter */
 248       tapCnt--;
 249     }
 250
 251     /* Read the sample from state buffer */
 252     x0 = *pState;
 253
 254     /* Advance state pointer by 1 for the next sample */
 255     pState = pState + 1;
 256
 257     /* Decrement the loop counter */
 258     blkCnt--;
 259   }
 260
 261   /* Save energy and x0 values for the next frame */
 262   S->energy = (q31_t) energy;
 263   S->x0 = x0;
 264
 265   /* Processing is complete. Now copy the last numTaps - 1 samples to the
 266      satrt of the state buffer. This prepares the state buffer for the
 267      next function call. */
 268
 269   /* Points to the start of the pState buffer */
 270   pStateCurnt = S->pState;
 271
 272   /* Loop unrolling for (numTaps - 1u) samples copy */
 273   tapCnt = (numTaps - 1u) >> 2u;
 274
 275   /* copy data */
 276   while(tapCnt > 0u)
 277   {
 278     *pStateCurnt++ = *pState++;
 279     *pStateCurnt++ = *pState++;
 280     *pStateCurnt++ = *pState++;
 281     *pStateCurnt++ = *pState++;
 282
 283     /* Decrement the loop counter */
 284     tapCnt--;
 285   }
 286
 287   /* Calculate remaining number of copies */
 288   tapCnt = (numTaps - 1u) % 0x4u;
 289
 290   /* Copy the remaining q31_t data */
 291   while(tapCnt > 0u)
 292   {
 293     *pStateCurnt++ = *pState++;
 294
 295     /* Decrement the loop counter */
 296     tapCnt--;
 297   }
 298
 299 #else
 300
 301   /* Run the below code for Cortex-M0 */
 302
 303   while(blkCnt > 0u)
 304   {
 305
 306     /* Copy the new input sample into the state buffer */
 307     *pStateCurnt++ = *pSrc;
 308
 309     /* Initialize pState pointer */
 310     px = pState;
 311
 312     /* Initialize pCoeffs pointer */
 313     pb = pCoeffs;
 314
 315     /* Read the sample from input buffer */
 316     in = *pSrc++;
 317
 318     /* Update the energy calculation */
 319     energy =
 320       (q31_t) ((((q63_t) energy << 32) - (((q63_t) x0 * x0) << 1)) >> 32);
 321     energy = (q31_t) (((((q63_t) in * in) << 1) + (energy << 32)) >> 32);
 322
 323     /* Set the accumulator to zero */
 324     acc = 0;
 325
 326     /* Loop over numTaps number of values */
 327     tapCnt = numTaps;
 328
 329     while(tapCnt > 0u)
 330     {
 331       /* Perform the multiply-accumulate */
 332       acc += ((q63_t) (*px++)) * (*pb++);
 333
 334       /* Decrement the loop counter */
 335       tapCnt--;
 336     }
 337
 338     /* Converting the result to 1.31 format */
 339     /* Converting the result to 1.31 format */
 340     /* Calc lower part of acc */
 341     acc_l = acc & 0xffffffff;
 342
 343     /* Calc upper part of acc */
 344     acc_h = (acc >> 32) & 0xffffffff;
 345
 346     acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
 347
 348
 349     //acc = (q31_t) (acc >> shift);
 350
 351     /* Store the result from accumulator into the destination buffer. */
 352     *pOut++ = (q31_t) acc;
 353
 354     /* Compute and store error */
 355     d = *pRef++;
 356     e = d - (q31_t) acc;
 357     *pErr++ = e;
 358
 359     /* Calculates the reciprocal of energy */
 360     postShift =
 361       arm_recip_q31(energy + DELTA_Q31, &oneByEnergy, &S->recipTable[0]);
 362
 363     /* Calculation of product of (e * mu) */
 364     errorXmu = (q31_t) (((q63_t) e * mu) >> 31);
 365
 366     /* Weighting factor for the normalized version */
 367     w = clip_q63_to_q31(((q63_t) errorXmu * oneByEnergy) >> (31 - postShift));
 368
 369     /* Initialize pState pointer */
 370     px = pState;
 371
 372     /* Initialize coeff pointer */
 373     pb = (pCoeffs);
 374
 375     /* Loop over numTaps number of values */
 376     tapCnt = numTaps;
 377
 378     while(tapCnt > 0u)
 379     {
 380       /* Perform the multiply-accumulate */
 381       /* coef is in 2.30 format */
 382       coef = (q31_t) (((q63_t) w * (*px++)) >> (32));
 383       /* get coef in 1.31 format by left shifting */
 384       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
 385       /* update coefficient buffer to next coefficient */
 386       pb++;
 387
 388       /* Decrement the loop counter */
 389       tapCnt--;
 390     }
 391
 392     /* Read the sample from state buffer */
 393     x0 = *pState;
 394
 395     /* Advance state pointer by 1 for the next sample */
 396     pState = pState + 1;
 397
 398     /* Decrement the loop counter */
 399     blkCnt--;
 400   }
 401
 402   /* Save energy and x0 values for the next frame */
 403   S->energy = (q31_t) energy;
 404   S->x0 = x0;
 405
 406   /* Processing is complete. Now copy the last numTaps - 1 samples to the
 407      start of the state buffer. This prepares the state buffer for the
 408      next function call. */
 409
 410   /* Points to the start of the pState buffer */
 411   pStateCurnt = S->pState;
 412
 413   /* Loop for (numTaps - 1u) samples copy */
 414   tapCnt = (numTaps - 1u);
 415
 416   /* Copy the remaining q31_t data */
 417   while(tapCnt > 0u)
 418   {
 419     *pStateCurnt++ = *pState++;
 420
 421     /* Decrement the loop counter */
 422     tapCnt--;
 423   }
 424
 425 #endif /*   #ifndef ARM_MATH_CM0_FAMILY */
 426
 427 }
 428
 429 /**
 430  * @} end of LMS_NORM group
 431  */