tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_rfft_q15.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        17. January 2013
   5 * $Revision:    V1.4.1
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_rfft_q15.c
   9 *
  10 * Description:  RFFT & RIFFT Q15 process function
  11 *
  12 *
  13 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  14 *
  15 * Redistribution and use in source and binary forms, with or without
  16 * modification, are permitted provided that the following conditions
  17 * are met:
  18 *   - Redistributions of source code must retain the above copyright
  19 *     notice, this list of conditions and the following disclaimer.
  20 *   - Redistributions in binary form must reproduce the above copyright
  21 *     notice, this list of conditions and the following disclaimer in
  22 *     the documentation and/or other materials provided with the
  23 *     distribution.
  24 *   - Neither the name of ARM LIMITED nor the names of its contributors
  25 *     may be used to endorse or promote products derived from this
  26 *     software without specific prior written permission.
  27 *
  28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  31 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  32 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  33 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  34 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  35 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  36 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  38 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  39 * POSSIBILITY OF SUCH DAMAGE.
  40 * -------------------------------------------------------------------- */
  41
  42 #include "arm_math.h"
  43
  44 void arm_radix4_butterfly_q15(
  45   q15_t * pSrc16,
  46   uint32_t fftLen,
  47   q15_t * pCoef16,
  48   uint32_t twidCoefModifier);
  49
  50 void arm_radix4_butterfly_inverse_q15(
  51   q15_t * pSrc16,
  52   uint32_t fftLen,
  53   q15_t * pCoef16,
  54   uint32_t twidCoefModifier);
  55
  56 void arm_bitreversal_q15(
  57   q15_t * pSrc,
  58   uint32_t fftLen,
  59   uint16_t bitRevFactor,
  60   uint16_t * pBitRevTab);
  61
  62         /*--------------------------------------------------------------------
  63 *               Internal functions prototypes
  64 --------------------------------------------------------------------*/
  65
  66 void arm_split_rfft_q15(
  67   q15_t * pSrc,
  68   uint32_t fftLen,
  69   q15_t * pATable,
  70   q15_t * pBTable,
  71   q15_t * pDst,
  72   uint32_t modifier);
  73
  74 void arm_split_rifft_q15(
  75   q15_t * pSrc,
  76   uint32_t fftLen,
  77   q15_t * pATable,
  78   q15_t * pBTable,
  79   q15_t * pDst,
  80   uint32_t modifier);
  81
  82 /**
  83  * @addtogroup RealFFT
  84  * @{
  85  */
  86
  87 /**
  88  * @brief Processing function for the Q15 RFFT/RIFFT.
  89  * @param[in]  *S    points to an instance of the Q15 RFFT/RIFFT structure.
  90  * @param[in]  *pSrc points to the input buffer.
  91  * @param[out] *pDst points to the output buffer.
  92  * @return none.
  93  *
  94  * \par Input an output formats:
  95  * \par
  96  * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
  97  * Hence the output format is different for different RFFT sizes.
  98  * The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT:
  99  * \par
 100  * \image html RFFTQ15.gif "Input and Output Formats for Q15 RFFT"
 101  * \par
 102  * \image html RIFFTQ15.gif "Input and Output Formats for Q15 RIFFT"
 103  */
 104
 105 void arm_rfft_q15(
 106   const arm_rfft_instance_q15 * S,
 107   q15_t * pSrc,
 108   q15_t * pDst)
 109 {
 110   const arm_cfft_radix4_instance_q15 *S_CFFT = S->pCfft;
 111
 112   /* Calculation of RIFFT of input */
 113   if(S->ifftFlagR == 1u)
 114   {
 115     /*  Real IFFT core process */
 116     arm_split_rifft_q15(pSrc, S->fftLenBy2, S->pTwiddleAReal,
 117                         S->pTwiddleBReal, pDst, S->twidCoefRModifier);
 118
 119     /* Complex readix-4 IFFT process */
 120     arm_radix4_butterfly_inverse_q15(pDst, S_CFFT->fftLen,
 121                                      S_CFFT->pTwiddle,
 122                                      S_CFFT->twidCoefModifier);
 123
 124     /* Bit reversal process */
 125     if(S->bitReverseFlagR == 1u)
 126     {
 127       arm_bitreversal_q15(pDst, S_CFFT->fftLen,
 128                           S_CFFT->bitRevFactor, S_CFFT->pBitRevTable);
 129     }
 130   }
 131   else
 132   {
 133     /* Calculation of RFFT of input */
 134
 135     /* Complex readix-4 FFT process */
 136     arm_radix4_butterfly_q15(pSrc, S_CFFT->fftLen,
 137                              S_CFFT->pTwiddle, S_CFFT->twidCoefModifier);
 138
 139     /* Bit reversal process */
 140     if(S->bitReverseFlagR == 1u)
 141     {
 142       arm_bitreversal_q15(pSrc, S_CFFT->fftLen,
 143                           S_CFFT->bitRevFactor, S_CFFT->pBitRevTable);
 144     }
 145
 146     arm_split_rfft_q15(pSrc, S->fftLenBy2, S->pTwiddleAReal,
 147                        S->pTwiddleBReal, pDst, S->twidCoefRModifier);
 148   }
 149
 150 }
 151
 152   /**
 153    * @} end of RealFFT group
 154    */
 155
 156 /**
 157  * @brief  Core Real FFT process
 158  * @param  *pSrc                                points to the input buffer.
 159  * @param  fftLen                               length of FFT.
 160  * @param  *pATable                     points to the A twiddle Coef buffer.
 161  * @param  *pBTable                     points to the B twiddle Coef buffer.
 162  * @param  *pDst                                points to the output buffer.
 163  * @param  modifier             twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
 164  * @return none.
 165  * The function implements a Real FFT
 166  */
 167
 168 void arm_split_rfft_q15(
 169   q15_t * pSrc,
 170   uint32_t fftLen,
 171   q15_t * pATable,
 172   q15_t * pBTable,
 173   q15_t * pDst,
 174   uint32_t modifier)
 175 {
 176   uint32_t i;                                    /* Loop Counter */
 177   q31_t outR, outI;                              /* Temporary variables for output */
 178   q15_t *pCoefA, *pCoefB;                        /* Temporary pointers for twiddle factors */
 179   q15_t *pSrc1, *pSrc2;
 180
 181
 182 //  pSrc[2u * fftLen] = pSrc[0];
 183 //  pSrc[(2u * fftLen) + 1u] = pSrc[1];
 184
 185   pCoefA = &pATable[modifier * 2u];
 186   pCoefB = &pBTable[modifier * 2u];
 187
 188   pSrc1 = &pSrc[2];
 189   pSrc2 = &pSrc[(2u * fftLen) - 2u];
 190
 191 #ifndef ARM_MATH_CM0_FAMILY
 192
 193   /* Run the below code for Cortex-M4 and Cortex-M3 */
 194
 195   i = 1u;
 196
 197   while(i < fftLen)
 198   {
 199     /*
 200        outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]
 201        + pSrc[2 * n - 2 * i] * pBTable[2 * i] +
 202        pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
 203      */
 204
 205     /* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +
 206        pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
 207        pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); */
 208
 209
 210 #ifndef ARM_MATH_BIG_ENDIAN
 211
 212     /* pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1] */
 213     outR = __SMUSD(*__SIMD32(pSrc1), *__SIMD32(pCoefA));
 214
 215 #else
 216
 217     /* -(pSrc[2 * i + 1] * pATable[2 * i + 1] - pSrc[2 * i] * pATable[2 * i]) */
 218     outR = -(__SMUSD(*__SIMD32(pSrc1), *__SIMD32(pCoefA)));
 219
 220 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
 221
 222     /* pSrc[2 * n - 2 * i] * pBTable[2 * i] +
 223        pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */
 224     outR = __SMLAD(*__SIMD32(pSrc2), *__SIMD32(pCoefB), outR) >> 15u;
 225
 226     /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
 227        pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
 228
 229 #ifndef ARM_MATH_BIG_ENDIAN
 230
 231     outI = __SMUSDX(*__SIMD32(pSrc2)--, *__SIMD32(pCoefB));
 232
 233 #else
 234
 235     outI = __SMUSDX(*__SIMD32(pCoefB), *__SIMD32(pSrc2)--);
 236
 237 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
 238
 239     /* (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] */
 240     outI = __SMLADX(*__SIMD32(pSrc1)++, *__SIMD32(pCoefA), outI);
 241
 242     /* write output */
 243     pDst[2u * i] = (q15_t) outR;
 244     pDst[(2u * i) + 1u] = outI >> 15u;
 245
 246     /* write complex conjugate output */
 247     pDst[(4u * fftLen) - (2u * i)] = (q15_t) outR;
 248     pDst[((4u * fftLen) - (2u * i)) + 1u] = -(outI >> 15u);
 249
 250     /* update coefficient pointer */
 251     pCoefB = pCoefB + (2u * modifier);
 252     pCoefA = pCoefA + (2u * modifier);
 253
 254     i++;
 255
 256   }
 257
 258   pDst[2u * fftLen] = pSrc[0] - pSrc[1];
 259   pDst[(2u * fftLen) + 1u] = 0;
 260
 261   pDst[0] = pSrc[0] + pSrc[1];
 262   pDst[1] = 0;
 263
 264
 265 #else
 266
 267   /* Run the below code for Cortex-M0 */
 268
 269   i = 1u;
 270
 271   while(i < fftLen)
 272   {
 273     /*
 274        outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]
 275        + pSrc[2 * n - 2 * i] * pBTable[2 * i] +
 276        pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
 277      */
 278
 279     outR = *pSrc1 * *pCoefA;
 280     outR = outR - (*(pSrc1 + 1) * *(pCoefA + 1));
 281     outR = outR + (*pSrc2 * *pCoefB);
 282     outR = (outR + (*(pSrc2 + 1) * *(pCoefB + 1))) >> 15;
 283
 284
 285     /* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +
 286        pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
 287        pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
 288      */
 289
 290     outI = *pSrc2 * *(pCoefB + 1);
 291     outI = outI - (*(pSrc2 + 1) * *pCoefB);
 292     outI = outI + (*(pSrc1 + 1) * *pCoefA);
 293     outI = outI + (*pSrc1 * *(pCoefA + 1));
 294
 295     /* update input pointers */
 296     pSrc1 += 2u;
 297     pSrc2 -= 2u;
 298
 299     /* write output */
 300     pDst[2u * i] = (q15_t) outR;
 301     pDst[(2u * i) + 1u] = outI >> 15u;
 302
 303     /* write complex conjugate output */
 304     pDst[(4u * fftLen) - (2u * i)] = (q15_t) outR;
 305     pDst[((4u * fftLen) - (2u * i)) + 1u] = -(outI >> 15u);
 306
 307     /* update coefficient pointer */
 308     pCoefB = pCoefB + (2u * modifier);
 309     pCoefA = pCoefA + (2u * modifier);
 310
 311     i++;
 312
 313   }
 314
 315   pDst[2u * fftLen] = pSrc[0] - pSrc[1];
 316   pDst[(2u * fftLen) + 1u] = 0;
 317
 318   pDst[0] = pSrc[0] + pSrc[1];
 319   pDst[1] = 0;
 320
 321 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
 322
 323 }
 324
 325
 326 /**
 327  * @brief  Core Real IFFT process
 328  * @param[in]   *pSrc                           points to the input buffer.
 329  * @param[in]   fftLen                      length of FFT.
 330  * @param[in]   *pATable                        points to the twiddle Coef A buffer.
 331  * @param[in]   *pBTable                        points to the twiddle Coef B buffer.
 332  * @param[out]  *pDst                           points to the output buffer.
 333  * @param[in]   modifier                twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
 334  * @return none.
 335  * The function implements a Real IFFT
 336  */
 337 void arm_split_rifft_q15(
 338   q15_t * pSrc,
 339   uint32_t fftLen,
 340   q15_t * pATable,
 341   q15_t * pBTable,
 342   q15_t * pDst,
 343   uint32_t modifier)
 344 {
 345   uint32_t i;                                    /* Loop Counter */
 346   q31_t outR, outI;                              /* Temporary variables for output */
 347   q15_t *pCoefA, *pCoefB;                        /* Temporary pointers for twiddle factors */
 348   q15_t *pSrc1, *pSrc2;
 349   q15_t *pDst1 = &pDst[0];
 350
 351   pCoefA = &pATable[0];
 352   pCoefB = &pBTable[0];
 353
 354   pSrc1 = &pSrc[0];
 355   pSrc2 = &pSrc[2u * fftLen];
 356
 357 #ifndef ARM_MATH_CM0_FAMILY
 358
 359   /* Run the below code for Cortex-M4 and Cortex-M3 */
 360
 361   i = fftLen;
 362
 363   while(i > 0u)
 364   {
 365
 366     /*
 367        outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
 368        pIn[2 * n - 2 * i] * pBTable[2 * i] -
 369        pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
 370
 371        outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -
 372        pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
 373        pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
 374
 375      */
 376
 377
 378 #ifndef ARM_MATH_BIG_ENDIAN
 379
 380     /* pIn[2 * n - 2 * i] * pBTable[2 * i] -
 381        pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */
 382     outR = __SMUSD(*__SIMD32(pSrc2), *__SIMD32(pCoefB));
 383
 384 #else
 385
 386     /* -(-pIn[2 * n - 2 * i] * pBTable[2 * i] +
 387        pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1])) */
 388     outR = -(__SMUSD(*__SIMD32(pSrc2), *__SIMD32(pCoefB)));
 389
 390 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
 391
 392     /* pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
 393        pIn[2 * n - 2 * i] * pBTable[2 * i] */
 394     outR = __SMLAD(*__SIMD32(pSrc1), *__SIMD32(pCoefA), outR) >> 15u;
 395
 396     /*
 397        -pIn[2 * n - 2 * i] * pBTable[2 * i + 1] +
 398        pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
 399     outI = __SMUADX(*__SIMD32(pSrc2)--, *__SIMD32(pCoefB));
 400
 401     /* pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] */
 402
 403 #ifndef ARM_MATH_BIG_ENDIAN
 404
 405     outI = __SMLSDX(*__SIMD32(pCoefA), *__SIMD32(pSrc1)++, -outI);
 406
 407 #else
 408
 409     outI = __SMLSDX(*__SIMD32(pSrc1)++, *__SIMD32(pCoefA), -outI);
 410
 411 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
 412     /* write output */
 413
 414 #ifndef ARM_MATH_BIG_ENDIAN
 415
 416     *__SIMD32(pDst1)++ = __PKHBT(outR, (outI >> 15u), 16);
 417
 418 #else
 419
 420     *__SIMD32(pDst1)++ = __PKHBT((outI >> 15u), outR, 16);
 421
 422 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
 423
 424     /* update coefficient pointer */
 425     pCoefB = pCoefB + (2u * modifier);
 426     pCoefA = pCoefA + (2u * modifier);
 427
 428     i--;
 429
 430   }
 431
 432
 433 #else
 434
 435   /* Run the below code for Cortex-M0 */
 436
 437   i = fftLen;
 438
 439   while(i > 0u)
 440   {
 441
 442     /*
 443        outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
 444        pIn[2 * n - 2 * i] * pBTable[2 * i] -
 445        pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
 446      */
 447
 448     outR = *pSrc2 * *pCoefB;
 449     outR = outR - (*(pSrc2 + 1) * *(pCoefB + 1));
 450     outR = outR + (*pSrc1 * *pCoefA);
 451     outR = (outR + (*(pSrc1 + 1) * *(pCoefA + 1))) >> 15;
 452
 453     /*
 454        outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -
 455        pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
 456        pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
 457      */
 458
 459     outI = *(pSrc1 + 1) * *pCoefA;
 460     outI = outI - (*pSrc1 * *(pCoefA + 1));
 461     outI = outI - (*pSrc2 * *(pCoefB + 1));
 462     outI = outI - (*(pSrc2 + 1) * *(pCoefB));
 463
 464     /* update input pointers */
 465     pSrc1 += 2u;
 466     pSrc2 -= 2u;
 467
 468     /* write output */
 469     *pDst1++ = (q15_t) outR;
 470     *pDst1++ = (q15_t) (outI >> 15);
 471
 472     /* update coefficient pointer */
 473     pCoefB = pCoefB + (2u * modifier);
 474     pCoefA = pCoefA + (2u * modifier);
 475
 476     i--;
 477
 478   }
 479
 480 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
 481
 482 }