1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
4 * $Date: 17. January 2013
7 * Project: CMSIS DSP Library
8 * Title: arm_fir_decimate_q15.c
10 * Description: Q15 FIR Decimator.
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
44 * @ingroup groupFilters
48 * @addtogroup FIR_decimate
53 * @brief Processing function for the Q15 FIR decimator.
54 * @param[in] *S points to an instance of the Q15 FIR decimator structure.
55 * @param[in] *pSrc points to the block of input data.
56 * @param[out] *pDst points to the location where the output result is written.
57 * @param[in] blockSize number of input samples to process per call.
60 * <b>Scaling and Overflow Behavior:</b>
62 * The function is implemented using a 64-bit internal accumulator.
63 * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
64 * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
65 * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
66 * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding the low 15 bits.
67 * Lastly, the accumulator is saturated to yield a result in 1.15 format.
70 * Refer to the function <code>arm_fir_decimate_fast_q15()</code> for a faster but less precise implementation of this function for Cortex-M3 and Cortex-M4.
73 #ifndef ARM_MATH_CM0_FAMILY
75 #ifndef UNALIGNED_SUPPORT_DISABLE
/*
 * arm_fir_decimate_q15 -- variant built when neither ARM_MATH_CM0_FAMILY nor
 * UNALIGNED_SUPPORT_DISABLE is defined (see the enclosing #ifndef guards).
 *
 * New input samples are appended to the state buffer after the (numTaps - 1)
 * samples kept from the previous call.  Outputs are first produced two at a
 * time (accumulators acc0/acc1, state pointer advanced by 2 * S->M), then any
 * leftover output is produced singly (accumulator sum0, state pointer advanced
 * by S->M).  State and coefficient values are fetched as packed pairs through
 * __SIMD32 and multiply-accumulated with __SMLALD.  Each accumulator is
 * shifted right by 15 and saturated to 16 bits before being stored through
 * pDst.  Finally the trailing (numTaps - 1) state samples are moved to the
 * start of S->pState for the next call.
 *
 * S         - filter instance; numTaps, M, pState and pCoeffs are read.
 * blockSize - number of input samples; blockSize / S->M outputs are produced.
 *
 * NOTE(review): S->M is used as a divisor without a visible check --
 * presumably validated when the instance is initialised; confirm.
 * NOTE(review): the accumulators are 64-bit; confirm that __SSAT does not
 * narrow the shifted value to 32 bits before saturating.
 */
77 void arm_fir_decimate_q15(
78 const arm_fir_decimate_instance_q15 * S,
83 q15_t *pState = S->pState; /* State pointer */
84 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
85 q15_t *pStateCurnt; /* Points to the current sample of the state */
86 q15_t *px; /* Temporary pointer for state buffer */
87 q15_t *pb; /* Temporary pointer for coefficient buffer */
88 q31_t x0, x1, c0, c1; /* Temporary variables to hold packed state and coefficient values */
89 q63_t sum0; /* 64-bit accumulator used on the single-output path */
93 uint32_t numTaps = S->numTaps; /* Number of taps */
94 uint32_t i, blkCnt, tapCnt, outBlockSize = blockSize / S->M; /* Loop counters; outBlockSize is the number of output samples */
97 /* S->pState buffer contains previous frame (numTaps - 1) samples */
98 /* pStateCurnt points to the location where the new input data should be written */
99 pStateCurnt = S->pState + (numTaps - 1u);
102 /* blkCnt counts pairs of output samples; blkCntN3 is the leftover (0 or 1) output */
103 blkCnt = outBlockSize / 2;
104 blkCntN3 = outBlockSize - (2 * blkCnt);
109 /* Copy decimation factor number of new input samples into the state buffer */
114 *pStateCurnt++ = *pSrc++;
118 /* Set accumulator to zero */
122 /* Initialize state pointer */
128 /* Initialize coeff pointer */
131 /* Loop unrolling. Process 4 taps at a time. */
132 tapCnt = numTaps >> 2;
134 /* Loop over the number of taps. Unroll by a factor of 4.
135 ** Repeat until we've computed numTaps-4 coefficients. */
138 /* Read the b[numTaps-1] and b[numTaps-2] coefficients */
139 c0 = *__SIMD32(pb)++;
141 /* Read x[n-numTaps-1] and x[n-numTaps-2] sample */
142 x0 = *__SIMD32(px0)++;
144 x1 = *__SIMD32(px1)++;
146 /* Perform the multiply-accumulate: the same coefficient pair is applied to both outputs */
147 acc0 = __SMLALD(x0, c0, acc0);
149 acc1 = __SMLALD(x1, c0, acc1);
151 /* Read the b[numTaps-3] and b[numTaps-4] coefficient */
152 c0 = *__SIMD32(pb)++;
154 /* Read x[n-numTaps-2] and x[n-numTaps-3] sample */
155 x0 = *__SIMD32(px0)++;
157 x1 = *__SIMD32(px1)++;
159 /* Perform the multiply-accumulate */
160 acc0 = __SMLALD(x0, c0, acc0);
162 acc1 = __SMLALD(x1, c0, acc1);
164 /* Decrement the loop counter */
168 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
169 tapCnt = numTaps % 0x4u;
173 /* Read coefficients */
176 /* Fetch 1 state variable */
181 /* Perform the multiply-accumulate */
182 acc0 = __SMLALD(x0, c0, acc0);
183 acc1 = __SMLALD(x1, c0, acc1);
185 /* Decrement the loop counter */
189 /* Advance the state pointer by twice the decimation factor,
190 * because two output samples were computed in this pass */
191 pState = pState + S->M * 2;
193 /* Store filter output: shift each accumulator right by 15 bits */
194 /* and saturate to 16 bits to get output in 1.15 */
195 *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
196 *pDst++ = (q15_t) (__SSAT((acc1 >> 15), 16));
198 /* Decrement the loop counter */
206 /* Copy decimation factor number of new input samples into the state buffer */
211 *pStateCurnt++ = *pSrc++;
218 /* Initialize state pointer */
221 /* Initialize coeff pointer */
224 /* Loop unrolling. Process 4 taps at a time. */
225 tapCnt = numTaps >> 2;
227 /* Loop over the number of taps. Unroll by a factor of 4.
228 ** Repeat until we've computed numTaps-4 coefficients. */
231 /* Read the b[numTaps-1] and b[numTaps-2] coefficients */
232 c0 = *__SIMD32(pb)++;
234 /* Read x[n-numTaps-1] and x[n-numTaps-2] sample */
235 x0 = *__SIMD32(px)++;
237 /* Read the b[numTaps-3] and b[numTaps-4] coefficient */
238 c1 = *__SIMD32(pb)++;
240 /* Perform the multiply-accumulate */
241 sum0 = __SMLALD(x0, c0, sum0);
243 /* Read x[n-numTaps-2] and x[n-numTaps-3] sample */
244 x0 = *__SIMD32(px)++;
246 /* Perform the multiply-accumulate */
247 sum0 = __SMLALD(x0, c1, sum0);
249 /* Decrement the loop counter */
253 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
254 tapCnt = numTaps % 0x4u;
258 /* Read coefficients */
261 /* Fetch 1 state variable */
264 /* Perform the multiply-accumulate */
265 sum0 = __SMLALD(x0, c0, sum0);
267 /* Decrement the loop counter */
271 /* Advance the state pointer by the decimation factor
272 * to process the next group of decimation factor number samples */
273 pState = pState + S->M;
275 /* Store filter output: shift the accumulator right by 15 bits */
276 /* and saturate to 16 bits to get output in 1.15 */
277 *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
279 /* Decrement the loop counter */
283 /* Processing is complete.
284 ** Now copy the last numTaps - 1 samples to the start of the state buffer.
285 ** This prepares the state buffer for the next function call. */
287 /* Points to the start of the state buffer */
288 pStateCurnt = S->pState;
/* Copy four samples per iteration, as two packed 32-bit transfers */
290 i = (numTaps - 1u) >> 2u;
295 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
296 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
298 /* Decrement the loop counter */
/* Copy the remaining (numTaps - 1) % 4 samples one at a time */
302 i = (numTaps - 1u) % 0x04u;
307 *pStateCurnt++ = *pState++;
309 /* Decrement the loop counter */
/*
 * arm_fir_decimate_q15 -- second definition inside the
 * #ifndef UNALIGNED_SUPPORT_DISABLE region (presumably its #else branch, i.e.
 * the build where unaligned accesses are disabled -- confirm against the
 * preprocessor structure).
 *
 * Same overall flow as the packed variant, but state and coefficient values
 * are held in q15_t temporaries (one sample each) and the final state-buffer
 * copy is done with scalar q15_t transfers.  Outputs are produced two at a
 * time (acc0/acc1, state pointer advanced by 2 * S->M) and then any leftover
 * output singly (sum0, state pointer advanced by S->M).  Each accumulator is
 * shifted right by 15 and saturated to 16 bits before being stored through
 * pDst.
 *
 * S         - filter instance; numTaps, M, pState and pCoeffs are read.
 * blockSize - number of input samples; blockSize / S->M outputs are produced.
 *
 * NOTE(review): S->M is used as a divisor without a visible check --
 * presumably validated when the instance is initialised; confirm.
 * NOTE(review): sum0 is q63_t; confirm that __SSAT does not narrow the
 * shifted value to 32 bits before saturating.
 */
317 void arm_fir_decimate_q15(
318 const arm_fir_decimate_instance_q15 * S,
323 q15_t *pState = S->pState; /* State pointer */
324 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
325 q15_t *pStateCurnt; /* Points to the current sample of the state */
326 q15_t *px; /* Temporary pointer for state buffer */
327 q15_t *pb; /* Temporary pointer for coefficient buffer */
328 q15_t x0, x1, c0; /* Temporary variables to hold single state and coefficient values */
329 q63_t sum0; /* 64-bit accumulator used on the single-output path */
333 uint32_t numTaps = S->numTaps; /* Number of taps */
334 uint32_t i, blkCnt, tapCnt, outBlockSize = blockSize / S->M; /* Loop counters; outBlockSize is the number of output samples */
337 /* S->pState buffer contains previous frame (numTaps - 1) samples */
338 /* pStateCurnt points to the location where the new input data should be written */
339 pStateCurnt = S->pState + (numTaps - 1u);
342 /* blkCnt counts pairs of output samples; blkCntN3 is the leftover (0 or 1) output */
343 blkCnt = outBlockSize / 2;
344 blkCntN3 = outBlockSize - (2 * blkCnt);
348 /* Copy decimation factor number of new input samples into the state buffer */
353 *pStateCurnt++ = *pSrc++;
357 /* Set accumulator to zero */
361 /* Initialize state pointer */
367 /* Initialize coeff pointer */
370 /* Loop unrolling. Process 4 taps at a time. */
371 tapCnt = numTaps >> 2;
373 /* Loop over the number of taps. Unroll by a factor of 4.
374 ** Repeat until we've computed numTaps-4 coefficients. */
377 /* Read the b[numTaps-1] coefficients */
380 /* Read x[n-numTaps-1] for sample 0 and for sample 1 */
384 /* Perform the multiply-accumulate */
388 /* Read the b[numTaps-2] coefficient */
391 /* Read x[n-numTaps-2] for sample 0 and sample 1 */
395 /* Perform the multiply-accumulate */
399 /* Read the b[numTaps-3] coefficients */
402 /* Read x[n-numTaps-3] for sample 0 and sample 1 */
406 /* Perform the multiply-accumulate */
410 /* Read the b[numTaps-4] coefficient */
413 /* Read x[n-numTaps-4] for sample 0 and sample 1 */
417 /* Perform the multiply-accumulate */
421 /* Decrement the loop counter */
425 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
426 tapCnt = numTaps % 0x4u;
430 /* Read coefficients */
433 /* Fetch 1 state variable */
437 /* Perform the multiply-accumulate */
441 /* Decrement the loop counter */
445 /* Advance the state pointer by twice the decimation factor,
446 * because two output samples were computed in this pass */
447 pState = pState + S->M * 2;
449 /* Store filter output: shift each accumulator right by 15 bits */
450 /* and saturate to 16 bits to get output in 1.15 */
452 *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
453 *pDst++ = (q15_t) (__SSAT((acc1 >> 15), 16));
455 /* Decrement the loop counter */
461 /* Copy decimation factor number of new input samples into the state buffer */
466 *pStateCurnt++ = *pSrc++;
473 /* Initialize state pointer */
476 /* Initialize coeff pointer */
479 /* Loop unrolling. Process 4 taps at a time. */
480 tapCnt = numTaps >> 2;
482 /* Loop over the number of taps. Unroll by a factor of 4.
483 ** Repeat until we've computed numTaps-4 coefficients. */
486 /* Read the b[numTaps-1] coefficients */
489 /* Read x[n-numTaps-1] sample */
492 /* Perform the multiply-accumulate */
495 /* Read the b[numTaps-2] coefficient */
498 /* Read x[n-numTaps-2] sample */
501 /* Perform the multiply-accumulate */
504 /* Read the b[numTaps-3] coefficients */
507 /* Read x[n-numTaps-3] sample */
510 /* Perform the multiply-accumulate */
513 /* Read the b[numTaps-4] coefficient */
516 /* Read x[n-numTaps-4] sample */
519 /* Perform the multiply-accumulate */
522 /* Decrement the loop counter */
526 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
527 tapCnt = numTaps % 0x4u;
531 /* Read coefficients */
534 /* Fetch 1 state variable */
537 /* Perform the multiply-accumulate */
540 /* Decrement the loop counter */
544 /* Advance the state pointer by the decimation factor
545 * to process the next group of decimation factor number samples */
546 pState = pState + S->M;
548 /* Store filter output: shift the accumulator right by 15 bits */
549 /* and saturate to 16 bits to get output in 1.15 */
550 *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
552 /* Decrement the loop counter */
556 /* Processing is complete.
557 ** Now copy the last numTaps - 1 samples to the start of the state buffer.
558 ** This prepares the state buffer for the next function call. */
560 /* Points to the start of the state buffer */
561 pStateCurnt = S->pState;
/* Copy four samples per iteration using scalar q15_t transfers */
563 i = (numTaps - 1u) >> 2u;
568 *pStateCurnt++ = *pState++;
569 *pStateCurnt++ = *pState++;
570 *pStateCurnt++ = *pState++;
571 *pStateCurnt++ = *pState++;
573 /* Decrement the loop counter */
/* Copy the remaining (numTaps - 1) % 4 samples one at a time */
577 i = (numTaps - 1u) % 0x04u;
582 *pStateCurnt++ = *pState++;
584 /* Decrement the loop counter */
590 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
/*
 * arm_fir_decimate_q15 -- Cortex-M0 variant (presumably the #else branch of
 * #ifndef ARM_MATH_CM0_FAMILY -- confirm against the preprocessor structure).
 *
 * Produces one output sample at a time: new input samples are appended to
 * the state buffer, each state/coefficient product is added to the 64-bit
 * accumulator sum0, the state pointer is advanced by S->M, and the
 * accumulator is shifted right by 15 and saturated to 16 bits before being
 * stored through pDst.  Afterwards the trailing state samples are copied to
 * the start of S->pState for the next call.
 *
 * S         - filter instance; numTaps, M, pState and pCoeffs are read.
 * blockSize - number of input samples; blockSize / S->M outputs are produced.
 *
 * NOTE(review): S->M is used as a divisor without a visible check --
 * presumably validated when the instance is initialised; confirm.
 * NOTE(review): sum0 is q63_t; confirm that __SSAT does not narrow the
 * shifted value to 32 bits before saturating.
 */
595 void arm_fir_decimate_q15(
596 const arm_fir_decimate_instance_q15 * S,
601 q15_t *pState = S->pState; /* State pointer */
602 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
603 q15_t *pStateCurnt; /* Points to the current sample of the state */
604 q15_t *px; /* Temporary pointer for state buffer */
605 q15_t *pb; /* Temporary pointer for coefficient buffer */
606 q31_t x0, c0; /* Temporary variables to hold state and coefficient values */
607 q63_t sum0; /* 64-bit accumulator */
608 uint32_t numTaps = S->numTaps; /* Number of taps */
609 uint32_t i, blkCnt, tapCnt, outBlockSize = blockSize / S->M; /* Loop counters; outBlockSize is the number of output samples */
613 /* Run the below code for Cortex-M0 */
615 /* S->pState buffer contains previous frame (numTaps - 1) samples */
616 /* pStateCurnt points to the location where the new input data should be written */
617 pStateCurnt = S->pState + (numTaps - 1u);
619 /* Total number of output samples to be computed */
620 blkCnt = outBlockSize;
624 /* Copy decimation factor number of new input samples into the state buffer */
629 *pStateCurnt++ = *pSrc++;
636 /* Initialize state pointer */
639 /* Initialize coeff pointer */
646 /* Read coefficients */
649 /* Fetch 1 state variable */
652 /* Perform the multiply-accumulate: the 32-bit product is added into the 64-bit sum0 */
653 sum0 += (q31_t) x0 *c0;
655 /* Decrement the loop counter */
659 /* Advance the state pointer by the decimation factor
660 * to process the next group of decimation factor number samples */
661 pState = pState + S->M;
663 /* Store filter output: shift the accumulator right by 15 bits */
664 /* and saturate to 16 bits to get output in 1.15 */
665 *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
667 /* Decrement the loop counter */
671 /* Processing is complete.
672 ** Now copy the last numTaps - 1 samples to the start of the state buffer.
673 ** This prepares the state buffer for the next function call. */
675 /* Points to the start of the state buffer */
676 pStateCurnt = S->pState;
683 *pStateCurnt++ = *pState++;
685 /* Decrement the loop counter */
691 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
695 * @} end of FIR_decimate group