1 #ifndef __INC_LIB8TION_MATH_H
2 #define __INC_LIB8TION_MATH_H
8 ///@defgroup Math Basic math operations
9 /// Fast, efficient 8-bit math functions specifically
10 /// designed for high-performance LED programming.
12 /// Because of the AVR(Arduino) and ARM assembly language
13 /// implementations provided, using these functions often
14 /// results in smaller and faster code than the equivalent
15 /// program using plain "C" arithmetic and logic.
19 /// Add one byte to another, saturating at 0xFF
20 /// @param i - first byte to add
21 /// @param j - second byte to add
22 /// @returns the sum of i and j, capped at 0xFF
23 LIB8STATIC_ALWAYS_INLINE uint8_t qadd8( uint8_t i, uint8_t j)
29 #elif QADD8_AVRASM == 1
31 /* First, add j to i, conditioning the C flag */
34 /* Now test the C flag.
35 If C is clear, we branch around a load of 0xFF into i.
36 If C is set, we go ahead and load 0xFF into i.
44 #elif QADD8_ARM_DSP_ASM == 1
45 asm volatile( "uqadd8 %0, %0, %1" : "+r" (i) : "r" (j));
48 #error "No implementation for qadd8 available."
52 /// Add one byte to another, saturating at 0x7F
53 /// @param i - first byte to add
54 /// @param j - second byte to add
55 /// @returns the sum of i and j, capped at 0x7F
56 LIB8STATIC_ALWAYS_INLINE int8_t qadd7( int8_t i, int8_t j)
62 #elif QADD7_AVRASM == 1
64 /* First, add j to i, conditioning the V flag */
67 /* Now test the V flag.
68 If V is clear, we branch around a load of 0x7F into i.
69 If V is set, we go ahead and load 0x7F into i.
78 #elif QADD7_ARM_DSP_ASM == 1
79 asm volatile( "qadd8 %0, %0, %1" : "+r" (i) : "r" (j));
82 #error "No implementation for qadd7 available."
86 /// Subtract one byte from another, saturating at 0x00
/// @param i - byte to subtract from
/// @param j - byte to subtract
87 /// @returns i - j with a floor of 0
88 LIB8STATIC_ALWAYS_INLINE uint8_t qsub8( uint8_t i, uint8_t j)
94 #elif QSUB8_AVRASM == 1
97 /* First, subtract j from i, conditioning the C flag */
100 /* Now test the C flag.
101 If C is clear, we branch around a load of 0x00 into i.
102 If C is set, we go ahead and load 0x00 into i.
112 #error "No implementation for qsub8 available."
116 /// Add one byte to another, with one byte result
/// @param i - first byte to add
/// @param j - second byte to add
/// @returns the 8-bit sum of i and j; the AVR path below is a plain `add`
///          with no saturation step (see qadd8 for the saturating form)
117 LIB8STATIC_ALWAYS_INLINE uint8_t add8( uint8_t i, uint8_t j)
122 #elif ADD8_AVRASM == 1
123 // Add j to i, period.
124 asm volatile( "add %0, %1" : "+a" (i) : "a" (j));
127 #error "No implementation for add8 available."
131 /// Add one byte to a two-byte value, with a two-byte (16-bit) result
/// @param i - the byte to add
/// @param j - the 16-bit value it is added to
/// @returns the 16-bit sum of i and j
132 LIB8STATIC_ALWAYS_INLINE uint16_t add8to16( uint8_t i, uint16_t j)
// NOTE(review): this branch is selected by ADD8_AVRASM, the same switch that
// add8 uses, rather than by a macro of its own - confirm that is intended.
137 #elif ADD8_AVRASM == 1
138 // Add i(one byte) to j(two bytes)
139 asm volatile( "add %A[j], %[i] \n\t"
140 "adc %B[j], __zero_reg__ \n\t"
146 #error "No implementation for add8to16 available."
151 /// Subtract one byte from another, 8-bit result
/// @param i - byte to subtract from
/// @param j - byte to subtract
/// @returns the 8-bit difference i - j; the AVR path below is a plain `sub`
///          with no saturation step (see qsub8 for the saturating form)
152 LIB8STATIC_ALWAYS_INLINE uint8_t sub8( uint8_t i, uint8_t j)
157 #elif SUB8_AVRASM == 1
158 // Subtract j from i, period.
159 asm volatile( "sub %0, %1" : "+a" (i) : "a" (j));
162 #error "No implementation for sub8 available."
166 /// Calculate an integer average of two unsigned
167 /// 8-bit integer values (uint8_t).
168 /// Fractional results are rounded down, e.g. avg8(20,41) = 30
/// @param i - first value to average
/// @param j - second value to average
/// @returns the average of i and j, rounded down
169 LIB8STATIC_ALWAYS_INLINE uint8_t avg8( uint8_t i, uint8_t j)
173 #elif AVG8_AVRASM == 1
175 /* First, add j to i, 9th bit overflows into C flag */
177 /* Divide by two, moving C flag into high 8th bit */
183 #error "No implementation for avg8 available."
187 /// Calculate an integer average of two unsigned
188 /// 16-bit integer values (uint16_t).
189 /// Fractional results are rounded down, e.g. avg16(20,41) = 30
/// @param i - first value to average
/// @param j - second value to average
/// @returns the average of i and j, rounded down
190 LIB8STATIC_ALWAYS_INLINE uint16_t avg16( uint16_t i, uint16_t j)
// The C path widens both operands to 32 bits so the sum keeps its 17th bit
// before being halved.
193 return (uint32_t)((uint32_t)(i) + (uint32_t)(j)) >> 1;
194 #elif AVG16_AVRASM == 1
196 /* First, add jLo (heh) to iLo, 9th bit overflows into C flag */
197 "add %A[i], %A[j] \n\t"
198 /* Now, add C + jHi to iHi, 17th bit overflows into C flag */
199 "adc %B[i], %B[j] \n\t"
200 /* Divide iHi by two, moving C flag into high 16th bit, old 9th bit now in C */
202 /* Divide iLo by two, moving C flag into high 8th bit */
208 #error "No implementation for avg16 available."
213 /// Calculate an integer average of two signed 7-bit
214 /// integers (int8_t)
215 /// If the first argument is even, result is rounded down.
216 /// If the first argument is odd, result is rounded up.
/// @param i - first value to average; its low bit selects the rounding direction
/// @param j - second value to average
/// @returns the average of i and j
217 LIB8STATIC_ALWAYS_INLINE int8_t avg7( int8_t i, int8_t j)
// Halve each operand before summing, then add i's low bit as the rounding
// term. Summing first and adding (i & 1) afterwards overshoots by one when
// both inputs are odd (avg7(1,1) would come out as 2), and the full sum can
// also exceed the int8_t range (avg7(127,127)).
220 return (i >> 1) + (j >> 1) + (i & 0x1);
221 #elif AVG7_AVRASM == 1
230 #error "No implementation for avg7 available."
234 /// Calculate an integer average of two signed 15-bit
235 /// integers (int16_t)
236 /// If the first argument is even, result is rounded down.
237 /// If the first argument is odd, result is rounded up.
/// @param i - first value to average; its low bit selects the rounding direction
/// @param j - second value to average
/// @returns the average of i and j
238 LIB8STATIC_ALWAYS_INLINE int16_t avg15( int16_t i, int16_t j)
// Same shape as the AVR path below: halve j, halve i, then add the two halves
// plus i's low bit. Summing first and adding (i & 1) afterwards overshoots by
// one when both inputs are odd (avg15(1,1) would come out as 2). Halving
// first also keeps every intermediate within the int16_t range.
241 return (i >> 1) + (j >> 1) + (i & 0x1);
242 #elif AVG15_AVRASM == 1
244 /* first divide j by 2, throwing away lowest bit */
247 /* now divide i by 2, with lowest bit going into C */
251 "adc %A[i], %A[j] \n\t"
252 "adc %B[i], %B[j] \n\t"
257 #error "No implementation for avg15 available."
262 /// Calculate the remainder of one unsigned 8-bit
263 /// value divided by another, aka A % M.
264 /// Implemented by repeated subtraction, which is
265 /// very compact, and very fast if A is 'probably'
266 /// less than M. If A is a large multiple of M,
267 /// the loop has to execute multiple times. However,
268 /// even in that case, the loop is only two
269 /// instructions long on AVR, i.e., quick.
/// @param a - the dividend
/// @param m - the modulus
/// @returns the remainder of a divided by m
/// NOTE(review): the C loop `while( a >= m) a -= m;` cannot terminate when
/// m == 0, so callers presumably must pass m > 0 - confirm.
270 LIB8STATIC_ALWAYS_INLINE uint8_t mod8( uint8_t a, uint8_t m)
274 "L_%=: sub %[a],%[m] \n\t"
276 " add %[a],%[m] \n\t"
281 while( a >= m) a -= m;
286 /// Add two numbers, and calculate the modulo
287 /// of the sum and a third number, M.
288 /// In other words, it returns (A+B) % M.
289 /// It is designed as a compact mechanism for
290 /// incrementing a 'mode' switch and wrapping
291 /// around back to 'mode 0' when the switch
292 /// goes past the end of the available range.
293 /// e.g. if you have seven modes, this switches
294 /// to the next one and wraps around if needed:
295 /// mode = addmod8( mode, 1, 7);
296 /// See 'mod8' for notes on performance.
/// @param a - first value to add
/// @param b - second value to add
/// @param m - the modulus
297 LIB8STATIC uint8_t addmod8( uint8_t a, uint8_t b, uint8_t m)
301 " add %[a],%[b] \n\t"
302 "L_%=: sub %[a],%[m] \n\t"
304 " add %[a],%[m] \n\t"
306 : [b] "r" (b), [m] "r" (m)
310 while( a >= m) a -= m;
315 /// Subtract two numbers, and calculate the modulo
316 /// of the difference and a third number, M.
317 /// In other words, it returns (A-B) % M.
318 /// It is designed as a compact mechanism for
319 /// incrementing a 'mode' switch and wrapping
320 /// around back to 'mode 0' when the switch
321 /// goes past the end of the available range.
322 /// e.g. if you have seven modes, this switches
323 /// to the next one and wraps around if needed:
324 /// mode = addmod8( mode, 1, 7);
325 /// See 'mod8' for notes on performance.
/// NOTE(review): the usage description and example above read as if carried
/// over from addmod8 (they talk about incrementing and call addmod8, not
/// submod8) - confirm the intended use and replace them. Both operands are
/// uint8_t, so when b > a the 8-bit difference wraps before the modulo
/// reduction is applied.
/// @param a - value to subtract from
/// @param b - value to subtract
/// @param m - the modulus
326 LIB8STATIC uint8_t submod8( uint8_t a, uint8_t b, uint8_t m)
330 " sub %[a],%[b] \n\t"
331 "L_%=: sub %[a],%[m] \n\t"
333 " add %[a],%[m] \n\t"
335 : [b] "r" (b), [m] "r" (m)
339 while( a >= m) a -= m;
344 /// 8x8 bit multiplication, with 8 bit result
/// @param i - first byte to multiply
/// @param j - second byte to multiply
/// @returns the low 8 bits of the product i * j (no saturation; see qmul8
///          for the saturating form)
345 LIB8STATIC_ALWAYS_INLINE uint8_t mul8( uint8_t i, uint8_t j)
348 return ((uint16_t)i * (uint16_t)(j) ) & 0xFF;
349 #elif MUL8_AVRASM == 1
351 /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */
353 /* Extract the LOW 8-bits (r0) */
355 /* Restore r1 to "0"; it's expected to always be that */
356 "clr __zero_reg__ \n\t"
363 #error "No implementation for mul8 available."
368 /// Saturating 8x8 bit multiplication, with 8 bit result
/// @param i - first byte to multiply
/// @param j - second byte to multiply
369 /// @returns the product of i * j, capping at 0xFF
370 LIB8STATIC_ALWAYS_INLINE uint8_t qmul8( uint8_t i, uint8_t j)
// Hold the product in an unsigned 16-bit variable: the largest product,
// 255 * 255 = 65025, does not fit in a signed 16-bit int, so on targets
// where int is 16 bits a signed holder would defeat the > 255 test below.
373 uint16_t p = ((uint16_t)i * (uint16_t)(j) );
374 if( p > 255) p = 255;
376 #elif QMUL8_AVRASM == 1
378 /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */
380 /* If high byte of result is zero, all is well. */
382 " breq Lnospill_%= \n\t"
383 /* If high byte of result > 0, saturate low byte to 0xFF */
385 " rjmp Ldone_%= \n\t"
387 /* Extract the LOW 8-bits (r0) */
390 /* Restore r1 to "0"; it's expected to always be that */
391 " clr __zero_reg__ \n\t"
398 #error "No implementation for qmul8 available."
403 /// Take abs() of a signed 8-bit int8_t
/// @param i - the signed byte
/// @returns the absolute value of i
/// NOTE(review): the return type is int8_t, which cannot represent +128 -
/// confirm what abs8(-128) is expected to yield.
404 LIB8STATIC_ALWAYS_INLINE int8_t abs8( int8_t i)
409 #elif ABS8_AVRASM == 1
413 /* First, check the high bit, and prepare to skip if it's clear */
416 /* Negate the value */
419 : "+r" (i) : "r" (i) );
422 #error "No implementation for abs8 available."
426 /// Square root for 16-bit integers
427 /// About three times faster and five times smaller
428 /// than Arduino's general sqrt on AVR.
/// @param x - the 16-bit value to take the square root of
/// @returns the square root as an 8-bit value
/// The visible steps narrow a [low, hi] bracket: a midpoint is taken and its
/// square is compared against x.
429 LIB8STATIC uint8_t sqrt16(uint16_t x)
435 uint8_t low = 1; // lower bound
441 hi = (x >> 5) + 8; // initial estimate for upper bound
445 mid = (low + hi) >> 1;
446 if ((uint16_t)(mid * mid) > x) {
459 /// Blend a variable proportion (0-255) of one byte to another
460 /// @param a - the starting byte value
461 /// @param b - the byte value to blend toward
462 /// @param amountOfB - the proportion (0-255) of b to blend
463 /// @returns a byte value between a and b, inclusive
464 #if (FASTLED_BLEND_FIXED == 1)
465 LIB8STATIC uint8_t blend8( uint8_t a, uint8_t b, uint8_t amountOfB)
471 uint8_t amountOfA = 255 - amountOfB;
473 partial = (a * amountOfA);
474 #if (FASTLED_SCALE8_FIXED == 1)
476 //partial = add8to16( a, partial);
479 partial += (b * amountOfB);
480 #if (FASTLED_SCALE8_FIXED == 1)
482 //partial = add8to16( b, partial);
485 result = partial >> 8;
489 #elif BLEND8_AVRASM == 1
494 /* partial = b * amountOfB */
495 " mul %[b], %[amountOfB] \n\t"
496 " movw %A[partial], r0 \n\t"
498 /* amountOfB (aka amountOfA) = 255 - amountOfB */
499 " com %[amountOfB] \n\t"
501 /* partial += a * amountOfB (aka amountOfA) */
502 " mul %[a], %[amountOfB] \n\t"
504 " add %A[partial], r0 \n\t"
505 " adc %B[partial], r1 \n\t"
507 " clr __zero_reg__ \n\t"
509 #if (FASTLED_SCALE8_FIXED == 1)
511 " add %A[partial], %[a] \n\t"
512 " adc %B[partial], __zero_reg__ \n\t"
515 " add %A[partial], %[b] \n\t"
516 " adc %B[partial], __zero_reg__ \n\t"
519 : [partial] "=r" (partial),
520 [amountOfB] "+a" (amountOfB)
526 result = partial >> 8;
531 #error "No implementation for blend8 available."
536 LIB8STATIC uint8_t blend8( uint8_t a, uint8_t b, uint8_t amountOfB)
538 // This version loses precision in the integer math
539 // and can actually return results outside of the range
540 // from a to b. Its use is not recommended.
542 uint8_t amountOfA = 255 - amountOfB;
543 result = scale8_LEAVING_R1_DIRTY( a, amountOfA)
544 + scale8_LEAVING_R1_DIRTY( b, amountOfB);