lib/lib8tion/math8.h

   1 #ifndef __INC_LIB8TION_MATH_H
   2 #define __INC_LIB8TION_MATH_H
   3
   4 #include "scale8.h"
   5
   6 ///@ingroup lib8tion
   7
   8 ///@defgroup Math Basic math operations
   9 /// Fast, efficient 8-bit math functions specifically
  10 /// designed for high-performance LED programming.
  11 ///
  12 /// Because of the AVR(Arduino) and ARM assembly language
  13 /// implementations provided, using these functions often
  14 /// results in smaller and faster code than the equivalent
  15 /// program using plain "C" arithmetic and logic.
  16 ///@{
  17
  18
  19 /// add one byte to another, saturating at 0xFF
  20 /// @param i - first byte to add
  21 /// @param j - second byte to add
  22 /// @returns the sum of i & j, capped at 0xFF
  23 LIB8STATIC_ALWAYS_INLINE uint8_t qadd8( uint8_t i, uint8_t j)
  24 {
  25 #if QADD8_C == 1
  26     uint16_t t = i + j;
  27     if (t > 255) t = 255;
  28     return t;
  29 #elif QADD8_AVRASM == 1
  30     asm volatile(
  31          /* First, add j to i, conditioning the C flag */
  32          "add %0, %1    \n\t"
  33
  34          /* Now test the C flag.
  35            If C is clear, we branch around a load of 0xFF into i.
  36            If C is set, we go ahead and load 0xFF into i.
  37          */
  38          "brcc L_%=     \n\t"
  39          "ldi %0, 0xFF  \n\t"
  40          "L_%=: "
  41          : "+a" (i)
  42          : "a"  (j) );
  43     return i;
  44 #elif QADD8_ARM_DSP_ASM == 1
  45     asm volatile( "uqadd8 %0, %0, %1" : "+r" (i) : "r" (j));
  46     return i;
  47 #else
  48 #error "No implementation for qadd8 available."
  49 #endif
  50 }
  51
  52 /// Add one byte to another, saturating at 0x7F
  53 /// @param i - first byte to add
  54 /// @param j - second byte to add
  55 /// @returns the sum of i & j, capped at 0xFF
  56 LIB8STATIC_ALWAYS_INLINE int8_t qadd7( int8_t i, int8_t j)
  57 {
  58 #if QADD7_C == 1
  59     int16_t t = i + j;
  60     if (t > 127) t = 127;
  61     return t;
  62 #elif QADD7_AVRASM == 1
  63     asm volatile(
  64          /* First, add j to i, conditioning the V flag */
  65          "add %0, %1    \n\t"
  66
  67          /* Now test the V flag.
  68           If V is clear, we branch around a load of 0x7F into i.
  69           If V is set, we go ahead and load 0x7F into i.
  70           */
  71          "brvc L_%=     \n\t"
  72          "ldi %0, 0x7F  \n\t"
  73          "L_%=: "
  74          : "+a" (i)
  75          : "a"  (j) );
  76
  77     return i;
  78 #elif QADD7_ARM_DSP_ASM == 1
  79     asm volatile( "qadd8 %0, %0, %1" : "+r" (i) : "r" (j));
  80     return i;
  81 #else
  82 #error "No implementation for qadd7 available."
  83 #endif
  84 }
  85
  86 /// subtract one byte from another, saturating at 0x00
  87 /// @returns i - j with a floor of 0
  88 LIB8STATIC_ALWAYS_INLINE uint8_t qsub8( uint8_t i, uint8_t j)
  89 {
  90 #if QSUB8_C == 1
  91     int16_t t = i - j;
  92     if (t < 0) t = 0;
  93     return t;
  94 #elif QSUB8_AVRASM == 1
  95
  96     asm volatile(
  97          /* First, subtract j from i, conditioning the C flag */
  98          "sub %0, %1    \n\t"
  99
 100          /* Now test the C flag.
 101           If C is clear, we branch around a load of 0x00 into i.
 102           If C is set, we go ahead and load 0x00 into i.
 103           */
 104          "brcc L_%=     \n\t"
 105          "ldi %0, 0x00  \n\t"
 106          "L_%=: "
 107          : "+a" (i)
 108          : "a"  (j) );
 109
 110     return i;
 111 #else
 112 #error "No implementation for qsub8 available."
 113 #endif
 114 }
 115
 116 /// add one byte to another, with one byte result
 117 LIB8STATIC_ALWAYS_INLINE uint8_t add8( uint8_t i, uint8_t j)
 118 {
 119 #if ADD8_C == 1
 120     uint16_t t = i + j;
 121     return t;
 122 #elif ADD8_AVRASM == 1
 123     // Add j to i, period.
 124     asm volatile( "add %0, %1" : "+a" (i) : "a" (j));
 125     return i;
 126 #else
 127 #error "No implementation for add8 available."
 128 #endif
 129 }
 130
 131 /// add one byte to another, with one byte result
 132 LIB8STATIC_ALWAYS_INLINE uint16_t add8to16( uint8_t i, uint16_t j)
 133 {
 134 #if ADD8_C == 1
 135     uint16_t t = i + j;
 136     return t;
 137 #elif ADD8_AVRASM == 1
 138     // Add i(one byte) to j(two bytes)
 139     asm volatile( "add %A[j], %[i]              \n\t"
 140                   "adc %B[j], __zero_reg__      \n\t"
 141                  : [j] "+a" (j)
 142                  : [i] "a"  (i)
 143                  );
 144     return i;
 145 #else
 146 #error "No implementation for add8to16 available."
 147 #endif
 148 }
 149
 150
 151 /// subtract one byte from another, 8-bit result
 152 LIB8STATIC_ALWAYS_INLINE uint8_t sub8( uint8_t i, uint8_t j)
 153 {
 154 #if SUB8_C == 1
 155     int16_t t = i - j;
 156     return t;
 157 #elif SUB8_AVRASM == 1
 158     // Subtract j from i, period.
 159     asm volatile( "sub %0, %1" : "+a" (i) : "a" (j));
 160     return i;
 161 #else
 162 #error "No implementation for sub8 available."
 163 #endif
 164 }
 165
 166 /// Calculate an integer average of two unsigned
 167 ///       8-bit integer values (uint8_t).
 168 ///       Fractional results are rounded down, e.g. avg8(20,41) = 30
 169 LIB8STATIC_ALWAYS_INLINE uint8_t avg8( uint8_t i, uint8_t j)
 170 {
 171 #if AVG8_C == 1
 172     return (i + j) >> 1;
 173 #elif AVG8_AVRASM == 1
 174     asm volatile(
 175          /* First, add j to i, 9th bit overflows into C flag */
 176          "add %0, %1    \n\t"
 177          /* Divide by two, moving C flag into high 8th bit */
 178          "ror %0        \n\t"
 179          : "+a" (i)
 180          : "a"  (j) );
 181     return i;
 182 #else
 183 #error "No implementation for avg8 available."
 184 #endif
 185 }
 186
 187 /// Calculate an integer average of two unsigned
 188 ///       16-bit integer values (uint16_t).
 189 ///       Fractional results are rounded down, e.g. avg16(20,41) = 30
 190 LIB8STATIC_ALWAYS_INLINE uint16_t avg16( uint16_t i, uint16_t j)
 191 {
 192 #if AVG16_C == 1
 193     return (uint32_t)((uint32_t)(i) + (uint32_t)(j)) >> 1;
 194 #elif AVG16_AVRASM == 1
 195     asm volatile(
 196                  /* First, add jLo (heh) to iLo, 9th bit overflows into C flag */
 197                  "add %A[i], %A[j]    \n\t"
 198                  /* Now, add C + jHi to iHi, 17th bit overflows into C flag */
 199                  "adc %B[i], %B[j]    \n\t"
 200                  /* Divide iHi by two, moving C flag into high 16th bit, old 9th bit now in C */
 201                  "ror %B[i]        \n\t"
 202                  /* Divide iLo by two, moving C flag into high 8th bit */
 203                  "ror %A[i]        \n\t"
 204                  : [i] "+a" (i)
 205                  : [j] "a"  (j) );
 206     return i;
 207 #else
 208 #error "No implementation for avg16 available."
 209 #endif
 210 }
 211
 212
 213 /// Calculate an integer average of two signed 7-bit
 214 ///       integers (int8_t)
 215 ///       If the first argument is even, result is rounded down.
 216 ///       If the first argument is odd, result is result up.
 217 LIB8STATIC_ALWAYS_INLINE int8_t avg7( int8_t i, int8_t j)
 218 {
 219 #if AVG7_C == 1
 220     return ((i + j) >> 1) + (i & 0x1);
 221 #elif AVG7_AVRASM == 1
 222     asm volatile(
 223                  "asr %1        \n\t"
 224                  "asr %0        \n\t"
 225                  "adc %0, %1    \n\t"
 226                  : "+a" (i)
 227                  : "a"  (j) );
 228     return i;
 229 #else
 230 #error "No implementation for avg7 available."
 231 #endif
 232 }
 233
 234 /// Calculate an integer average of two signed 15-bit
 235 ///       integers (int16_t)
 236 ///       If the first argument is even, result is rounded down.
 237 ///       If the first argument is odd, result is result up.
 238 LIB8STATIC_ALWAYS_INLINE int16_t avg15( int16_t i, int16_t j)
 239 {
 240 #if AVG15_C == 1
 241     return ((int32_t)((int32_t)(i) + (int32_t)(j)) >> 1) + (i & 0x1);
 242 #elif AVG15_AVRASM == 1
 243     asm volatile(
 244                  /* first divide j by 2, throwing away lowest bit */
 245                  "asr %B[j]          \n\t"
 246                  "ror %A[j]          \n\t"
 247                  /* now divide i by 2, with lowest bit going into C */
 248                  "asr %B[i]          \n\t"
 249                  "ror %A[i]          \n\t"
 250                  /* add j + C to i */
 251                  "adc %A[i], %A[j]   \n\t"
 252                  "adc %B[i], %B[j]   \n\t"
 253                  : [i] "+a" (i)
 254                  : [j] "a"  (j) );
 255     return i;
 256 #else
 257 #error "No implementation for avg15 available."
 258 #endif
 259 }
 260
 261
 262 ///       Calculate the remainder of one unsigned 8-bit
 263 ///       value divided by anoter, aka A % M.
 264 ///       Implemented by repeated subtraction, which is
 265 ///       very compact, and very fast if A is 'probably'
 266 ///       less than M.  If A is a large multiple of M,
 267 ///       the loop has to execute multiple times.  However,
 268 ///       even in that case, the loop is only two
 269 ///       instructions long on AVR, i.e., quick.
 270 LIB8STATIC_ALWAYS_INLINE uint8_t mod8( uint8_t a, uint8_t m)
 271 {
 272 #if defined(__AVR__)
 273     asm volatile (
 274                   "L_%=:  sub %[a],%[m]    \n\t"
 275                   "       brcc L_%=        \n\t"
 276                   "       add %[a],%[m]    \n\t"
 277                   : [a] "+r" (a)
 278                   : [m] "r"  (m)
 279                   );
 280 #else
 281     while( a >= m) a -= m;
 282 #endif
 283     return a;
 284 }
 285
 286 ///          Add two numbers, and calculate the modulo
 287 ///          of the sum and a third number, M.
 288 ///          In other words, it returns (A+B) % M.
 289 ///          It is designed as a compact mechanism for
 290 ///          incrementing a 'mode' switch and wrapping
 291 ///          around back to 'mode 0' when the switch
 292 ///          goes past the end of the available range.
 293 ///          e.g. if you have seven modes, this switches
 294 ///          to the next one and wraps around if needed:
 295 ///            mode = addmod8( mode, 1, 7);
 296 ///LIB8STATIC_ALWAYS_INLINESee 'mod8' for notes on performance.
 297 LIB8STATIC uint8_t addmod8( uint8_t a, uint8_t b, uint8_t m)
 298 {
 299 #if defined(__AVR__)
 300     asm volatile (
 301                   "       add %[a],%[b]    \n\t"
 302                   "L_%=:  sub %[a],%[m]    \n\t"
 303                   "       brcc L_%=        \n\t"
 304                   "       add %[a],%[m]    \n\t"
 305                   : [a] "+r" (a)
 306                   : [b] "r"  (b), [m] "r" (m)
 307                   );
 308 #else
 309     a += b;
 310     while( a >= m) a -= m;
 311 #endif
 312     return a;
 313 }
 314
 315 ///          Subtract two numbers, and calculate the modulo
 316 ///          of the difference and a third number, M.
 317 ///          In other words, it returns (A-B) % M.
 318 ///          It is designed as a compact mechanism for
 319 ///          incrementing a 'mode' switch and wrapping
 320 ///          around back to 'mode 0' when the switch
 321 ///          goes past the end of the available range.
 322 ///          e.g. if you have seven modes, this switches
 323 ///          to the next one and wraps around if needed:
 324 ///            mode = addmod8( mode, 1, 7);
 325 ///LIB8STATIC_ALWAYS_INLINESee 'mod8' for notes on performance.
 326 LIB8STATIC uint8_t submod8( uint8_t a, uint8_t b, uint8_t m)
 327 {
 328 #if defined(__AVR__)
 329     asm volatile (
 330                   "       sub %[a],%[b]    \n\t"
 331                   "L_%=:  sub %[a],%[m]    \n\t"
 332                   "       brcc L_%=        \n\t"
 333                   "       add %[a],%[m]    \n\t"
 334                   : [a] "+r" (a)
 335                   : [b] "r"  (b), [m] "r" (m)
 336                   );
 337 #else
 338     a -= b;
 339     while( a >= m) a -= m;
 340 #endif
 341     return a;
 342 }
 343
 344 /// 8x8 bit multiplication, with 8 bit result
 345 LIB8STATIC_ALWAYS_INLINE uint8_t mul8( uint8_t i, uint8_t j)
 346 {
 347 #if MUL8_C == 1
 348     return ((uint16_t)i * (uint16_t)(j) ) & 0xFF;
 349 #elif MUL8_AVRASM == 1
 350     asm volatile(
 351          /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */
 352          "mul %0, %1          \n\t"
 353          /* Extract the LOW 8-bits (r0) */
 354          "mov %0, r0          \n\t"
 355          /* Restore r1 to "0"; it's expected to always be that */
 356          "clr __zero_reg__    \n\t"
 357          : "+a" (i)
 358          : "a"  (j)
 359          : "r0", "r1");
 360
 361     return i;
 362 #else
 363 #error "No implementation for mul8 available."
 364 #endif
 365 }
 366
 367
 368 /// saturating 8x8 bit multiplication, with 8 bit result
 369 /// @returns the product of i * j, capping at 0xFF
 370 LIB8STATIC_ALWAYS_INLINE uint8_t qmul8( uint8_t i, uint8_t j)
 371 {
 372 #if QMUL8_C == 1
 373     int p = ((uint16_t)i * (uint16_t)(j) );
 374     if( p > 255) p = 255;
 375     return p;
 376 #elif QMUL8_AVRASM == 1
 377     asm volatile(
 378                  /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */
 379                  "  mul %0, %1          \n\t"
 380                  /* If high byte of result is zero, all is well. */
 381                  "  tst r1              \n\t"
 382                  "  breq Lnospill_%=    \n\t"
 383                  /* If high byte of result > 0, saturate low byte to 0xFF */
 384                  "  ldi %0,0xFF         \n\t"
 385                  "  rjmp Ldone_%=       \n\t"
 386                  "Lnospill_%=:          \n\t"
 387                  /* Extract the LOW 8-bits (r0) */
 388                  "  mov %0, r0          \n\t"
 389                  "Ldone_%=:             \n\t"
 390                  /* Restore r1 to "0"; it's expected to always be that */
 391                  "  clr __zero_reg__    \n\t"
 392                  : "+a" (i)
 393                  : "a"  (j)
 394                  : "r0", "r1");
 395
 396     return i;
 397 #else
 398 #error "No implementation for qmul8 available."
 399 #endif
 400 }
 401
 402
 403 /// take abs() of a signed 8-bit uint8_t
 404 LIB8STATIC_ALWAYS_INLINE int8_t abs8( int8_t i)
 405 {
 406 #if ABS8_C == 1
 407     if( i < 0) i = -i;
 408     return i;
 409 #elif ABS8_AVRASM == 1
 410
 411
 412     asm volatile(
 413          /* First, check the high bit, and prepare to skip if it's clear */
 414          "sbrc %0, 7 \n"
 415
 416          /* Negate the value */
 417          "neg %0     \n"
 418
 419          : "+r" (i) : "r" (i) );
 420     return i;
 421 #else
 422 #error "No implementation for abs8 available."
 423 #endif
 424 }
 425
 426 ///         square root for 16-bit integers
 427 ///         About three times faster and five times smaller
 428 ///         than Arduino's general sqrt on AVR.
 429 LIB8STATIC uint8_t sqrt16(uint16_t x)
 430 {
 431     if( x <= 1) {
 432         return x;
 433     }
 434
 435     uint8_t low = 1; // lower bound
 436     uint8_t hi, mid;
 437
 438     if( x > 7904) {
 439         hi = 255;
 440     } else {
 441         hi = (x >> 5) + 8; // initial estimate for upper bound
 442     }
 443
 444     do {
 445         mid = (low + hi) >> 1;
 446         if ((uint16_t)(mid * mid) > x) {
 447             hi = mid - 1;
 448         } else {
 449             if( mid == 255) {
 450                 return 255;
 451             }
 452             low = mid + 1;
 453         }
 454     } while (hi >= low);
 455
 456     return low - 1;
 457 }
 458
 459 /// blend a variable proproportion(0-255) of one byte to another
 460 /// @param a - the starting byte value
 461 /// @param b - the byte value to blend toward
 462 /// @param amountOfB - the proportion (0-255) of b to blend
 463 /// @returns a byte value between a and b, inclusive
 464 #if (FASTLED_BLEND_FIXED == 1)
 465 LIB8STATIC uint8_t blend8( uint8_t a, uint8_t b, uint8_t amountOfB)
 466 {
 467 #if BLEND8_C == 1
 468     uint16_t partial;
 469     uint8_t result;
 470
 471     uint8_t amountOfA = 255 - amountOfB;
 472
 473     partial = (a * amountOfA);
 474 #if (FASTLED_SCALE8_FIXED == 1)
 475     partial += a;
 476     //partial = add8to16( a, partial);
 477 #endif
 478
 479     partial += (b * amountOfB);
 480 #if (FASTLED_SCALE8_FIXED == 1)
 481     partial += b;
 482     //partial = add8to16( b, partial);
 483 #endif
 484
 485     result = partial >> 8;
 486
 487     return result;
 488
 489 #elif BLEND8_AVRASM == 1
 490     uint16_t partial;
 491     uint8_t result;
 492
 493     asm volatile (
 494         /* partial = b * amountOfB */
 495         "  mul %[b], %[amountOfB]        \n\t"
 496         "  movw %A[partial], r0          \n\t"
 497
 498         /* amountOfB (aka amountOfA) = 255 - amountOfB */
 499         "  com %[amountOfB]              \n\t"
 500
 501         /* partial += a * amountOfB (aka amountOfA) */
 502         "  mul %[a], %[amountOfB]        \n\t"
 503
 504         "  add %A[partial], r0           \n\t"
 505         "  adc %B[partial], r1           \n\t"
 506
 507         "  clr __zero_reg__              \n\t"
 508
 509 #if (FASTLED_SCALE8_FIXED == 1)
 510         /* partial += a */
 511         "  add %A[partial], %[a]         \n\t"
 512         "  adc %B[partial], __zero_reg__ \n\t"
 513
 514         // partial += b
 515         "  add %A[partial], %[b]         \n\t"
 516         "  adc %B[partial], __zero_reg__ \n\t"
 517 #endif
 518
 519         : [partial] "=r" (partial),
 520           [amountOfB] "+a" (amountOfB)
 521         : [a] "a" (a),
 522           [b] "a" (b)
 523         : "r0", "r1"
 524     );
 525
 526     result = partial >> 8;
 527
 528     return result;
 529
 530 #else
 531 #error "No implementation for blend8 available."
 532 #endif
 533 }
 534
 535 #else
 536 LIB8STATIC uint8_t blend8( uint8_t a, uint8_t b, uint8_t amountOfB)
 537 {
 538     // This version loses precision in the integer math
 539     // and can actually return results outside of the range
 540     // from a to b.  Its use is not recommended.
 541     uint8_t result;
 542     uint8_t amountOfA = 255 - amountOfB;
 543     result = scale8_LEAVING_R1_DIRTY( a, amountOfA)
 544            + scale8_LEAVING_R1_DIRTY( b, amountOfB);
 545     cleanup_R1();
 546     return result;
 547 }
 548 #endif
 549
 550
 551 ///@}
 552 #endif