lib/lib8tion/scale8.h

   1 #ifndef __INC_LIB8TION_SCALE_H
   2 #define __INC_LIB8TION_SCALE_H
   3
   4 ///@ingroup lib8tion
   5
   6 ///@defgroup Scaling Scaling functions
   7 /// Fast, efficient 8-bit scaling functions specifically
   8 /// designed for high-performance LED programming.
   9 ///
  10 /// Because of the AVR(Arduino) and ARM assembly language
  11 /// implementations provided, using these functions often
  12 /// results in smaller and faster code than the equivalent
  13 /// program using plain "C" arithmetic and logic.
  14 ///@{
  15
  16 ///  scale one byte by a second one, which is treated as
  17 ///  the numerator of a fraction whose denominator is 256
  18 ///  In other words, it computes i * (scale / 256)
  19 ///  4 clocks AVR with MUL, 2 clocks ARM
  20 LIB8STATIC_ALWAYS_INLINE uint8_t scale8( uint8_t i, fract8 scale)
  21 {
  22 #if SCALE8_C == 1
  23 #if (FASTLED_SCALE8_FIXED == 1)
  24     return (((uint16_t)i) * (1+(uint16_t)(scale))) >> 8;
  25 #else
  26     return ((uint16_t)i * (uint16_t)(scale) ) >> 8;
  27 #endif
  28 #elif SCALE8_AVRASM == 1
  29 #if defined(LIB8_ATTINY)
  30 #if (FASTLED_SCALE8_FIXED == 1)
  31     uint8_t work=i;
  32 #else
  33     uint8_t work=0;
  34 #endif
  35     uint8_t cnt=0x80;
  36     asm volatile(
  37 #if (FASTLED_SCALE8_FIXED == 1)
  38         "  inc %[scale]                 \n\t"
  39         "  breq DONE_%=                 \n\t"
  40         "  clr %[work]                  \n\t"
  41 #endif
  42         "LOOP_%=:                       \n\t"
  43         /*"  sbrc %[scale], 0             \n\t"
  44         "  add %[work], %[i]            \n\t"
  45         "  ror %[work]                  \n\t"
  46         "  lsr %[scale]                 \n\t"
  47         "  clc                          \n\t"*/
  48         "  sbrc %[scale], 0             \n\t"
  49         "  add %[work], %[i]            \n\t"
  50         "  ror %[work]                  \n\t"
  51         "  lsr %[scale]                 \n\t"
  52         "  lsr %[cnt]                   \n\t"
  53         "brcc LOOP_%=                   \n\t"
  54         "DONE_%=:                       \n\t"
  55         : [work] "+r" (work), [cnt] "+r" (cnt)
  56         : [scale] "r" (scale), [i] "r" (i)
  57         :
  58       );
  59     return work;
  60 #else
  61     asm volatile(
  62 #if (FASTLED_SCALE8_FIXED==1)
  63         // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0
  64         "mul %0, %1          \n\t"
  65         // Add i to r0, possibly setting the carry flag
  66         "add r0, %0         \n\t"
  67         // load the immediate 0 into i (note, this does _not_ touch any flags)
  68         "ldi %0, 0x00       \n\t"
  69         // walk and chew gum at the same time
  70         "adc %0, r1          \n\t"
  71 #else
  72          /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
  73          "mul %0, %1          \n\t"
  74          /* Move the high 8-bits of the product (r1) back to i */
  75          "mov %0, r1          \n\t"
  76          /* Restore r1 to "0"; it's expected to always be that */
  77 #endif
  78          "clr __zero_reg__    \n\t"
  79
  80          : "+a" (i)      /* writes to i */
  81          : "a"  (scale)  /* uses scale */
  82          : "r0", "r1"    /* clobbers r0, r1 */ );
  83
  84     /* Return the result */
  85     return i;
  86 #endif
  87 #else
  88 #error "No implementation for scale8 available."
  89 #endif
  90 }
  91
  92
  93 ///  The "video" version of scale8 guarantees that the output will
  94 ///  be only be zero if one or both of the inputs are zero.  If both
  95 ///  inputs are non-zero, the output is guaranteed to be non-zero.
  96 ///  This makes for better 'video'/LED dimming, at the cost of
  97 ///  several additional cycles.
  98 LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video( uint8_t i, fract8 scale)
  99 {
 100 #if SCALE8_C == 1 || defined(LIB8_ATTINY)
 101     uint8_t j = (((int)i * (int)scale) >> 8) + ((i&&scale)?1:0);
 102     // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
 103     // uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale;
 104     return j;
 105 #elif SCALE8_AVRASM == 1
 106     uint8_t j=0;
 107     asm volatile(
 108         "  tst %[i]\n\t"
 109         "  breq L_%=\n\t"
 110         "  mul %[i], %[scale]\n\t"
 111         "  mov %[j], r1\n\t"
 112         "  clr __zero_reg__\n\t"
 113         "  cpse %[scale], r1\n\t"
 114         "  subi %[j], 0xFF\n\t"
 115         "L_%=: \n\t"
 116         : [j] "+a" (j)
 117         : [i] "a" (i), [scale] "a" (scale)
 118         : "r0", "r1");
 119
 120     return j;
 121     // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
 122     // asm volatile(
 123     //      "      tst %0           \n"
 124     //      "      breq L_%=        \n"
 125     //      "      mul %0, %1       \n"
 126     //      "      mov %0, r1       \n"
 127     //      "      add %0, %2       \n"
 128     //      "      clr __zero_reg__ \n"
 129     //      "L_%=:                  \n"
 130
 131     //      : "+a" (i)
 132     //      : "a" (scale), "a" (nonzeroscale)
 133     //      : "r0", "r1");
 134
 135     // // Return the result
 136     // return i;
 137 #else
 138 #error "No implementation for scale8_video available."
 139 #endif
 140 }
 141
 142
 143 /// This version of scale8 does not clean up the R1 register on AVR
 144 /// If you are doing several 'scale8's in a row, use this, and
 145 /// then explicitly call cleanup_R1.
 146 LIB8STATIC_ALWAYS_INLINE uint8_t scale8_LEAVING_R1_DIRTY( uint8_t i, fract8 scale)
 147 {
 148 #if SCALE8_C == 1
 149 #if (FASTLED_SCALE8_FIXED == 1)
 150     return (((uint16_t)i) * ((uint16_t)(scale)+1)) >> 8;
 151 #else
 152     return ((int)i * (int)(scale) ) >> 8;
 153 #endif
 154 #elif SCALE8_AVRASM == 1
 155     asm volatile(
 156       #if (FASTLED_SCALE8_FIXED==1)
 157               // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0
 158               "mul %0, %1          \n\t"
 159               // Add i to r0, possibly setting the carry flag
 160               "add r0, %0         \n\t"
 161               // load the immediate 0 into i (note, this does _not_ touch any flags)
 162               "ldi %0, 0x00       \n\t"
 163               // walk and chew gum at the same time
 164               "adc %0, r1          \n\t"
 165       #else
 166          /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
 167          "mul %0, %1    \n\t"
 168          /* Move the high 8-bits of the product (r1) back to i */
 169          "mov %0, r1    \n\t"
 170       #endif
 171          /* R1 IS LEFT DIRTY HERE; YOU MUST ZERO IT OUT YOURSELF  */
 172          /* "clr __zero_reg__    \n\t" */
 173
 174          : "+a" (i)      /* writes to i */
 175          : "a"  (scale)  /* uses scale */
 176          : "r0", "r1"    /* clobbers r0, r1 */ );
 177
 178     // Return the result
 179     return i;
 180 #else
 181 #error "No implementation for scale8_LEAVING_R1_DIRTY available."
 182 #endif
 183 }
 184
 185
 186 /// This version of scale8_video does not clean up the R1 register on AVR
 187 /// If you are doing several 'scale8_video's in a row, use this, and
 188 /// then explicitly call cleanup_R1.
 189 LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video_LEAVING_R1_DIRTY( uint8_t i, fract8 scale)
 190 {
 191 #if SCALE8_C == 1 || defined(LIB8_ATTINY)
 192     uint8_t j = (((int)i * (int)scale) >> 8) + ((i&&scale)?1:0);
 193     // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
 194     // uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale;
 195     return j;
 196 #elif SCALE8_AVRASM == 1
 197     uint8_t j=0;
 198     asm volatile(
 199         "  tst %[i]\n\t"
 200         "  breq L_%=\n\t"
 201         "  mul %[i], %[scale]\n\t"
 202         "  mov %[j], r1\n\t"
 203         "  breq L_%=\n\t"
 204         "  subi %[j], 0xFF\n\t"
 205         "L_%=: \n\t"
 206         : [j] "+a" (j)
 207         : [i] "a" (i), [scale] "a" (scale)
 208         : "r0", "r1");
 209
 210     return j;
 211     // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
 212     // asm volatile(
 213     //      "      tst %0           \n"
 214     //      "      breq L_%=        \n"
 215     //      "      mul %0, %1       \n"
 216     //      "      mov %0, r1       \n"
 217     //      "      add %0, %2       \n"
 218     //      "      clr __zero_reg__ \n"
 219     //      "L_%=:                  \n"
 220
 221     //      : "+a" (i)
 222     //      : "a" (scale), "a" (nonzeroscale)
 223     //      : "r0", "r1");
 224
 225     // // Return the result
 226     // return i;
 227 #else
 228 #error "No implementation for scale8_video_LEAVING_R1_DIRTY available."
 229 #endif
 230 }
 231
 232 /// Clean up the r1 register after a series of *LEAVING_R1_DIRTY calls
 233 LIB8STATIC_ALWAYS_INLINE void cleanup_R1(void)
 234 {
 235 #if CLEANUP_R1_AVRASM == 1
 236     // Restore r1 to "0"; it's expected to always be that
 237     asm volatile( "clr __zero_reg__  \n\t" : : : "r1" );
 238 #endif
 239 }
 240
 241
 242 /// scale a 16-bit unsigned value by an 8-bit value,
 243 ///         considered as numerator of a fraction whose denominator
 244 ///         is 256. In other words, it computes i * (scale / 256)
 245
 246 LIB8STATIC_ALWAYS_INLINE uint16_t scale16by8( uint16_t i, fract8 scale )
 247 {
 248 #if SCALE16BY8_C == 1
 249     uint16_t result;
 250 #if FASTLED_SCALE8_FIXED == 1
 251     result = (i * (1+((uint16_t)scale))) >> 8;
 252 #else
 253     result = (i * scale) / 256;
 254 #endif
 255     return result;
 256 #elif SCALE16BY8_AVRASM == 1
 257 #if FASTLED_SCALE8_FIXED == 1
 258     uint16_t result = 0;
 259     asm volatile(
 260                  // result.A = HighByte( (i.A x scale) + i.A )
 261                  "  mul %A[i], %[scale]                 \n\t"
 262                  "  add r0, %A[i]                       \n\t"
 263             //   "  adc r1, [zero]                      \n\t"
 264             //   "  mov %A[result], r1                  \n\t"
 265                  "  adc %A[result], r1                  \n\t"
 266
 267                  // result.A-B += i.B x scale
 268                  "  mul %B[i], %[scale]                 \n\t"
 269                  "  add %A[result], r0                  \n\t"
 270                  "  adc %B[result], r1                  \n\t"
 271
 272                  // cleanup r1
 273                  "  clr __zero_reg__                    \n\t"
 274
 275                  // result.A-B += i.B
 276                  "  add %A[result], %B[i]               \n\t"
 277                  "  adc %B[result], __zero_reg__        \n\t"
 278
 279                  : [result] "+r" (result)
 280                  : [i] "r" (i), [scale] "r" (scale)
 281                  : "r0", "r1"
 282                  );
 283     return result;
 284 #else
 285     uint16_t result = 0;
 286     asm volatile(
 287          // result.A = HighByte(i.A x j )
 288          "  mul %A[i], %[scale]                 \n\t"
 289          "  mov %A[result], r1                  \n\t"
 290          //"  clr %B[result]                      \n\t"
 291
 292          // result.A-B += i.B x j
 293          "  mul %B[i], %[scale]                 \n\t"
 294          "  add %A[result], r0                  \n\t"
 295          "  adc %B[result], r1                  \n\t"
 296
 297          // cleanup r1
 298          "  clr __zero_reg__                    \n\t"
 299
 300          : [result] "+r" (result)
 301          : [i] "r" (i), [scale] "r" (scale)
 302          : "r0", "r1"
 303          );
 304     return result;
 305 #endif
 306 #else
 307     #error "No implementation for scale16by8 available."
 308 #endif
 309 }
 310
 311 /// scale a 16-bit unsigned value by a 16-bit value,
 312 ///         considered as numerator of a fraction whose denominator
 313 ///         is 65536. In other words, it computes i * (scale / 65536)
 314
 315 LIB8STATIC uint16_t scale16( uint16_t i, fract16 scale )
 316 {
 317   #if SCALE16_C == 1
 318     uint16_t result;
 319 #if FASTLED_SCALE8_FIXED == 1
 320     result = ((uint32_t)(i) * (1+(uint32_t)(scale))) / 65536;
 321 #else
 322     result = ((uint32_t)(i) * (uint32_t)(scale)) / 65536;
 323 #endif
 324     return result;
 325 #elif SCALE16_AVRASM == 1
 326 #if FASTLED_SCALE8_FIXED == 1
 327     // implemented sort of like
 328     //   result = ((i * scale) + i ) / 65536
 329     //
 330     // why not like this, you may ask?
 331     //   result = (i * (scale+1)) / 65536
 332     // the answer is that if scale is 65535, then scale+1
 333     // will be zero, which is not what we want.
 334     uint32_t result;
 335     asm volatile(
 336                  // result.A-B  = i.A x scale.A
 337                  "  mul %A[i], %A[scale]                 \n\t"
 338                  //  save results...
 339                  // basic idea:
 340                  //"  mov %A[result], r0                 \n\t"
 341                  //"  mov %B[result], r1                 \n\t"
 342                  // which can be written as...
 343                  "  movw %A[result], r0                   \n\t"
 344                  // Because we're going to add i.A-B to
 345                  // result.A-D, we DO need to keep both
 346                  // the r0 and r1 portions of the product
 347                  // UNlike in the 'unfixed scale8' version.
 348                  // So the movw here is needed.
 349                  : [result] "=r" (result)
 350                  : [i] "r" (i),
 351                  [scale] "r" (scale)
 352                  : "r0", "r1"
 353                  );
 354
 355     asm volatile(
 356                  // result.C-D  = i.B x scale.B
 357                  "  mul %B[i], %B[scale]                 \n\t"
 358                  //"  mov %C[result], r0                 \n\t"
 359                  //"  mov %D[result], r1                 \n\t"
 360                  "  movw %C[result], r0                   \n\t"
 361                  : [result] "+r" (result)
 362                  : [i] "r" (i),
 363                  [scale] "r" (scale)
 364                  : "r0", "r1"
 365                  );
 366
 367     const uint8_t  zero = 0;
 368     asm volatile(
 369                  // result.B-D += i.B x scale.A
 370                  "  mul %B[i], %A[scale]                 \n\t"
 371
 372                  "  add %B[result], r0                   \n\t"
 373                  "  adc %C[result], r1                   \n\t"
 374                  "  adc %D[result], %[zero]              \n\t"
 375
 376                  // result.B-D += i.A x scale.B
 377                  "  mul %A[i], %B[scale]                 \n\t"
 378
 379                  "  add %B[result], r0                   \n\t"
 380                  "  adc %C[result], r1                   \n\t"
 381                  "  adc %D[result], %[zero]              \n\t"
 382
 383                  // cleanup r1
 384                  "  clr r1                               \n\t"
 385
 386                  : [result] "+r" (result)
 387                  : [i] "r" (i),
 388                  [scale] "r" (scale),
 389                  [zero] "r" (zero)
 390                  : "r0", "r1"
 391                  );
 392
 393     asm volatile(
 394                  // result.A-D += i.A-B
 395                  "  add %A[result], %A[i]                \n\t"
 396                  "  adc %B[result], %B[i]                \n\t"
 397                  "  adc %C[result], %[zero]              \n\t"
 398                  "  adc %D[result], %[zero]              \n\t"
 399                  : [result] "+r" (result)
 400                  : [i] "r" (i),
 401                  [zero] "r" (zero)
 402                  );
 403
 404     result = result >> 16;
 405     return result;
 406 #else
 407     uint32_t result;
 408     asm volatile(
 409                  // result.A-B  = i.A x scale.A
 410                  "  mul %A[i], %A[scale]                 \n\t"
 411                  //  save results...
 412                  // basic idea:
 413                  //"  mov %A[result], r0                 \n\t"
 414                  //"  mov %B[result], r1                 \n\t"
 415                  // which can be written as...
 416                  "  movw %A[result], r0                   \n\t"
 417                  // We actually don't need to do anything with r0,
 418                  // as result.A is never used again here, so we
 419                  // could just move the high byte, but movw is
 420                  // one clock cycle, just like mov, so might as
 421                  // well, in case we want to use this code for
 422                  // a generic 16x16 multiply somewhere.
 423
 424                  : [result] "=r" (result)
 425                  : [i] "r" (i),
 426                    [scale] "r" (scale)
 427                  : "r0", "r1"
 428                  );
 429
 430     asm volatile(
 431                  // result.C-D  = i.B x scale.B
 432                  "  mul %B[i], %B[scale]                 \n\t"
 433                  //"  mov %C[result], r0                 \n\t"
 434                  //"  mov %D[result], r1                 \n\t"
 435                  "  movw %C[result], r0                   \n\t"
 436                  : [result] "+r" (result)
 437                  : [i] "r" (i),
 438                    [scale] "r" (scale)
 439                  : "r0", "r1"
 440                  );
 441
 442     const uint8_t  zero = 0;
 443     asm volatile(
 444                  // result.B-D += i.B x scale.A
 445                  "  mul %B[i], %A[scale]                 \n\t"
 446
 447                  "  add %B[result], r0                   \n\t"
 448                  "  adc %C[result], r1                   \n\t"
 449                  "  adc %D[result], %[zero]              \n\t"
 450
 451                  // result.B-D += i.A x scale.B
 452                  "  mul %A[i], %B[scale]                 \n\t"
 453
 454                  "  add %B[result], r0                   \n\t"
 455                  "  adc %C[result], r1                   \n\t"
 456                  "  adc %D[result], %[zero]              \n\t"
 457
 458                  // cleanup r1
 459                  "  clr r1                               \n\t"
 460
 461                  : [result] "+r" (result)
 462                  : [i] "r" (i),
 463                    [scale] "r" (scale),
 464                    [zero] "r" (zero)
 465                  : "r0", "r1"
 466                  );
 467
 468     result = result >> 16;
 469     return result;
 470 #endif
 471 #else
 472     #error "No implementation for scale16 available."
 473 #endif
 474 }
 475 ///@}
 476
 477 ///@defgroup Dimming Dimming and brightening functions
 478 ///
 479 /// Dimming and brightening functions
 480 ///
/// The eye does not respond in a linear way to light.
/// High speed PWM'd LEDs at 50% duty cycle appear far
/// brighter than the 'half as bright' you might expect.
///
/// If you want your midpoint brightness level (128) to
/// appear half as bright as 'full' brightness (255), you
/// have to apply a 'dimming function'.
 488 ///@{
 489
 490 /// Adjust a scaling value for dimming
 491 LIB8STATIC uint8_t dim8_raw( uint8_t x)
 492 {
 493     return scale8( x, x);
 494 }
 495
 496 /// Adjust a scaling value for dimming for video (value will never go below 1)
 497 LIB8STATIC uint8_t dim8_video( uint8_t x)
 498 {
 499     return scale8_video( x, x);
 500 }
 501
 502 /// Linear version of the dimming function that halves for values < 128
 503 LIB8STATIC uint8_t dim8_lin( uint8_t x )
 504 {
 505     if( x & 0x80 ) {
 506         x = scale8( x, x);
 507     } else {
 508         x += 1;
 509         x /= 2;
 510     }
 511     return x;
 512 }
 513
 514 /// inverse of the dimming function, brighten a value
 515 LIB8STATIC uint8_t brighten8_raw( uint8_t x)
 516 {
 517     uint8_t ix = 255 - x;
 518     return 255 - scale8( ix, ix);
 519 }
 520
 521 /// inverse of the dimming function, brighten a value
 522 LIB8STATIC uint8_t brighten8_video( uint8_t x)
 523 {
 524     uint8_t ix = 255 - x;
 525     return 255 - scale8_video( ix, ix);
 526 }
 527
 528 /// inverse of the dimming function, brighten a value
 529 LIB8STATIC uint8_t brighten8_lin( uint8_t x )
 530 {
 531     uint8_t ix = 255 - x;
 532     if( ix & 0x80 ) {
 533         ix = scale8( ix, ix);
 534     } else {
 535         ix += 1;
 536         ix /= 2;
 537     }
 538     return 255 - ix;
 539 }
 540
 541 ///@}
 542 #endif