lib/lib8tion/lib8tion.c

   1 #define FASTLED_INTERNAL
   2 #include <stdint.h>
   3
// Initial value stored in rand16seed at program start.
#define RAND16_SEED  1337
// Global 16-bit seed word with external linkage.
// NOTE(review): presumably the state used by the library's random number
// helpers declared elsewhere; no reader or writer is visible in this file.
uint16_t rand16seed = RAND16_SEED;
   6
   7
   8 // memset8, memcpy8, memmove8:
   9 //  optimized avr replacements for the standard "C" library
  10 //  routines memset, memcpy, and memmove.
  11 //
  12 //  There are two techniques that make these routines
  13 //  faster than the standard avr-libc routines.
  14 //  First, the loops are unrolled 2X, meaning that
  15 //  the average loop overhead is cut in half.
  16 //  And second, the compare-and-branch at the bottom
  17 //  of each loop decrements the low byte of the
  18 //  counter, and if the carry is clear, it branches
  19 //  back up immediately.  Only if the low byte math
  20 //  causes carry do we bother to decrement the high
  21 //  byte and check that result for carry as well.
  22 //  Results for a 100-byte buffer are 20-40% faster
  23 //  than standard avr-libc, at a cost of a few extra
  24 //  bytes of code.
  25
  26 #if defined(__AVR__)
  27 //__attribute__ ((noinline))
  28 void * memset8 ( void * ptr, uint8_t val, uint16_t num )
  29 {
  30     asm volatile(
  31          "  movw r26, %[ptr]        \n\t"
  32          "  sbrs %A[num], 0         \n\t"
  33          "  rjmp Lseteven_%=        \n\t"
  34          "  rjmp Lsetodd_%=         \n\t"
  35          "Lsetloop_%=:              \n\t"
  36          "  st X+, %[val]           \n\t"
  37          "Lsetodd_%=:               \n\t"
  38          "  st X+, %[val]           \n\t"
  39          "Lseteven_%=:              \n\t"
  40          "  subi %A[num], 2         \n\t"
  41          "  brcc Lsetloop_%=        \n\t"
  42          "  sbci %B[num], 0         \n\t"
  43          "  brcc Lsetloop_%=        \n\t"
  44          : [num] "+r" (num)
  45          : [ptr]  "r" (ptr),
  46            [val]  "r" (val)
  47          : "memory"
  48          );
  49     return ptr;
  50 }
  51
  52
  53
  54 //__attribute__ ((noinline))
  55 void * memcpy8 ( void * dst, const void* src, uint16_t num )
  56 {
  57     asm volatile(
  58          "  movw r30, %[src]        \n\t"
  59          "  movw r26, %[dst]        \n\t"
  60          "  sbrs %A[num], 0         \n\t"
  61          "  rjmp Lcpyeven_%=        \n\t"
  62          "  rjmp Lcpyodd_%=         \n\t"
  63          "Lcpyloop_%=:              \n\t"
  64          "  ld __tmp_reg__, Z+      \n\t"
  65          "  st X+, __tmp_reg__      \n\t"
  66          "Lcpyodd_%=:               \n\t"
  67          "  ld __tmp_reg__, Z+      \n\t"
  68          "  st X+, __tmp_reg__      \n\t"
  69          "Lcpyeven_%=:              \n\t"
  70          "  subi %A[num], 2         \n\t"
  71          "  brcc Lcpyloop_%=        \n\t"
  72          "  sbci %B[num], 0         \n\t"
  73          "  brcc Lcpyloop_%=        \n\t"
  74          : [num] "+r" (num)
  75          : [src] "r" (src),
  76            [dst] "r" (dst)
  77          : "memory"
  78          );
  79     return dst;
  80 }
  81
  82 //__attribute__ ((noinline))
  83 void * memmove8 ( void * dst, const void* src, uint16_t num )
  84 {
  85     if( src > dst) {
  86         // if src > dst then we can use the forward-stepping memcpy8
  87         return memcpy8( dst, src, num);
  88     } else {
  89         // if src < dst then we have to step backward:
  90         dst = (char*)dst + num;
  91         src = (char*)src + num;
  92         asm volatile(
  93              "  movw r30, %[src]        \n\t"
  94              "  movw r26, %[dst]        \n\t"
  95              "  sbrs %A[num], 0         \n\t"
  96              "  rjmp Lmoveven_%=        \n\t"
  97              "  rjmp Lmovodd_%=         \n\t"
  98              "Lmovloop_%=:              \n\t"
  99              "  ld __tmp_reg__, -Z      \n\t"
 100              "  st -X, __tmp_reg__      \n\t"
 101              "Lmovodd_%=:               \n\t"
 102              "  ld __tmp_reg__, -Z      \n\t"
 103              "  st -X, __tmp_reg__      \n\t"
 104              "Lmoveven_%=:              \n\t"
 105              "  subi %A[num], 2         \n\t"
 106              "  brcc Lmovloop_%=        \n\t"
 107              "  sbci %B[num], 0         \n\t"
 108              "  brcc Lmovloop_%=        \n\t"
 109              : [num] "+r" (num)
 110              : [src] "r" (src),
 111                [dst] "r" (dst)
 112              : "memory"
 113              );
 114         return dst;
 115     }
 116 }
 117
 118 #endif /* AVR */
 119
 120
 121
 122
 123 #if 0
 124 // TEST / VERIFICATION CODE ONLY BELOW THIS POINT
 125 #include <Arduino.h>
 126 #include "lib8tion.h"
 127
 128 void test1abs( int8_t i)
 129 {
 130     Serial.print("abs("); Serial.print(i); Serial.print(") = ");
 131     int8_t j = abs8(i);
 132     Serial.print(j); Serial.println(" ");
 133 }
 134
 135 void testabs()
 136 {
 137     delay(5000);
 138     for( int8_t q = -128; q != 127; q++) {
 139         test1abs(q);
 140     }
 141     for(;;){};
 142 }
 143
 144
 145 void testmul8()
 146 {
 147     delay(5000);
 148     byte r, c;
 149
 150     Serial.println("mul8:");
 151     for( r = 0; r <= 20; r += 1) {
 152         Serial.print(r); Serial.print(" : ");
 153         for( c = 0; c <= 20; c += 1) {
 154             byte t;
 155             t = mul8( r, c);
 156             Serial.print(t); Serial.print(' ');
 157         }
 158         Serial.println(' ');
 159     }
 160     Serial.println("done.");
 161     for(;;){};
 162 }
 163
 164
 165 void testscale8()
 166 {
 167     delay(5000);
 168     byte r, c;
 169
 170     Serial.println("scale8:");
 171     for( r = 0; r <= 240; r += 10) {
 172         Serial.print(r); Serial.print(" : ");
 173         for( c = 0; c <= 240; c += 10) {
 174             byte t;
 175             t = scale8( r, c);
 176             Serial.print(t); Serial.print(' ');
 177         }
 178         Serial.println(' ');
 179     }
 180
 181     Serial.println(' ');
 182     Serial.println("scale8_video:");
 183
 184     for( r = 0; r <= 100; r += 4) {
 185         Serial.print(r); Serial.print(" : ");
 186         for( c = 0; c <= 100; c += 4) {
 187             byte t;
 188             t = scale8_video( r, c);
 189             Serial.print(t); Serial.print(' ');
 190         }
 191         Serial.println(' ');
 192     }
 193
 194     Serial.println("done.");
 195     for(;;){};
 196 }
 197
 198
 199
 200 void testqadd8()
 201 {
 202     delay(5000);
 203     byte r, c;
 204     for( r = 0; r <= 240; r += 10) {
 205         Serial.print(r); Serial.print(" : ");
 206         for( c = 0; c <= 240; c += 10) {
 207             byte t;
 208             t = qadd8( r, c);
 209             Serial.print(t); Serial.print(' ');
 210         }
 211         Serial.println(' ');
 212     }
 213     Serial.println("done.");
 214     for(;;){};
 215 }
 216
 217 void testnscale8x3()
 218 {
 219     delay(5000);
 220     byte r, g, b, sc;
 221     for( byte z = 0; z < 10; z++) {
 222         r = random8(); g = random8(); b = random8(); sc = random8();
 223
 224         Serial.print("nscale8x3_video( ");
 225         Serial.print(r); Serial.print(", ");
 226         Serial.print(g); Serial.print(", ");
 227         Serial.print(b); Serial.print(", ");
 228         Serial.print(sc); Serial.print(") = [ ");
 229
 230         nscale8x3_video( r, g, b, sc);
 231
 232         Serial.print(r); Serial.print(", ");
 233         Serial.print(g); Serial.print(", ");
 234         Serial.print(b); Serial.print("]");
 235
 236         Serial.println(' ');
 237     }
 238     Serial.println("done.");
 239     for(;;){};
 240 }
 241
 242 #endif