3 * This file is part of fastq-tools.
5 * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
17 static const size_t INITIAL_TABLE_SIZE = 128;
18 static const double MAX_LOAD = 0.75;
22 * Paul Hsieh's SuperFastHash
23 * http://www.azillionmonkeys.com/qed/hash.html
28 #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
29 || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
30 #define get16bits(d) (*((const uint16_t *) (d)))
33 #if !defined (get16bits)
34 #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
35 +(uint32_t)(((const uint8_t *)(d))[0]) )
38 static uint32_t hash(const char * data, size_t len) {
39 uint32_t hash = len, tmp;
42 if (len <= 0 || data == NULL) return 0;
48 for (;len > 0; len--) {
49 hash += get16bits (data);
50 tmp = (get16bits (data+2) << 11) ^ hash;
51 hash = (hash << 16) ^ tmp;
52 data += 2*sizeof (uint16_t);
56 /* Handle end cases */
58 case 3: hash += get16bits (data);
60 hash ^= data[sizeof (uint16_t)] << 18;
63 case 2: hash += get16bits (data);
67 case 1: hash += *data;
72 /* Force "avalanching" of final 127 bits */
85 static void rehash(hash_table* T, size_t new_n);
86 static void clear_hash_table(hash_table*);
90 hash_table* create_hash_table()
92 hash_table* T = malloc_or_die(sizeof(hash_table));
93 T->A = malloc_or_die(INITIAL_TABLE_SIZE * sizeof(hashed_value*));
94 memset(T->A, 0, INITIAL_TABLE_SIZE * sizeof(hashed_value*));
95 T->n = INITIAL_TABLE_SIZE;
97 T->max_m = T->n * MAX_LOAD;
103 void destroy_hash_table(hash_table* T)
114 void clear_hash_table(hash_table* T)
118 for (i = 0; i < T->n; i++) {
121 free(T->A[i]->value);
131 static void insert_without_copy(hash_table* T, hashed_value* V)
133 uint32_t h = hash(V->value, V->len) % T->n;
141 static void rehash(hash_table* T, size_t new_n)
146 U.max_m = U.n * MAX_LOAD;
147 U.A = malloc_or_die(U.n * sizeof(hashed_value*));
148 memset(U.A, 0, U.n * sizeof(hashed_value*));
152 for (i = 0; i < T->n; i++) {
156 insert_without_copy(&U, j);
169 void inc_hash_table(hash_table* T, const char* value, size_t len)
171 if (T->m >= T->max_m) rehash(T, T->n * 2);
173 uint32_t h = hash(value, len) % T->n;
175 hashed_value* u = T->A[h];
178 if (u->len == len && memcmp(u->value, value, len) == 0) {
186 u = malloc_or_die(sizeof(hashed_value));
187 u->value = malloc_or_die(len);
188 memcpy(u->value, value, len);
200 hashed_value** dump_hash_table(hash_table* T)
202 hashed_value** D = malloc_or_die(T->m * sizeof(hashed_value*));
206 for (i = 0, j = 0; i < T->n; i++) {