From f1ee84b123173a4fd69ad0c730067fdd76e28201 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Fri, 6 Apr 2012 13:47:14 +0000 Subject: [PATCH] upgraded bitmap.rb to a 64bit version git-svn-id: http://biopieces.googlecode.com/svn/trunk@1783 74ccb610-7750-0410-82ae-013aeee3265d --- code_ruby/lib/maasha/bitmap.rb | 395 +++++++++++++++------------------ 1 file changed, 185 insertions(+), 210 deletions(-) diff --git a/code_ruby/lib/maasha/bitmap.rb b/code_ruby/lib/maasha/bitmap.rb index be53acf..40e20b1 100644 --- a/code_ruby/lib/maasha/bitmap.rb +++ b/code_ruby/lib/maasha/bitmap.rb @@ -24,35 +24,26 @@ require 'inline' +CHAR_BIT = 8 # This is not guaranteed! FIXME +BITS_PER_WORD = 64 # This is not portable! FIXME + # Error class for all exceptions to do with BitMap. class BitMapError < StandardError; end -CHAR_BIT = 8 - # Bitmap - 13 columns, 3 rows: # 1000000000000 # 0000001000000 # 0000000000001 -# -# Bytestring: -# byte 0 byte 1 byte 2 byte 3 byte 4 byte 5 -# +--------+--------+--------+--------+--------+--------+ -# |10000000|00000000|00000010|00000000|00000000|00000001| -# +--------+--------+--------+--------+--------+--------+ -# -# Padding: -# row 0 row 1 row 2 -# 10000000 00000xxx 00000010 00000xxx 00000000 00001xxx class BitMap - attr_accessor :cols, :rows, :byte_array + attr_reader :cols, :rows, :map, :width # Method to initialize a BitMap. def initialize(cols, rows) - @cols = cols - @rows = rows - @pads = padding - size = ((rows * (cols + @pads)) / CHAR_BIT) - @byte_array = "\0" * size + @cols = cols + @rows = rows + @pads = pad_init + @width = @cols + @pads + @map = map_init end # Method that set bits to 1 at specified columns, and rows. @@ -66,7 +57,7 @@ class BitMap cols = cols_expand(cols) rows = rows_expand(rows) - set_c(cols, rows) + set_c(@map, @width, cols, rows) end # Method that set bits to 0 at specified columns, and rows. @@ -80,7 +71,16 @@ class BitMap cols = cols_expand(cols) rows = rows_expand(rows) - unset_c(cols, rows) + unset_c(@map, @width, cols, rows) + end + + def sum + size = @rows * @width / BITS_PER_WORD + sum_c(@map, size) + end + + def sum_rows + sum_rows_c(@map, @width, rows) end # Method to get a slice from a bitmap from specified rows and columns, @@ -96,7 +96,7 @@ class BitMap rows = rows_expand(rows) bm = BitMap.new(cols.size, rows.size) - slice_c(bm.byte_array, cols, rows) + slice_c(@map, @width, bm.map, bm.width, cols, rows) bm end @@ -110,11 +110,13 @@ class BitMap bm = BitMap.new(@cols, @rows) if @rows == bit_map.rows and @cols == bit_map.cols - bitwise_and_c(bit_map.byte_array) + raise # TODO + bitwise_and_c() elsif @rows == bit_map.rows - bitwise_and_col_c(bit_map.byte_array) + raise # TODO + bitwise_and_col_c() elsif @cols == bit_map.cols - bitwise_and_row_c(self.byte_array, bm.byte_array, bit_map.byte_array, @cols, @rows) + bitwise_and_row_c(@map, bm.map, bit_map.map, @width, @rows) else raise BitMapError, "can't & uneven bit maps: #{@rows} x #{@cols} & #{bit_map.rows} x #{bit_map.cols}" end @@ -122,6 +124,10 @@ class BitMap bm end + def set?(c, r) + btest_c(@map, @width, c, r) + end + # Method to convert a bit array to a string. def to_s max_rows = 20 @@ -131,9 +137,7 @@ class BitMap (0 ... @rows).each do |r| row = "" (0 ... @cols).each do |c| - pos = r * (@cols + @pads) + c - - if self.set? pos + if set?(c, r) row << "1" else row << "0" @@ -158,8 +162,24 @@ class BitMap private - def padding - (((@cols % CHAR_BIT) != 0) ? (CHAR_BIT - (@cols % CHAR_BIT)) : 0) + def pad_init + if (@cols % BITS_PER_WORD) != 0 + pads = BITS_PER_WORD - (@cols % BITS_PER_WORD) + else + pads = 0 + end + + pads + end + + # Method to initialize a block of memory holding a bitmap. + # The block will be initialized as a zeroed Ruby string, but + # will be padded with 0's so that each row is a multiplum of + # BITS_PER_WORD - which is 64 or 32 bits. + def map_init + size = @rows * @width / CHAR_BIT + + "\0" * size end def rows_expand(rows) @@ -198,269 +218,224 @@ class BitMap builder.add_static "cols", 'rb_intern("@cols")', "ID" builder.add_static "byte_array", 'rb_intern("@byte_array")', "ID" + # New builder.prefix %s{ - unsigned char bit_count[256] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 - }; + typedef uint64_t word_t; } - builder.prefix %{ - #define COUNT_BITS(c) (bit_count[c]) - } - - builder.prefix %{ - #define PADDING(cols) (((cols % CHAR_BIT) != 0) ? (CHAR_BIT - (cols % CHAR_BIT)) : 0) + # New + builder.prefix %s{ + enum { BITS_PER_WORD = sizeof(word_t) * CHAR_BIT }; } - builder.prefix %{ - #define BYTE_SIZE(rows, cols) (rows * (cols + PADDING(cols)) / CHAR_BIT) + # New + builder.prefix %s{ + #define WORD_OFFSET(b) ((b) / BITS_PER_WORD) } - builder.prefix %{ - #define BYTE_POS(pos) (pos / CHAR_BIT) + # New + builder.prefix %s{ + #define BIT_OFFSET(b) ((b) % BITS_PER_WORD) } + # Method to count all set bits in an unsigned long int (64 bits) + # based on a variant of pop_count optimal for bits with most 0 + # bits set. builder.prefix %{ - #define BIT_POS(pos) (1 << (CHAR_BIT - 1 - (pos % CHAR_BIT))) - } + int count_bits_c(word_t x) { + int count; - builder.c %{ - VALUE fill_c() - { - VALUE _byte_array = rb_ivar_get(self, byte_array); - VALUE _rows = rb_ivar_get(self, rows); - VALUE _cols = rb_ivar_get(self, cols); - unsigned char* byte_array = StringValuePtr(_byte_array); - int rows = NUM2INT(_rows); - int cols = NUM2INT(_cols); - int size = BYTE_SIZE(rows, cols); - int i = 0; - - for (i = 0; i < size; i++) byte_array[i] = ~0; + for (count = 0; x; count++) + x &= x - 1; - return self; + return count; } } - builder.c %{ - VALUE empty_c() - { - VALUE _byte_array = rb_ivar_get(self, byte_array); - VALUE _rows = rb_ivar_get(self, rows); - VALUE _cols = rb_ivar_get(self, cols); - unsigned char* byte_array = StringValuePtr(_byte_array); - int rows = NUM2INT(_rows); - int cols = NUM2INT(_cols); - int size = BYTE_SIZE(rows, cols); - int i = 0; - - for (i = 0; i < size; i++) byte_array[i] = 0; - - return self; + # New + builder.prefix %{ + void bit_set_c(word_t *map, int pos) { + map[WORD_OFFSET(pos)] |= (1 << BIT_OFFSET(pos)); } } - builder.c %{ - VALUE bit_test_c(VALUE _pos) - { - int pos = NUM2INT(_pos); - VALUE _byte_array = rb_ivar_get(self, byte_array); - unsigned char* byte_array = StringValuePtr(_byte_array); - - return ((byte_array[BYTE_POS(pos)] & BIT_POS(pos)) != 0); + # New + builder.prefix %{ + void bit_unset_c(word_t *map, int pos) { + map[WORD_OFFSET(pos)] &= ~(1 << BIT_OFFSET(pos)); } } builder.c %{ - void set_c(VALUE _cols_array, VALUE _rows_array) + void set_c(VALUE _map, VALUE _width, VALUE _cols_ary, VALUE _rows_ary) { - VALUE _byte_array = rb_ivar_get(self, byte_array); - VALUE _cols = rb_ivar_get(self, cols); - unsigned char* byte_array = StringValuePtr(_byte_array); - int cols = NUM2INT(_cols); - - VALUE *cols_array = RARRAY_PTR(_cols_array); - VALUE *rows_array = RARRAY_PTR(_rows_array); - int col_size = (int) RARRAY_LEN(_cols_array); - int row_size = (int) RARRAY_LEN(_rows_array); - int c = 0; - int r = 0; - int pad = PADDING(cols); - int pos = 0; - - for (r = 0; r < row_size; r++) + word_t *map = (word_t *) StringValuePtr(_map); + int width = NUM2INT(_width); + VALUE *cols_ary = RARRAY_PTR(_cols_ary); + VALUE *rows_ary = RARRAY_PTR(_rows_ary); + int col_ary_size = (int) RARRAY_LEN(_cols_ary); + int row_ary_size = (int) RARRAY_LEN(_rows_ary); + int c = 0; + int r = 0; + int pos = 0; + + for (r = 0; r < row_ary_size; r++) { - for (c = 0; c < col_size; c++) + for (c = 0; c < col_ary_size; c++) { - pos = NUM2INT(rows_array[r]) * (cols + pad) + NUM2INT(cols_array[c]); + pos = NUM2INT(rows_ary[r]) * width + NUM2INT(cols_ary[c]); - byte_array[BYTE_POS(pos)] |= BIT_POS(pos); + bit_set_c(map, pos); } } } } builder.c %{ - void unset_c(VALUE _cols_array, VALUE _rows_array) + void unset_c(VALUE _map, VALUE _width, VALUE _cols_ary, VALUE _rows_ary) { - VALUE _byte_array = rb_ivar_get(self, byte_array); - VALUE _cols = rb_ivar_get(self, cols); - unsigned char* byte_array = StringValuePtr(_byte_array); - int cols = NUM2INT(_cols); - - VALUE *cols_array = RARRAY_PTR(_cols_array); - VALUE *rows_array = RARRAY_PTR(_rows_array); - int col_size = (int) RARRAY_LEN(_cols_array); - int row_size = (int) RARRAY_LEN(_rows_array); - int c = 0; - int r = 0; - int pad = PADDING(cols); - int pos = 0; - unsigned char mask = 0; - - for (r = 0; r < row_size; r++) + word_t *map = (word_t *) StringValuePtr(_map); + int width = NUM2INT(_width); + VALUE *cols_ary = RARRAY_PTR(_cols_ary); + VALUE *rows_ary = RARRAY_PTR(_rows_ary); + int cols_ary_size = (int) RARRAY_LEN(_cols_ary); + int rows_ary_size = (int) RARRAY_LEN(_rows_ary); + int c = 0; + int r = 0; + int pos = 0; + + for (r = 0; r < rows_ary_size; r++) { - for (c = 0; c < col_size; c++) + for (c = 0; c < cols_ary_size; c++) { - pos = NUM2INT(rows_array[r]) * (cols + pad) + NUM2INT(cols_array[c]); - - mask = BIT_POS(pos); - mask = ~mask; + pos = NUM2INT(rows_ary[r]) * width + NUM2INT(cols_ary[c]); - byte_array[BYTE_POS(pos)] &= mask; + bit_unset_c(map, pos); } } } } + # New + builder.prefix %{ + int bit_test_c(word_t* map, int pos) + { + word_t bit = map[WORD_OFFSET(pos)] & (1 << BIT_OFFSET(pos)); + + return bit != 0; + } + } + + # New builder.c %{ - void slice_c(VALUE _new_array, VALUE _new_cols, VALUE _new_rows) + VALUE btest_c(VALUE _map, VALUE _width, VALUE _col, VALUE _row) { - VALUE _old_array = rb_ivar_get(self, byte_array); - VALUE _old_cols = rb_ivar_get(self, cols); - VALUE _old_rows = rb_ivar_get(self, rows); - unsigned char* old_array = StringValuePtr(_old_array); - int old_cols = NUM2INT(_old_cols); - int old_rows = NUM2INT(_old_rows); - - unsigned char* new_array = StringValuePtr(_new_array); - VALUE *cols_array = RARRAY_PTR(_new_cols); - VALUE *rows_array = RARRAY_PTR(_new_rows); - int col_count = (int) RARRAY_LEN(_new_cols); - int row_count = (int) RARRAY_LEN(_new_rows); - int c = 0; - int r = 0; - int old_pos = 0; - int new_pos = 0; - int old_pad = PADDING(old_cols); - int new_pad = PADDING(col_count); - - for (r = 0; r < row_count; r++) + word_t *map = (word_t*) StringValuePtr(_map); + int width = NUM2INT(_width); + int col = NUM2INT(_col); + int row = NUM2INT(_row); + int pos = row * width + col; + + return bit_test_c(map, pos); + } + } + + builder.c %{ + void slice_c(VALUE _old_map, VALUE _old_width, VALUE _new_map, VALUE _new_width, VALUE _cols_ary, VALUE _rows_ary) + { + word_t *old_map = (word_t*) StringValuePtr(_old_map); + int old_width = NUM2INT(_old_width); + word_t *new_map = (word_t*) StringValuePtr(_new_map); + int new_width = NUM2INT(_new_width); + VALUE *cols_ary = RARRAY_PTR(_cols_ary); + VALUE *rows_ary = RARRAY_PTR(_rows_ary); + int cols_ary_size = (int) RARRAY_LEN(_cols_ary); + int rows_ary_size = (int) RARRAY_LEN(_rows_ary); + int old_pos = 0; + int new_pos = 0; + int c = 0; + int r = 0; + + for (r = 0; r < rows_ary_size; r++) { - for (c = 0; c < col_count; c++) + for (c = 0; c < cols_ary_size; c++) { - old_pos = NUM2INT(rows_array[r]) * (old_cols + old_pad) + NUM2INT(cols_array[c]); - new_pos = r * (col_count + new_pad) + c; + old_pos = NUM2INT(rows_ary[r]) * old_width + NUM2INT(cols_ary[c]); - if ((old_array[BYTE_POS(old_pos)] & BIT_POS(old_pos)) != 0) - new_array[BYTE_POS(new_pos)] |= BIT_POS(new_pos); + if (bit_test_c(old_map, old_pos)) + { + new_pos = r * new_width + c; + bit_set_c(new_map, new_pos); + } } } } } builder.c %{ - void bitwise_and_row_c(VALUE _old_array, VALUE _new_array, VALUE _row_array, VALUE _cols, VALUE _rows) + void bitwise_and_row_c(VALUE _old_map, VALUE _new_map, VALUE _row_map, VALUE _width, VALUE _rows) { - unsigned char* old_array = StringValuePtr(_old_array); - unsigned char* new_array = StringValuePtr(_new_array); - unsigned char* row_array = StringValuePtr(_row_array); - int cols = NUM2INT(_cols); - int rows = NUM2INT(_rows); - int col_bytes = (cols / CHAR_BIT) + ((cols % CHAR_BIT) ? 1: 0); - int c = 0; - int r = 0; + word_t *old_map = (word_t*) StringValuePtr(_old_map); + word_t *new_map = (word_t*) StringValuePtr(_new_map); + word_t *row_map = (word_t*) StringValuePtr(_row_map); + int width = NUM2INT(_width); + int rows = NUM2INT(_rows); + int size = width / BITS_PER_WORD; + int c = 0; + int r = 0; for (r = 0; r < rows; r++) { - for (c = 0; c < col_bytes; c++) { - new_array[r * col_bytes + c] = old_array[r * col_bytes + c] & row_array[c]; + for (c = 0; c < size; c++) { + new_map[r * size + c] = old_map[r * size + c] & row_map[c]; } } } } builder.c %{ - VALUE sum_c() + VALUE sum_c(VALUE _map, VALUE _size) { - VALUE _byte_array = rb_ivar_get(self, byte_array); - VALUE _rows = rb_ivar_get(self, rows); - VALUE _cols = rb_ivar_get(self, cols); - unsigned char* byte_array = StringValuePtr(_byte_array); - int rows = NUM2INT(_rows); - int cols = NUM2INT(_cols); - int size = BYTE_SIZE(rows, cols); - int i = 0; - int sum = 0; + word_t *map = (word_t*) StringValuePtr(_map); + int size = NUM2INT(_size); + int i = 0; + int sum = 0; for (i = 0; i < size; i++) - sum += COUNT_BITS(byte_array[i]); + sum += count_bits_c(map[i]); return INT2NUM(sum); } } builder.c %{ - VALUE sum_rows_c() + VALUE sum_rows_c(VALUE _map, VALUE _width, VALUE _rows) { - VALUE _byte_array = rb_ivar_get(self, byte_array); - VALUE _rows = rb_ivar_get(self, rows); - VALUE _cols = rb_ivar_get(self, cols); - unsigned char* byte_array = StringValuePtr(_byte_array); - int rows = NUM2INT(_rows); - int cols = NUM2INT(_cols); - int col_bytes = (cols / CHAR_BIT) + ((cols % CHAR_BIT) ? 1: 0); - int r = 0; - int c = 0; - int sums[rows]; - VALUE rb_sums = rb_ary_new(); - - int pos = 0; + word_t *map = (word_t*) StringValuePtr(_map); + int width = NUM2INT(_width); + int rows = NUM2INT(_rows); + int r = 0; + int c = 0; + int sums[rows]; + VALUE sums_ary = rb_ary_new(); + int size = width / BITS_PER_WORD; + int pos = 0; for (r = 0; r < rows; r++) sums[r] = 0; for (r = 0; r < rows; r++) { - for (c = 0; c < col_bytes; c++) { - sums[r] += COUNT_BITS(byte_array[r * col_bytes + c]); + for (c = 0; c < size; c++) { + pos = r * size + c; + + sums[r] += count_bits_c(map[pos]); } } - for (r = 0; r < rows; r++) rb_ary_push(rb_sums, INT2FIX(sums[r])); + for (r = 0; r < rows; r++) rb_ary_push(sums_ary, INT2FIX(sums[r])); - return rb_sums; + return sums_ary; } } end - - alias :sum :sum_c - alias :sum_rows :sum_rows_c - alias :set? :bit_test_c - alias :fill! :fill_c - alias :empty! :empty_c end -- 2.39.2