From 8a593060edfec9b8365f7d1d56daf9700aceb817 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Tue, 16 Sep 2008 04:36:54 +0000 Subject: [PATCH] inline::c code for adaptor removal git-svn-id: http://biopieces.googlecode.com/svn/trunk@258 74ccb610-7750-0410-82ae-013aeee3265d --- code_c/Maasha/src/bed_sort.c | 2 +- code_perl/Maasha/Biopieces.pm | 10 ++--- code_perl/Maasha/Common.pm | 77 +++++++++++++++++++++++++++++++++++ code_perl/Maasha/Seq.pm | 5 ++- 4 files changed, 85 insertions(+), 9 deletions(-) diff --git a/code_c/Maasha/src/bed_sort.c b/code_c/Maasha/src/bed_sort.c index 851a74e..6c7bf69 100644 --- a/code_c/Maasha/src/bed_sort.c +++ b/code_c/Maasha/src/bed_sort.c @@ -56,7 +56,7 @@ int main( int argc, char *argv[] ) } } - printf( "sort: %d cols: %d dir: %s\n", sort, cols, dir ); + fprintf( stderr, "sort: %d cols: %d dir: %s\n", sort, cols, dir ); argc -= optind; argv += optind; diff --git a/code_perl/Maasha/Biopieces.pm b/code_perl/Maasha/Biopieces.pm index 041cbb2..ab7add7 100644 --- a/code_perl/Maasha/Biopieces.pm +++ b/code_perl/Maasha/Biopieces.pm @@ -4705,21 +4705,17 @@ sub script_remove_adaptor $offset--; } - $adaptor = $options->{ "adaptor" }; + $adaptor = uc $options->{ "adaptor" }; $adaptor_len = length $adaptor; - $adaptor = [ split //, uc $adaptor ]; - - $max_match = $adaptor_len - $max_mismatch; while ( $record = get_record( $in ) ) { if ( $record->{ "SEQ" } ) { - $seq = $record->{ "SEQ" }; + $seq = uc $record->{ "SEQ" }; $seq_len = length $seq; - $seq = [ split //, uc $seq ]; - $pos = Maasha::Seq::find_adaptor( $adaptor, $seq, $adaptor_len, $seq_len, $offset, $max_match, $max_mismatch ); + $pos = Maasha::Common::index_m( $seq, $adaptor, $seq_len, $adaptor_len, $offset, $max_mismatch ); $record->{ "ADAPTOR_POS" } = $pos; diff --git a/code_perl/Maasha/Common.pm b/code_perl/Maasha/Common.pm index 8428aa6..16e069f 100644 --- a/code_perl/Maasha/Common.pm +++ b/code_perl/Maasha/Common.pm @@ -42,6 +42,83 @@ use vars qw( @ISA @EXPORT @EXPORT_OK ); @ISA 
= qw( Exporter ) ; +use Inline ( C => <<'END_C', DIRECTORY => $ENV{ "BP_TMP" } ); + +int index_m( char *str, char *substr, size_t str_len, size_t substr_len, size_t offset, size_t max_mismatch ) +{ + /* Martin A. Hansen & Selene Fernandez, August 2008 */ + + /* Locates a substring within a string starting from offset and allowing for max_mismatch mismatches. */ + /* The begin position of the substring is returned if found otherwise -1 is returned. */ + + int i = 0; + int j = 0; + + size_t max_match = substr_len - max_mismatch; + + i = offset; + + while ( i < str_len - ( max_match + max_mismatch ) + 1 ) + { + j = 0; + + while ( j < substr_len - ( max_match + max_mismatch ) + 1 ) + { + if ( match_m( str, substr, str_len, substr_len, i, j, max_match, max_mismatch ) != 0 ) { + return i; + } + + j++; + } + + i++; + } + + return -1; +} + + +int match_m( char *str, char *substr, size_t str_len, size_t substr_len, size_t str_offset, size_t substr_offset, size_t max_match, size_t max_mismatch ) +{ + /* Martin A. Hansen & Selene Fernandez, August 2008 */ + + /* Compares a string and substring starting at specified string and substring offset */ + /* positions allowing for a specified number of mismatches. Returns 1 if there is a */ + /* match otherwise returns 0. 
*/ + + size_t match = 0; + size_t mismatch = 0; + + while ( str_offset <= str_len && substr_offset <= substr_len ) + { + if ( str[ str_offset ] == substr[ substr_offset ] ) + { + match++; + + if ( match >= max_match ) { + return 1; + }; + } + else + { + mismatch++; + + if ( mismatch > max_mismatch ) { + return 0; + } + } + + str_offset++; + substr_offset++; + } + + return 0; +} + + +END_C + + # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< diff --git a/code_perl/Maasha/Seq.pm b/code_perl/Maasha/Seq.pm index c224808..9304d7b 100644 --- a/code_perl/Maasha/Seq.pm +++ b/code_perl/Maasha/Seq.pm @@ -38,7 +38,6 @@ use vars qw ( @ISA @EXPORT ); @ISA = qw( Exporter ); - # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @@ -1389,6 +1388,10 @@ sub seq_word_unpack # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ADAPTOR LOCATING <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +############################### REDUNDANT ############################## + +# these functions have been replaced by index_m and match_m in Common.pm + sub find_adaptor { # Martin A. Hansen & Selene Fernandez, August 2008 -- 2.39.5