From: martinahansen Date: Mon, 15 Jun 2009 15:31:30 +0000 (+0000) Subject: added non-overlapping feature to split_seq X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=97e32a3f0e6671dad1bf98cabcebc59ea9914000;p=biopieces.git added non-overlapping feature to split_seq git-svn-id: http://biopieces.googlecode.com/svn/trunk@525 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/bp_bin/split_seq b/bp_bin/split_seq index 7c4c598..274e133 100755 --- a/bp_bin/split_seq +++ b/bp_bin/split_seq @@ -33,23 +33,27 @@ use Maasha::Biopieces; # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -my ( $options, $in, $out, $record, $new_record, $i, $subseq, %lookup ); +my ( $options, $in, $out, $record, $new_record, $i, $inc, $subseq, %lookup ); $options = Maasha::Biopieces::parse_options( [ - { long => 'word_size', short => 'w', type => 'uint', mandatory => 'no', default => 7, allowed => undef, disallowed => 0 }, - { long => 'uniq', short => 'u', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'word_size', short => 'w', type => 'uint', mandatory => 'no', default => 7, allowed => undef, disallowed => 0 }, + { long => 'non_overlapping', short => 'n', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'uniq', short => 'u', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, ] ); $in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); $out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); +$inc = 1; +$inc = $options->{ "word_size" } if $options->{ "non_overlapping" }; + while ( $record = Maasha::Biopieces::get_record( $in ) ) { if ( $record->{ "SEQ_NAME" } and $record->{ "SEQ" } ) { - for ( $i = 0; $i < length( $record->{ "SEQ" } ) - $options->{ "word_size" } + 1; $i++ ) + for ( $i = 0; $i < length( $record->{ "SEQ" } ) - $options->{ "word_size" } + 1; $i += $inc ) { $subseq = substr $record->{ "SEQ" }, $i, $options->{ "word_size" }; @@ -57,6 +61,7 @@ while ( $record = Maasha::Biopieces::get_record( $in ) ) { $new_record->{ "SEQ_NAME" } = $record->{ "SEQ_NAME" } . "[" . ( $i + 1 ) . "-" . ( $i + $options->{ "word_size" } ) . "]"; $new_record->{ "SEQ" } = $subseq; + $new_record->{ "SEQ_LEN" } = $options->{ "word_size" }; Maasha::Biopieces::put_record( $new_record, $out ); @@ -66,6 +71,7 @@ while ( $record = Maasha::Biopieces::get_record( $in ) ) { $new_record->{ "SEQ_NAME" } = $record->{ "SEQ_NAME" } . "[" . ( $i + 1 ) . "-" . ( $i + $options->{ "word_size" } ) . "]"; $new_record->{ "SEQ" } = $subseq; + $new_record->{ "SEQ_LEN" } = $options->{ "word_size" }; Maasha::Biopieces::put_record( $new_record, $out ); }