From 3a5401c88a90a2315a2b958d7eb4c119adba158f Mon Sep 17 00:00:00 2001 From: martinahansen Date: Sun, 24 May 2009 19:49:12 +0000 Subject: [PATCH] migrated split biopieces git-svn-id: http://biopieces.googlecode.com/svn/trunk@401 74ccb610-7750-0410-82ae-013aeee3265d --- code_perl/Maasha/BioRun.pm | 113 ------------------------------------- 1 file changed, 113 deletions(-) diff --git a/code_perl/Maasha/BioRun.pm b/code_perl/Maasha/BioRun.pm index 4863090..32be349 100644 --- a/code_perl/Maasha/BioRun.pm +++ b/code_perl/Maasha/BioRun.pm @@ -145,8 +145,6 @@ sub run_script elsif ( $script eq "get_genome_align" ) { script_get_genome_align( $in, $out, $options ) } elsif ( $script eq "get_genome_phastcons" ) { script_get_genome_phastcons( $in, $out, $options ) } elsif ( $script eq "fold_seq" ) { script_fold_seq( $in, $out, $options ) } - elsif ( $script eq "split_seq" ) { script_split_seq( $in, $out, $options ) } - elsif ( $script eq "split_bed" ) { script_split_bed( $in, $out, $options ) } elsif ( $script eq "tile_seq" ) { script_tile_seq( $in, $out, $options ) } elsif ( $script eq "invert_align" ) { script_invert_align( $in, $out, $options ) } elsif ( $script eq "patscan_seq" ) { script_patscan_seq( $in, $out, $options ) } @@ -408,20 +406,6 @@ sub get_options flank|f=s ); } - elsif ( $script eq "split_seq" ) - { - @options = qw( - word_size|w=s - uniq|u - ); - } - elsif ( $script eq "split_bed" ) - { - @options = qw( - window_size|w=s - step_size|s=s - ); - } elsif ( $script eq "tile_seq" ) { @options = qw( @@ -2380,103 +2364,6 @@ sub script_fold_seq } -sub script_split_seq -{ - # Martin A. Hansen, August 2007. - - # Split a sequence in stream into words. - - my ( $in, # handle to in stream - $out, # handle to out stream - $options, # options hash - ) = @_; - - # Returns nothing. - - my ( $record, $new_record, $i, $subseq, %lookup ); - - $options->{ "word_size" } ||= 7; - - while ( $record = Maasha::Biopieces::get_record( $in ) ) - { - if ( $record->{ "SEQ_NAME" } and $record->{ "SEQ" } ) - { - for ( $i = 0; $i < length( $record->{ "SEQ" } ) - $options->{ "word_size" } + 1; $i++ ) - { - $subseq = substr $record->{ "SEQ" }, $i, $options->{ "word_size" }; - - if ( $options->{ "uniq" } and not $lookup{ $subseq } ) - { - $new_record->{ "SEQ_NAME" } = $record->{ "SEQ_NAME" } . "[" . ( $i + 1 ) . "-" . ( $i + $options->{ "word_size" } ) . "]"; - $new_record->{ "SEQ" } = $subseq; - - Maasha::Biopieces::put_record( $new_record, $out ); - - $lookup{ $subseq } = 1; - } - else - { - $new_record->{ "SEQ_NAME" } = $record->{ "SEQ_NAME" } . "[" . ( $i + 1 ) . "-" . ( $i + $options->{ "word_size" } ) . "]"; - $new_record->{ "SEQ" } = $subseq; - - Maasha::Biopieces::put_record( $new_record, $out ); - } - } - } - else - { - Maasha::Biopieces::put_record( $record, $out ); - } - } -} - - -sub script_split_bed -{ - # Martin A. Hansen, June 2008. - - # Split a BED record into overlapping windows. - - my ( $in, # handle to in stream - $out, # handle to out stream - $options, # options hash - ) = @_; - - # Returns nothing. - - my ( $record, $new_record, $i ); - - $options->{ "window_size" } ||= 20; - $options->{ "step_size" } ||= 1; - - while ( $record = Maasha::Biopieces::get_record( $in ) ) - { - if ( $record->{ "CHR" } and $record->{ "CHR_BEG" } and $record->{ "CHR_END" } ) - { - $record->{ "BED_LEN" } = $record->{ "CHR_END" } - $record->{ "CHR_BEG" } + 1; - - for ( $i = 0; $i < $record->{ "BED_LEN" } - $options->{ "window_size" }; $i += $options->{ "step_size" } ) - { - $new_record->{ "REC_TYPE" } = "BED"; - $new_record->{ "CHR" } = $record->{ "CHR" }; - $new_record->{ "CHR_BEG" } = $record->{ "CHR_BEG" } + $i; - $new_record->{ "CHR_END" } = $record->{ "CHR_BEG" } + $i + $options->{ "window_size" }; - $new_record->{ "BED_LEN" } = $options->{ "window_size" }; - $new_record->{ "Q_ID" } = $record->{ "Q_ID" } . "_$i"; - $new_record->{ "SCORE" } = $record->{ "SCORE" }; - $new_record->{ "STRAND" } = $record->{ "STRAND" }; - - Maasha::Biopieces::put_record( $new_record, $out ); - } - } - else - { - Maasha::Biopieces::put_record( $record, $out ); - } - } -} - - sub script_tile_seq { # Martin A. Hansen, February 2008. -- 2.39.2