From: martinahansen Date: Sat, 23 May 2009 20:51:25 +0000 (+0000) Subject: migrated extract_seq X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=7daa87ce30adb191a883988bda67ead32c3a0933;p=biopieces.git migrated extract_seq git-svn-id: http://biopieces.googlecode.com/svn/trunk@383 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/bp_bin/extract_seq b/bp_bin/extract_seq index bdd8104..95bc670 100755 --- a/bp_bin/extract_seq +++ b/bp_bin/extract_seq @@ -33,36 +33,61 @@ use Maasha::Biopieces; # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -my ( $run_time_beg, $run_time_end, $options, $in, $out, $record, @vals, $i ); +my ( $run_time_beg, $run_time_end, $options, $in, $out, $record, $beg, $end, $len ); $options = Maasha::Biopieces::parse_options( [ - { long => 'key', short => 'k', type => 'string', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef }, - { long => 'keys', short => 'K', type => 'list', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'delimit', short => 'd', type => 'string', mandatory => 'no', default => '_', allowed => undef, disallowed => undef }, + { long => 'beg', short => 'b', type => 'uint', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'end', short => 'e', type => 'uint', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'len', short => 'l', type => 'uint', mandatory => 'no', default => undef, allowed => undef, disallowed => 0 }, ] ); $in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); $out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); -while ( $record = Maasha::Biopieces::get_record( $in ) ) +if ( not defined $options->{ "beg" } or $options->{ "beg" } < 0 ) { + $beg = 0; +} else { + $beg = $options->{ "beg" } - 1; # correcting for start offset +} + +if ( defined $options->{ "end" } and $options->{ "end" } - 1 < $beg ) { + $end = $beg - 1; +} elsif ( defined $options->{ "end" } ) { + $end = $options->{ "end" } - 1; # correcting for start offset +} + +$len = $options->{ "len" }; + +# print "beg->$beg, end->$end, len->$len\n"; + +while ( $record = Maasha::Biopieces::get_record( $in ) ) { - if ( exists $options->{ 'key' } and exists $record->{ $options->{ 'key' } } ) + if ( $record->{ "SEQ" } ) { - @vals = split /$options->{ 'delimit' }/, $record->{ $options->{ 'key' } }; - - if ( scalar @vals > 1 ) + if ( defined $beg and defined $end ) { - for ( $i = 0; $i < @vals; $i++ ) - { - if ( defined $options->{ "keys" } and defined $options->{ "keys" }->[ $i ] ) { - $record->{ $options->{ "keys" }->[ $i ] } = $vals[ $i ]; - } else { - $record->{ $options->{ 'key' } . "_$i" } = $vals[ $i ]; - } + if ( $end - $beg + 1 > length $record->{ "SEQ" } ) { + $record->{ "SEQ" } = substr $record->{ "SEQ" }, $beg; + } else { + $record->{ "SEQ" } = substr $record->{ "SEQ" }, $beg, $end - $beg + 1; } } + elsif ( defined $beg and defined $len ) + { + if ( $len > length $record->{ "SEQ" } ) { + $record->{ "SEQ" } = substr $record->{ "SEQ" }, $beg; + } else { + $record->{ "SEQ" } = substr $record->{ "SEQ" }, $beg, $len; + } + } + elsif ( defined $beg ) + { + $record->{ "SEQ" } = substr $record->{ "SEQ" }, $beg; + } + + $record->{ "SEQ_LEN" } = length $record->{ "SEQ" }; } Maasha::Biopieces::put_record( $record, $out ); diff --git a/bp_bin/print_wiki b/bp_bin/print_wiki new file mode 100755 index 0000000..578aa8e --- /dev/null +++ b/bp_bin/print_wiki @@ -0,0 +1,82 @@ +#!/usr/bin/env perl -w + +# Copyright (C) 2007-2009 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Prints a wiki page in ASCII format. + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +use strict; +use Maasha::Biopieces; +use Maasha::Gwiki; + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +my ( $run_time_beg, $run_time_end, $options, $default, $wiki, $lines ); + +$default = $ENV{ 'BP_DIR' } . "/bp_usage/" . ( split "/", $0 )[ -1 ]. ".wiki"; + +$options = Maasha::Biopieces::parse_options( + [ + { long => 'data_in', short => 'i', type => 'file!', mandatory => 'no', default => $default, allowed => undef, disallowed => undef }, + ] +); + +$wiki = Maasha::Gwiki::gwiki_read( $options->{ 'data_in' } ); + +( $wiki->[ 2 ], $wiki->[ 3 ], $wiki->[ 0 ], $wiki->[ 1 ] ) = ( $wiki->[ 0 ], $wiki->[ 1 ], $wiki->[ 2 ], $wiki->[ 3 ] ); + +if ( not $options->{ "help" } ) { + @{ $wiki } = grep { $_->[ 0 ]->{ 'SECTION' } =~ /Biopiece|summary|Usage|Options|Help/ } @{ $wiki }; +} + +$lines = Maasha::Gwiki::gwiki2ascii( $wiki ); + +print STDERR "$_\n" foreach @{ $lines }; + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +BEGIN +{ + $run_time_beg = Maasha::Biopieces::run_time(); + + Maasha::Biopieces::log_biopiece(); +} + + +END +{ + $run_time_end = Maasha::Biopieces::run_time(); + + Maasha::Biopieces::run_time_print( $run_time_beg, $run_time_end, $options ); +} + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +__END__ + diff --git a/code_perl/Maasha/BioRun.pm b/code_perl/Maasha/BioRun.pm index 60b7717..a44bdb0 100644 --- a/code_perl/Maasha/BioRun.pm +++ b/code_perl/Maasha/BioRun.pm @@ -147,7 +147,6 @@ sub run_script elsif ( $script eq "transliterate_seq" ) { script_transliterate_seq( $in, $out, $options ) } elsif ( $script eq "transliterate_vals" ) { script_transliterate_vals( $in, $out, $options ) } elsif ( $script eq "translate_seq" ) { script_translate_seq( $in, $out, $options ) } - elsif ( $script eq "extract_seq" ) { script_extract_seq( $in, $out, $options ) } elsif ( $script eq "get_genome_align" ) { script_get_genome_align( $in, $out, $options ) } elsif ( $script eq "get_genome_phastcons" ) { script_get_genome_phastcons( $in, $out, $options ) } elsif ( $script eq "fold_seq" ) { script_fold_seq( $in, $out, $options ) } @@ -398,14 +397,6 @@ sub get_options frames|f=s ); } - elsif ( $script eq "extract_seq" ) - { - @options = qw( - beg|b=s - end|e=s - len|l=s - ); - } elsif ( $script eq "get_genome_align" ) { @options = qw( @@ -2400,70 +2391,6 @@ sub script_translate_seq } -sub script_extract_seq -{ - # Martin A. Hansen, August 2007. - - # Extract subsequences from sequences in record. - - my ( $in, # handle to in stream - $out, # handle to out stream - $options, # options hash - ) = @_; - - # Returns nothing. - - my ( $beg, $end, $len, $record ); - - if ( not defined $options->{ "beg" } or $options->{ "beg" } < 0 ) { - $beg = 0; - } else { - $beg = $options->{ "beg" } - 1; # correcting for start offset - } - - if ( defined $options->{ "end" } and $options->{ "end" } - 1 < $beg ) { - $end = $beg - 1; - } elsif ( defined $options->{ "end" } ) { - $end = $options->{ "end" } - 1; # correcting for start offset - } - - $len = $options->{ "len" }; - -# print "beg->$beg, end->$end, len->$len\n"; - - while ( $record = Maasha::Biopieces::get_record( $in ) ) - { - if ( $record->{ "SEQ" } ) - { - if ( defined $beg and defined $end ) - { - if ( $end - $beg + 1 > length $record->{ "SEQ" } ) { - $record->{ "SEQ" } = substr $record->{ "SEQ" }, $beg; - } else { - $record->{ "SEQ" } = substr $record->{ "SEQ" }, $beg, $end - $beg + 1; - } - } - elsif ( defined $beg and defined $len ) - { - if ( $len > length $record->{ "SEQ" } ) { - $record->{ "SEQ" } = substr $record->{ "SEQ" }, $beg; - } else { - $record->{ "SEQ" } = substr $record->{ "SEQ" }, $beg, $len; - } - } - elsif ( defined $beg ) - { - $record->{ "SEQ" } = substr $record->{ "SEQ" }, $beg; - } - } - - $record->{ "SEQ_LEN" } = length $record->{ "SEQ" }; - - Maasha::Biopieces::put_record( $record, $out ); - } -} - - sub script_get_genome_align { # Martin A. Hansen, April 2008. diff --git a/code_perl/Maasha/Biopieces.pm b/code_perl/Maasha/Biopieces.pm index b1c5fb0..44369a8 100644 --- a/code_perl/Maasha/Biopieces.pm +++ b/code_perl/Maasha/Biopieces.pm @@ -329,20 +329,25 @@ sub check_print_usage if ( $script ne 'print_wiki' ) { - # if ( exists $options{ 'help' } or not ( exists $options{ 'stream_in' } or exists $options{ 'data_in' } or not -t STDIN ) ) - if ( exists $options->{ 'help' } or ( scalar keys %{ $options } == 0 or exists $options->{ 'data_in' } or not -t STDIN ) ) + if ( exists $options->{ 'help' } or -t STDIN ) { - $wiki = $ENV{ 'BP_DIR' } . "/bp_usage/$script.wiki"; - - if ( exists $options->{ 'help' } ) { - `print_wiki --data_in=$wiki --help`; - } elsif ( $script =~ /^(list_biopieces|list_genomes)$/ ) { - return; - } else { - `print_wiki --data_in=$wiki`; + if ( not ( exists $options->{ 'stream_in' } or $options->{ 'data_in' } ) ) + { + if ( scalar keys %{ $options } <= 1 ) + { + $wiki = $ENV{ 'BP_DIR' } . "/bp_usage/$script.wiki"; + + if ( exists $options->{ 'help' } ) { + `print_wiki --data_in=$wiki --help`; + } elsif ( $script =~ /^(list_biopieces|list_genomes)$/ ) { + return; + } else { + `print_wiki --data_in=$wiki`; + } + + exit; + } } - - exit; } } }