#!/usr/bin/env perl
+
+# Copyright (C) 2007-2009 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Read BLAST records in tabular format from one or more files.
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Warnings are enabled with the pragma, not with a "-w" switch on the shebang
+# line: on Linux "#!/usr/bin/env perl -w" passes the single argument "perl -w"
+# to env, which then fails to find an interpreter of that name.
use warnings;
use strict;
+use Maasha::Biopieces;
+use Maasha::Filesys;
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# State shared with the BEGIN and END blocks. Declared without initialisers
+# on purpose: $run_time_beg is assigned at compile time in BEGIN, and an
+# assignment here would overwrite it at run time.
+my ( $run_time_beg, $run_time_end, $options, $in, $out );
+
+$options = Maasha::Biopieces::parse_options(
+    [
+        { long => 'data_in', short => 'i', type => 'files!', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
+        { long => 'num',     short => 'n', type => 'uint',   mandatory => 'no', default => undef, allowed => undef, disallowed => '0' },
+    ]
+);
+
+$in  = Maasha::Biopieces::read_stream( $options->{ "stream_in" } );
+$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } );
+
+# Pass records arriving on the input stream through unchanged.
+while ( my $record = Maasha::Biopieces::get_record( $in ) ) {
+    Maasha::Biopieces::put_record( $record, $out );
+}
+
+# Convert each tab-separated BLAST line read from the data_in files into a
+# record and emit it, stopping after 'num' records when that option is set.
+if ( $options->{ 'data_in' } )
+{
+    my $data_in = Maasha::Filesys::files_read_open( $options->{ 'data_in' } );
+    my $num     = 1;
+
+    while ( my $line = <$data_in> )
+    {
+        chomp $line;
+
+        next if $line =~ /^#/;       # comment line
+        next if $line =~ /^\s*$/;    # blank line: would otherwise yield an empty record with -1 coordinates
+
+        my @fields = split /\t/, $line;
+
+        # BLAST coordinates are 1-based; records carry them 0-based.
+        my $s_beg  = $fields[ 8 ] - 1;
+        my $s_end  = $fields[ 9 ] - 1;
+        my $strand = '+';
+
+        # A subject range that runs backwards marks a minus strand hit;
+        # report it with begin <= end.
+        if ( $s_beg > $s_end )
+        {
+            ( $s_beg, $s_end ) = ( $s_end, $s_beg );
+
+            $strand = '-';
+        }
+
+        # A fresh hash per line, so nothing carries over between records.
+        my %record = (
+            "REC_TYPE"   => "BLAST",
+            "Q_ID"       => $fields[ 0 ],
+            "S_ID"       => $fields[ 1 ],
+            "IDENT"      => $fields[ 2 ],
+            "ALIGN_LEN"  => $fields[ 3 ],
+            "MISMATCHES" => $fields[ 4 ],
+            "GAPS"       => $fields[ 5 ],
+            "Q_BEG"      => $fields[ 6 ] - 1,
+            "Q_END"      => $fields[ 7 ] - 1,
+            "S_BEG"      => $s_beg,
+            "S_END"      => $s_end,
+            "E_VAL"      => $fields[ 10 ],
+            "BIT_SCORE"  => $fields[ 11 ],
+            "STRAND"     => $strand,
+        );
+
+        Maasha::Biopieces::put_record( \%record, $out );
+
+        last if $options->{ "num" } and $num == $options->{ "num" };
+
+        $num++;
+    }
+
+    close $data_in;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Runs at compile time, before the main program: store the start time that
+# the END block later passes to run_time_print().
+BEGIN
+{
+ $run_time_beg = Maasha::Biopieces::run_time();
+
+ # NOTE(review): presumably records this invocation in the Biopieces log -
+ # confirm in Maasha::Biopieces.
+ Maasha::Biopieces::log_biopiece();
+}
+
+# Runs at program exit: close the input and output record streams, then take
+# the end time and report it together with the start time stored in BEGIN.
+END
+{
+    foreach my $stream ( $in, $out ) {
+        Maasha::Biopieces::close_stream( $stream );
+    }
+
+    $run_time_end = Maasha::Biopieces::run_time();
+
+    Maasha::Biopieces::run_time_print( $run_time_beg, $run_time_end, $options );
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
-use Maasha::BioRun;
+__END__
#!/usr/bin/env perl
+
+# Copyright (C) 2007-2009 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Read EMBL entries from one or more files.
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Warnings are enabled with the pragma, not with a "-w" switch on the shebang
+# line: on Linux "#!/usr/bin/env perl -w" passes the single argument "perl -w"
+# to env, which then fails to find an interpreter of that name.
use warnings;
use strict;
+use Maasha::Biopieces;
+use Maasha::Filesys;
+use Maasha::EMBL;
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# State shared with the BEGIN and END blocks. Declared without initialisers
+# on purpose: $run_time_beg is assigned at compile time in BEGIN, and an
+# assignment here would overwrite it at run time.
+my ( $run_time_beg, $run_time_end, $options, $in, $out );
+
+$options = Maasha::Biopieces::parse_options(
+    [
+        { long => 'data_in', short => 'i', type => 'files!', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
+        { long => 'num',     short => 'n', type => 'uint',   mandatory => 'no', default => undef, allowed => undef, disallowed => '0' },
+        { long => 'keys',    short => 'k', type => 'list',   mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
+        { long => 'feats',   short => 'f', type => 'list',   mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
+        { long => 'quals',   short => 'q', type => 'list',   mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
+    ]
+);
+
+$in  = Maasha::Biopieces::read_stream( $options->{ "stream_in" } );
+$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } );
+
+# Turn the keys, feats and quals option lists into lookup hashes; these are
+# handed to parse_embl_entry() below. An option that was not given
+# contributes nothing.
+my %options2;
+
+foreach my $type ( qw( keys feats quals ) )
+{
+    foreach my $name ( @{ $options->{ $type } || [] } ) {
+        $options2{ $type }{ $name } = 1;
+    }
+}
+
+# Pass records arriving on the input stream through unchanged.
+while ( my $record = Maasha::Biopieces::get_record( $in ) ) {
+    Maasha::Biopieces::put_record( $record, $out );
+}
+
+# For each EMBL entry read from the data_in files emit one entry-level record
+# followed by one record per feature, stopping after 'num' entries when that
+# option is set.
+if ( $options->{ 'data_in' } )
+{
+    my $data_in = Maasha::Filesys::files_read_open( $options->{ 'data_in' } );
+    my $num     = 1;
+
+    while ( my $entry = Maasha::EMBL::get_embl_entry( $data_in ) )
+    {
+        my $record = Maasha::EMBL::parse_embl_entry( $entry, \%options2 );
+
+        # Entry-level record: every key except the feature table. Only
+        # top-level keys are added or removed from here on, so a shallow copy
+        # is enough; it also avoids calling dclone, which this script does
+        # not import from Storable.
+        my %entry_rec = %{ $record };
+
+        delete $entry_rec{ "FT" };
+
+        Maasha::Biopieces::put_record( \%entry_rec, $out );
+
+        # Feature records carry the entry-level keys but not the sequence.
+        delete $entry_rec{ "SEQ" };
+
+        foreach my $feat_type ( keys %{ $record->{ "FT" } || {} } )
+        {
+            foreach my $feat ( @{ $record->{ "FT" }->{ $feat_type } } )
+            {
+                # Start every feature from the entry-level keys, so that
+                # qualifiers of a previously emitted feature cannot leak into
+                # this one.
+                my %feat_rec = ( %entry_rec, "FEAT_TYPE" => $feat_type );
+
+                foreach my $qual ( keys %{ $feat } )
+                {
+                    # Record keys are the qualifier names, upper-cased and
+                    # without a leading underscore.
+                    ( my $key = $qual ) =~ s/^_//;
+
+                    $feat_rec{ uc $key } = join "; ", @{ $feat->{ $qual } };
+                }
+
+                Maasha::Biopieces::put_record( \%feat_rec, $out );
+            }
+        }
+
+        last if $options->{ "num" } and $num == $options->{ "num" };
+
+        $num++;
+    }
+
+    close $data_in;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Runs at compile time, before the main program: store the start time that
+# the END block later passes to run_time_print().
+BEGIN
+{
+ $run_time_beg = Maasha::Biopieces::run_time();
+
+ # NOTE(review): presumably records this invocation in the Biopieces log -
+ # confirm in Maasha::Biopieces.
+ Maasha::Biopieces::log_biopiece();
+}
+
+# Runs at program exit: close the input and output record streams, then take
+# the end time and report it together with the start time stored in BEGIN.
+END
+{
+    foreach my $stream ( $in, $out ) {
+        Maasha::Biopieces::close_stream( $stream );
+    }
+
+    $run_time_end = Maasha::Biopieces::run_time();
+
+    Maasha::Biopieces::run_time_print( $run_time_beg, $run_time_end, $options );
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
-use Maasha::BioRun;
+__END__
if ( $script eq "print_usage" ) { script_print_usage( $in, $out, $options ) }
elsif ( $script eq "read_psl" ) { script_read_psl( $in, $out, $options ) }
elsif ( $script eq "read_fixedstep" ) { script_read_fixedstep( $in, $out, $options ) }
- elsif ( $script eq "read_blast_tab" ) { script_read_blast_tab( $in, $out, $options ) }
- elsif ( $script eq "read_embl" ) { script_read_embl( $in, $out, $options ) }
elsif ( $script eq "read_stockholm" ) { script_read_stockholm( $in, $out, $options ) }
elsif ( $script eq "read_phastcons" ) { script_read_phastcons( $in, $out, $options ) }
elsif ( $script eq "read_soft" ) { script_read_soft( $in, $out, $options ) }
num|n=s
);
}
- elsif ( $script eq "read_blast_tab" )
- {
- @options = qw(
- data_in|i=s
- num|n=s
- );
- }
- elsif ( $script eq "read_embl" )
- {
- @options = qw(
- data_in|i=s
- num|n=s
- keys|k=s
- feats|f=s
- quals|q=s
- );
- }
elsif ( $script eq "read_stockholm" )
{
@options = qw(
}
-sub script_read_blast_tab
-{
- # Martin A. Hansen, September 2007.
-
- # Read tabular BLAST output from NCBI blast run with -m8 or -m9.
-
- my ( $in, # handle to in stream
- $out, # handle to out stream
- $options, # options hash
- ) = @_;
-
- # Returns nothing.
-
- my ( $file, $line, @fields, $strand, $record, $data_in, $num );
-
- while ( $record = Maasha::Biopieces::get_record( $in ) ) {
- Maasha::Biopieces::put_record( $record, $out );
- }
-
- $num = 1;
-
- foreach $file ( @{ $options->{ "files" } } )
- {
- $data_in = Maasha::Common::read_open( $file );
-
- while ( $line = <$data_in> )
- {
- chomp $line;
-
- next if $line =~ /^#/;
-
- @fields = split /\t/, $line;
-
- $record->{ "REC_TYPE" } = "BLAST";
- $record->{ "Q_ID" } = $fields[ 0 ];
- $record->{ "S_ID" } = $fields[ 1 ];
- $record->{ "IDENT" } = $fields[ 2 ];
- $record->{ "ALIGN_LEN" } = $fields[ 3 ];
- $record->{ "MISMATCHES" } = $fields[ 4 ];
- $record->{ "GAPS" } = $fields[ 5 ];
- $record->{ "Q_BEG" } = $fields[ 6 ] - 1; # BLAST is 1-based
- $record->{ "Q_END" } = $fields[ 7 ] - 1; # BLAST is 1-based
- $record->{ "S_BEG" } = $fields[ 8 ] - 1; # BLAST is 1-based
- $record->{ "S_END" } = $fields[ 9 ] - 1; # BLAST is 1-based
- $record->{ "E_VAL" } = $fields[ 10 ];
- $record->{ "BIT_SCORE" } = $fields[ 11 ];
-
- if ( $record->{ "S_BEG" } > $record->{ "S_END" } )
- {
- $record->{ "STRAND" } = '-';
-
- ( $record->{ "S_BEG" }, $record->{ "S_END" } ) = ( $record->{ "S_END" }, $record->{ "S_BEG" } );
- }
- else
- {
- $record->{ "STRAND" } = '+';
- }
-
- Maasha::Biopieces::put_record( $record, $out );
-
- goto NUM if $options->{ "num" } and $num == $options->{ "num" };
-
- $num++;
- }
-
- close $data_in;
- }
-
- NUM:
-
- close $data_in if $data_in;
-}
-
-
-sub script_read_embl
-{
- # Martin A. Hansen, August 2007.
-
- # Read EMBL format.
-
- my ( $in, # handle to in stream
- $out, # handle to out stream
- $options, # options hash
- ) = @_;
-
- # Returns nothing.
-
- my ( %options2, $file, $data_in, $num, $entry, $record );
-
- map { $options2{ "keys" }{ $_ } = 1 } @{ $options->{ "keys" } };
- map { $options2{ "feats" }{ $_ } = 1 } @{ $options->{ "feats" } };
- map { $options2{ "quals" }{ $_ } = 1 } @{ $options->{ "quals" } };
-
- while ( $record = Maasha::Biopieces::get_record( $in ) ) {
- Maasha::Biopieces::put_record( $record, $out );
- }
-
- $num = 1;
-
- foreach $file ( @{ $options->{ "files" } } )
- {
- $data_in = Maasha::Common::read_open( $file );
-
- while ( $entry = Maasha::EMBL::get_embl_entry( $data_in ) )
- {
- $record = Maasha::EMBL::parse_embl_entry( $entry, \%options2 );
-
- my ( $feat, $feat2, $qual, $qual_val, $record_copy );
-
- $record_copy = dclone $record;
-
- delete $record_copy->{ "FT" };
-
- Maasha::Biopieces::put_record( $record_copy, $out );
-
- delete $record_copy->{ "SEQ" };
-
- foreach $feat ( keys %{ $record->{ "FT" } } )
- {
- $record_copy->{ "FEAT_TYPE" } = $feat;
-
- foreach $feat2 ( @{ $record->{ "FT" }->{ $feat } } )
- {
- foreach $qual ( keys %{ $feat2 } )
- {
- $qual_val = join "; ", @{ $feat2->{ $qual } };
-
- $qual =~ s/^_//;
- $qual = uc $qual;
-
- $record_copy->{ $qual } = $qual_val;
- }
-
- Maasha::Biopieces::put_record( $record_copy, $out );
- }
- }
-
- goto NUM if $options->{ "num" } and $num == $options->{ "num" };
-
- $num++;
- }
-
- close $data_in;
- }
-
- NUM:
-
- close $data_in if $data_in;
-}
-
-
sub script_read_stockholm
{
# Martin A. Hansen, August 2007.