X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Fblast_seq;h=3feaa655d8837aa125d316754eacc54635026178;hb=5de6112b70b59420b245ce636a8b2e3c90acbe00;hp=1823216f230a2188c10f7b7c2c0d01ddb35c8704;hpb=fc6aa2a75a62c9da33195bbdf058c425235fe00c;p=biopieces.git diff --git a/bp_bin/blast_seq b/bp_bin/blast_seq index 1823216..3feaa65 100755 --- a/bp_bin/blast_seq +++ b/bp_bin/blast_seq @@ -1,4 +1,4 @@ -#!/usr/bin/env perl -w +#!/usr/bin/env perl # Copyright (C) 2007-2009 Martin A. Hansen. @@ -26,6 +26,7 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +use warnings; use strict; use Maasha::Biopieces; use Maasha::Common; @@ -36,17 +37,24 @@ use Maasha::Fasta; # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -my ( $run_time_beg, $run_time_end, $options, $in, $out, $tmp_dir, $tmp_in, $tmp_out, $q_type, $s_type, $record, $entry, - $fh_in, $fh_out, $program ); +my ( $options, $in, $out, $tmp_dir, $tmp_in, $tmp_out, $q_type, $s_type, $record, $entry, + $fh_in, $fh_out, $progs_ok, $program ); + +$progs_ok = 'blastn,blastp,tblastn,blastx,tblastx'; $options = Maasha::Biopieces::parse_options( [ - { long => 'database', short => 'd', type => 'file', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'genome', short => 'g', type => 'genome', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'program', short => 'p', type => 'string', mandatory => 'no', default => undef, allowed => 'blastn,blastp,tblastn,blastx,tblastx', disallowed => undef }, - { long => 'e_val', short => 'e', type => 'float', mandatory => 'no', default => 10, allowed => undef, disallowed => undef }, - { long => 'filter', short => 'f', type => 'string', mandatory => 'no', default => 'no', allowed => 'yes,no', disallowed => undef }, - { long => 'cpus', short => 'c', type => 'uint', mandatory => 'no', default => 1, allowed => undef, disallowed => 0 }, + { long => 'database', short => 'd', type => 'file', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'genome', short => 'g', type => 'genome', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'program', short => 'p', type => 'string', mandatory => 'no', default => undef, allowed => $progs_ok, disallowed => undef }, + { long => 'e_val', short => 'e', type => 'float', mandatory => 'no', default => 10, allowed => undef, disallowed => undef }, + { long => 'filter', short => 'f', type => 'string', mandatory => 'no', default => 'no', allowed => 'yes,no', disallowed => undef }, + { long => 'cpus', short => 'c', type => 'uint', mandatory => 'no', default => 1, allowed => undef, disallowed => 0 }, + { long => 'no_gaps', short => 'G', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'megablast', short => 'm', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'extend_threshold', short => 'E', type => 'uint', mandatory => 'no', default => 0, allowed => undef, disallowed => undef }, + { long => 'word_size', short => 'W', type => 'uint', mandatory => 'no', default => 0, allowed => undef, disallowed => undef }, + { long => 'single_hit', short => 's', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, ] ); @@ -88,6 +96,24 @@ if ( $options->{ 'filter' } eq 'yes' ) { $options->{ 'filter' } = 'F'; } +if ( $options->{ 'no_gaps' } ) { + $options->{ 'gapped' } = 'F'; +} else { + $options->{ 'gapped' } = 'T'; +} + +if ( $options->{ 'megablast' } ) { + $options->{ 'megablast' } = 'T'; +} else { + $options->{ 'megablast' } = 'F'; +} + +if ( $options->{ 'single_hit' } ) { + $options->{ 'single_hit' } = 1 +} else { + $options->{ 'single_hit' } = 0 +} + if ( $options->{ 'verbose' } ) { Maasha::Common::run( @@ -96,13 +122,17 @@ if ( $options->{ 'verbose' } ) "-p $program", "-e $options->{ 'e_val' }", "-a $options->{ 'cpus' }", + "-g $options->{ 'gapped' }", + "-n $options->{ 'megablast' }", "-m 8", "-i $tmp_in", "-d $options->{ 'database' }", "-F $options->{ 'filter' }", + "-P $options->{ 'single_hit' }", + "-W $options->{ 'word_size' }", + "-f $options->{ 'extend_threshold' }", "-o $tmp_out", - ), - 1 + ) ); } else @@ -117,10 +147,12 @@ else "-i $tmp_in", "-d $options->{ 'database' }", "-F $options->{ 'filter' }", + "-P $options->{ 'single_hit' }", + "-W $options->{ 'word_size' }", + "-f $options->{ 'extend_threshold' }", "-o $tmp_out", "> /dev/null 2>&1" - ), - 1 + ) ); } @@ -139,7 +171,8 @@ close $fh_out; unlink $tmp_out; -Maasha::Filesys::dir_remove( $tmp_dir ); +Maasha::Biopieces::close_stream( $in ); +Maasha::Biopieces::close_stream( $out ); # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @@ -154,10 +187,10 @@ sub guess_database_type # a .phr file exists. # Returns string; - if ( -f $options->{ 'database' } . ".phr" ) { - return "protein"; + if ( -f $options->{ 'database' } . ".phr" or -f $options->{ 'database' } . ".pal" ) { + return "PROTEIN"; } else { - return "nucleotide"; + return "NUCLEOTIDE"; } } @@ -177,13 +210,13 @@ sub guess_program my ( $program ); - if ( $q_type ne "protein" and $s_type ne "protein" ) { + if ( $q_type ne "PROTEIN" and $s_type ne "PROTEIN" ) { $program = "blastn"; - } elsif ( $q_type eq "protein" and $s_type eq "protein" ) { + } elsif ( $q_type eq "PROTEIN" and $s_type eq "PROTEIN" ) { $program = "blastp"; - } elsif ( $q_type ne "protein" and $s_type eq "protein" ) { + } elsif ( $q_type ne "PROTEIN" and $s_type eq "PROTEIN" ) { $program = "blastx"; - } elsif ( $q_type eq "protein" and $s_type ne "protein" ) { + } elsif ( $q_type eq "PROTEIN" and $s_type ne "PROTEIN" ) { $program = "tblastn"; } @@ -211,8 +244,6 @@ sub get_tab_entry @fields = split /\s+/, $line; - use Data::Dumper; - return wantarray ? @fields : \@fields; } @@ -268,19 +299,13 @@ sub blast_tab2biopiece BEGIN { - $run_time_beg = Maasha::Biopieces::run_time(); - - Maasha::Biopieces::log_biopiece(); + Maasha::Biopieces::status_set(); } + END { - Maasha::Biopieces::close_stream( $in ); - Maasha::Biopieces::close_stream( $out ); - - $run_time_end = Maasha::Biopieces::run_time(); - - Maasha::Biopieces::run_time_print( $run_time_beg, $run_time_end, $options ); + Maasha::Biopieces::status_log(); } @@ -288,4 +313,3 @@ END __END__ -