X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Fblast_seq;h=3feaa655d8837aa125d316754eacc54635026178;hb=4c74f0aa5b9a572a2552767d4c4c0c15166e092a;hp=de78c35d4a2c1419027617068e2257f1c8f8452d;hpb=68b0d2096e200e63bb06f1e20c04761ddb9bc68b;p=biopieces.git diff --git a/bp_bin/blast_seq b/bp_bin/blast_seq index de78c35..3feaa65 100755 --- a/bp_bin/blast_seq +++ b/bp_bin/blast_seq @@ -1,4 +1,4 @@ -#!/usr/bin/env perl -w +#!/usr/bin/env perl # Copyright (C) 2007-2009 Martin A. Hansen. @@ -26,6 +26,7 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +use warnings; use strict; use Maasha::Biopieces; use Maasha::Common; @@ -37,16 +38,23 @@ use Maasha::Fasta; my ( $options, $in, $out, $tmp_dir, $tmp_in, $tmp_out, $q_type, $s_type, $record, $entry, - $fh_in, $fh_out, $program ); + $fh_in, $fh_out, $progs_ok, $program ); + +$progs_ok = 'blastn,blastp,tblastn,blastx,tblastx'; $options = Maasha::Biopieces::parse_options( [ - { long => 'database', short => 'd', type => 'file', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'genome', short => 'g', type => 'genome', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'program', short => 'p', type => 'string', mandatory => 'no', default => undef, allowed => 'blastn,blastp,tblastn,blastx,tblastx', disallowed => undef }, - { long => 'e_val', short => 'e', type => 'float', mandatory => 'no', default => 10, allowed => undef, disallowed => undef }, - { long => 'filter', short => 'f', type => 'string', mandatory => 'no', default => 'no', allowed => 'yes,no', disallowed => undef }, - { long => 'cpus', short => 'c', type => 'uint', mandatory => 'no', default => 1, allowed => undef, disallowed => 0 }, + { long => 'database', short => 'd', type => 'file', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'genome', short => 'g', type => 'genome', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'program', short => 'p', type => 'string', mandatory => 'no', default => undef, allowed => $progs_ok, disallowed => undef }, + { long => 'e_val', short => 'e', type => 'float', mandatory => 'no', default => 10, allowed => undef, disallowed => undef }, + { long => 'filter', short => 'f', type => 'string', mandatory => 'no', default => 'no', allowed => 'yes,no', disallowed => undef }, + { long => 'cpus', short => 'c', type => 'uint', mandatory => 'no', default => 1, allowed => undef, disallowed => 0 }, + { long => 'no_gaps', short => 'G', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'megablast', short => 'm', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'extend_threshold', short => 'E', type => 'uint', mandatory => 'no', default => 0, allowed => undef, disallowed => undef }, + { long => 'word_size', short => 'W', type => 'uint', mandatory => 'no', default => 0, allowed => undef, disallowed => undef }, + { long => 'single_hit', short => 's', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, ] ); @@ -88,6 +96,24 @@ if ( $options->{ 'filter' } eq 'yes' ) { $options->{ 'filter' } = 'F'; } +if ( $options->{ 'no_gaps' } ) { + $options->{ 'gapped' } = 'F'; +} else { + $options->{ 'gapped' } = 'T'; +} + +if ( $options->{ 'megablast' } ) { + $options->{ 'megablast' } = 'T'; +} else { + $options->{ 'megablast' } = 'F'; +} + +if ( $options->{ 'single_hit' } ) { + $options->{ 'single_hit' } = 1 +} else { + $options->{ 'single_hit' } = 0 +} + if ( $options->{ 'verbose' } ) { Maasha::Common::run( @@ -96,13 +122,17 @@ if ( $options->{ 'verbose' } ) "-p $program", "-e $options->{ 'e_val' }", "-a $options->{ 'cpus' }", + "-g $options->{ 'gapped' }", + "-n $options->{ 'megablast' }", "-m 8", "-i $tmp_in", "-d $options->{ 'database' }", "-F $options->{ 'filter' }", + "-P $options->{ 'single_hit' }", + "-W $options->{ 'word_size' }", + "-f $options->{ 'extend_threshold' }", "-o $tmp_out", - ), - 1 + ) ); } else @@ -117,10 +147,12 @@ else "-i $tmp_in", "-d $options->{ 'database' }", "-F $options->{ 'filter' }", + "-P $options->{ 'single_hit' }", + "-W $options->{ 'word_size' }", + "-f $options->{ 'extend_threshold' }", "-o $tmp_out", "> /dev/null 2>&1" - ), - 1 + ) ); } @@ -155,10 +187,10 @@ sub guess_database_type # a .phr file exists. # Returns string; - if ( -f $options->{ 'database' } . ".phr" ) { - return "protein"; + if ( -f $options->{ 'database' } . ".phr" or -f $options->{ 'database' } . ".pal" ) { + return "PROTEIN"; } else { - return "nucleotide"; + return "NUCLEOTIDE"; } } @@ -178,13 +210,13 @@ sub guess_program my ( $program ); - if ( $q_type ne "protein" and $s_type ne "protein" ) { + if ( $q_type ne "PROTEIN" and $s_type ne "PROTEIN" ) { $program = "blastn"; - } elsif ( $q_type eq "protein" and $s_type eq "protein" ) { + } elsif ( $q_type eq "PROTEIN" and $s_type eq "PROTEIN" ) { $program = "blastp"; - } elsif ( $q_type ne "protein" and $s_type eq "protein" ) { + } elsif ( $q_type ne "PROTEIN" and $s_type eq "PROTEIN" ) { $program = "blastx"; - } elsif ( $q_type eq "protein" and $s_type ne "protein" ) { + } elsif ( $q_type eq "PROTEIN" and $s_type ne "PROTEIN" ) { $program = "tblastn"; }