From 3feec35ac9aa2f45b3d88a76b69228e6ecd070e9 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Mon, 15 Mar 2010 19:33:02 +0000 Subject: [PATCH] added N block switch to blat_seq git-svn-id: http://biopieces.googlecode.com/svn/trunk@905 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/blast_seq | 12 +++++----- bp_bin/blat_seq | 46 +++++++++++++++++--------------------- bp_bin/calc_bit_scores | 2 +- bp_bin/create_blast_index | 2 +- bp_bin/create_vmatch_index | 4 ++-- bp_bin/fold_seq | 6 ++--- bp_bin/patscan_seq | 2 +- bp_bin/translate_seq | 2 +- 8 files changed, 35 insertions(+), 41 deletions(-) diff --git a/bp_bin/blast_seq b/bp_bin/blast_seq index be2b816..3c4df0d 100755 --- a/bp_bin/blast_seq +++ b/bp_bin/blast_seq @@ -175,9 +175,9 @@ sub guess_database_type # Returns string; if ( -f $options->{ 'database' } . ".phr" ) { - return "protein"; + return "PROTEIN"; } else { - return "nucleotide"; + return "NUCLEOTIDE"; } } @@ -197,13 +197,13 @@ sub guess_program my ( $program ); - if ( $q_type ne "protein" and $s_type ne "protein" ) { + if ( $q_type ne "PROTEIN" and $s_type ne "PROTEIN" ) { $program = "blastn"; - } elsif ( $q_type eq "protein" and $s_type eq "protein" ) { + } elsif ( $q_type eq "PROTEIN" and $s_type eq "PROTEIN" ) { $program = "blastp"; - } elsif ( $q_type ne "protein" and $s_type eq "protein" ) { + } elsif ( $q_type ne "PROTEIN" and $s_type eq "PROTEIN" ) { $program = "blastx"; - } elsif ( $q_type eq "protein" and $s_type ne "protein" ) { + } elsif ( $q_type eq "PROTEIN" and $s_type ne "PROTEIN" ) { $program = "tblastn"; } diff --git a/bp_bin/blat_seq b/bp_bin/blat_seq index 0069302..1f327fa 100755 --- a/bp_bin/blat_seq +++ b/bp_bin/blat_seq @@ -1,6 +1,6 @@ #!/usr/bin/env perl -# Copyright (C) 2007-2009 Martin A. Hansen. +# Copyright (C) 2007-2010 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -43,16 +43,17 @@ my ( $options, $in, $out, $record, $blat_args, $subject_file, $query_file, $fh_i $options = Maasha::Biopieces::parse_options( [ - { long => 'database', short => 'd', type => 'file', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'genome', short => 'g', type => 'genome', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'fast_map', short => 'f', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'occ', short => 'c', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'intron_max', short => 'i', type => 'uint', mandatory => 'no', default => 750000, allowed => undef, disallowed => undef }, - { long => 'tile_size', short => 't', type => 'uint', mandatory => 'no', default => 11, allowed => undef, disallowed => 0 }, - { long => 'step_size', short => 's', type => 'uint', mandatory => 'no', default => 11, allowed => undef, disallowed => 0 }, - { long => 'min_identity', short => 'm', type => 'uint', mandatory => 'no', default => 90, allowed => undef, disallowed => 0 }, - { long => 'min_score', short => 'M', type => 'uint', mandatory => 'no', default => 0, allowed => undef, disallowed => undef }, - { long => 'one_off', short => 'o', type => 'uint', mandatory => 'no', default => 0, allowed => undef, disallowed => undef }, + { long => 'database', short => 'd', type => 'file', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'genome', short => 'g', type => 'genome', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'fast_map', short => 'f', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'occ', short => 'c', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'intron_max', short => 'i', type => 'uint', mandatory => 'no', default => 750000, allowed => undef, disallowed => undef }, + { long => 'tile_size', short => 't', type => 'uint', mandatory => 'no', default => 11, allowed => undef, disallowed => 0 }, + { long => 'step_size', short => 's', type => 'uint', mandatory => 'no', default => 11, allowed => undef, disallowed => 0 }, + { long => 'min_identity', short => 'm', type => 'uint', mandatory => 'no', default => 90, allowed => undef, disallowed => 0 }, + { long => 'min_score', short => 'M', type => 'uint', mandatory => 'no', default => 0, allowed => undef, disallowed => undef }, + { long => 'one_off', short => 'o', type => 'uint', mandatory => 'no', default => 0, allowed => undef, disallowed => undef }, + { long => 'allow_N_blocks', short => 'N', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, ] ); @@ -68,20 +69,15 @@ if ( $options->{ 'database' } ) { $subject_file = "$ENV{ 'BP_DATA' }/genomes/$options->{ 'genome' }/fasta/$options->{ 'genome' }.fna"; } -if ( $options->{ 'fast_map' } ) -{ - $blat_args .= " -fastMap"; -} -else -{ - $blat_args .= " -tileSize=$options->{ 'tile_size' }"; - $blat_args .= " -oneOff=$options->{ 'one_off' }"; - $blat_args .= " -minIdentity=$options->{ 'min_identity' }"; - $blat_args .= " -minScore=$options->{ 'min_score' }"; - $blat_args .= " -stepSize=$options->{ 'step_size' }"; - $blat_args .= " -maxIntron=$options->{ 'intron_max' }"; - # $blat_args .= " -ooc=" . Maasha::Config::genome_blat_ooc( $options->{ "genome" }, 11 ) if $options->{ 'ooc' }; -} +$blat_args .= " -tileSize=$options->{ 'tile_size' }"; +$blat_args .= " -oneOff=$options->{ 'one_off' }"; +$blat_args .= " -minIdentity=$options->{ 'min_identity' }"; +$blat_args .= " -minScore=$options->{ 'min_score' }"; +$blat_args .= " -stepSize=$options->{ 'step_size' }"; +$blat_args .= " -maxIntron=$options->{ 'intron_max' }"; +$blat_args .= " -fastMap" if $options->{ 'fast_map' }; +$blat_args .= " -extendThroughN" if $options->{ 'allow_N_blocks' }; +# $blat_args .= " -ooc=" . Maasha::Config::genome_blat_ooc( $options->{ "genome" }, 11 ) if $options->{ 'ooc' }; $tmp_dir = Maasha::Biopieces::get_tmpdir(); $query_file = "$tmp_dir/blat.seq"; diff --git a/bp_bin/calc_bit_scores b/bp_bin/calc_bit_scores index 4b930c3..6da3afb 100755 --- a/bp_bin/calc_bit_scores +++ b/bp_bin/calc_bit_scores @@ -69,7 +69,7 @@ while ( $record = Maasha::Biopieces::get_record( $in ) ) undef $record; -if ( $type eq "protein" ) { +if ( $type eq "PROTEIN" ) { $bit_max = 4; } else { $bit_max = 2; diff --git a/bp_bin/create_blast_index b/bp_bin/create_blast_index index 61f75ff..1d2b195 100755 --- a/bp_bin/create_blast_index +++ b/bp_bin/create_blast_index @@ -69,7 +69,7 @@ close $fh_tmp; $arg = "-t $options->{ 'index_name' }"; -if ( $seq_type =~ /protein/i ) { +if ( $seq_type eq "PROTEIN" ) { $arg .= " -p T"; } else { $arg .= " -p F"; diff --git a/bp_bin/create_vmatch_index b/bp_bin/create_vmatch_index index bf467b2..b5d4d0e 100755 --- a/bp_bin/create_vmatch_index +++ b/bp_bin/create_vmatch_index @@ -69,9 +69,9 @@ while ( $record = Maasha::Biopieces::get_record( $in ) ) close $fh_tmp; if ( $options->{ 'verbose' } ) { - Maasha::Common::run( "mkvtree", "-db $file_tmp -$type -pl -allout -indexname $options->{ 'index_name' }" ); + Maasha::Common::run( "mkvtree", "-db $file_tmp -" . ( lc $type ) . " -pl -allout -indexname $options->{ 'index_name' }" ); } else { - Maasha::Common::run( "mkvtree", "-db $file_tmp -$type -pl -allout -indexname $options->{ 'index_name' } > /dev/null 2>&1" ); + Maasha::Common::run( "mkvtree", "-db $file_tmp -" . ( lc $type ) . " -pl -allout -indexname $options->{ 'index_name' } > /dev/null 2>&1" ); } unlink $file_tmp; diff --git a/bp_bin/fold_seq b/bp_bin/fold_seq index 1207cfe..4543920 100755 --- a/bp_bin/fold_seq +++ b/bp_bin/fold_seq @@ -45,11 +45,9 @@ while ( $record = Maasha::Biopieces::get_record( $in ) ) { if ( $record->{ "SEQ" } ) { - if ( not $type ) { - $type = Maasha::Seq::seq_guess_type( $record->{ "SEQ" } ); - } + $type = Maasha::Seq::seq_guess_type( $record->{ "SEQ" } ) if not $type; - if ( $type ne "protein" ) + if ( $type ne "PROTEIN" ) { ( $struct, $index ) = Maasha::Seq::fold_struct_rnafold( $record->{ "SEQ" } ); $record->{ "SEC_STRUCT" } = $struct; diff --git a/bp_bin/patscan_seq b/bp_bin/patscan_seq index 28cb585..ca4fc98 100755 --- a/bp_bin/patscan_seq +++ b/bp_bin/patscan_seq @@ -98,7 +98,7 @@ else close $fh_out; - $arg .= " -p" if $type eq "protein"; + $arg .= " -p" if $type eq "PROTEIN"; } foreach $pattern ( @{ $patterns } ) diff --git a/bp_bin/translate_seq b/bp_bin/translate_seq index d62188a..3af0521 100755 --- a/bp_bin/translate_seq +++ b/bp_bin/translate_seq @@ -50,7 +50,7 @@ while ( $record = Maasha::Biopieces::get_record( $in ) ) { if ( $record->{ "SEQ" } ) { - if ( Maasha::Seq::seq_guess_type( $record->{ "SEQ" } ) =~ /DNA/i ) + if ( Maasha::Seq::seq_guess_type( $record->{ "SEQ" } ) eq "DNA" ) { foreach $frame ( @{ $options->{ "frames" } } ) { -- 2.39.5