]> git.donarmstrong.com Git - biopieces.git/commitdiff
added blast_seq_pair
authormartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Tue, 14 Jun 2011 08:24:18 +0000 (08:24 +0000)
committermartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Tue, 14 Jun 2011 08:24:18 +0000 (08:24 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1470 74ccb610-7750-0410-82ae-013aeee3265d

bp_bin/blast_seq_pair [new file with mode: 0755]

diff --git a/bp_bin/blast_seq_pair b/bp_bin/blast_seq_pair
new file mode 100755 (executable)
index 0000000..0bf097a
--- /dev/null
@@ -0,0 +1,203 @@
+#!/usr/bin/env ruby
+
+# Copyright (C) 2007-2011 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This program is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Run Uclust on sequences in the stream.
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+require 'maasha/biopieces'
+require 'maasha/fasta'
+
+class Blast
+  include Enumerable
+
+  def initialize(infile1, infile2, outfile)
+    @infile1 = infile1
+    @infile2 = infile2
+    @outfile = outfile
+    @program = ""
+  end
+
+  # Method to choose the appropriate program, unless already
+  # specified, for the given data types.
+  def program_set(type1, type2, program)
+    if program.nil?
+      @program = program_choose(type1, type2)
+    else
+      @program = program
+    end
+  end
+
+  # Method to run BLAST with given options.
+  def run(options)
+    commands = []
+    commands << "nice -n 19"
+    commands << "bl2seq"
+    commands << "-i #{@infile1}"
+    commands << "-j #{@infile2}"
+    commands << "-o #{@outfile}"
+    commands << "-p #{@program}"
+    commands << "-D 1" # tabular output
+    commands << "-e #{options[:e_val]}"
+    commands << (options[:megablast] ? "-m T" : "-m F")
+    commands << (options[:filter] == 'yes'.to_sym ? "-F T" : "-F F") #  TODO fix this to_sym
+    commands << (options[:no_gaps] ? "-g F" : "-g T")
+    commands << "> /dev/null 2>&1" unless options[:verbose]
+
+               command = commands.join(" ")
+    $stderr.puts command if options[:verbose]
+    system(command)
+    raise "Command failed: #{command}" unless $?.success?
+  end
+
+       # Method to parse a BLAST tabular output and for each line of data
+       # yield a Biopiece record.
+  def each
+    record = {}
+
+    File.open(@outfile, mode="r") do |ios|
+      ios.each_line do |line|
+        if line !~ /^#/
+          fields = line.chomp.split("\t")
+
+          record[:REC_TYPE]   = "BLAST"
+          record[:Q_ID]       = fields[0]
+          record[:S_ID]       = fields[1]
+          record[:IDENT]      = fields[2].to_f
+          record[:ALIGN_LEN]  = fields[3].to_i
+          record[:MISMATCHES] = fields[4].to_i
+          record[:GAPS]       = fields[5].to_i
+          record[:Q_BEG]      = fields[6].to_i - 1
+          record[:Q_END]      = fields[7].to_i - 1
+          record[:S_BEG]      = fields[8].to_i - 1
+          record[:S_END]      = fields[9].to_i - 1
+          record[:E_VAL]      = fields[10].to_f
+          record[:BIT_SCORE]  = fields[11].to_f
+
+          if record[:S_BEG] > record[:S_END]
+            record[:STRAND] = '-'
+
+            record[:S_BEG], record[:S_END] = record[:S_END], record[:S_BEG]
+          else
+            record[:STRAND] = '+'
+          end
+
+          yield record
+        end
+      end
+    end
+
+    self # conventionally
+  end
+
+  private
+
+  # Method that chose the correct BLAST program to work
+  # with two given types of data.
+  def program_choose(type1, type2)
+    program = ""
+
+    if type1 == 'protein'
+      if type2 == 'protein'
+        program = 'blastp'
+      else
+        program = 'tblastn'
+      end
+    else
+      if type2 == 'protein'
+        program = 'blastx'
+      else
+        program = 'blastn'
+      end
+    end
+
+    program
+  end
+end
+
+ok_programs = "blastn,blastp,tblastn,blastx,tblastx"
+
+casts = []
+casts << {:long=>'program',   :short=>'p', :type=>'string', :mandatory=>false, :default=>nil,  :allowed=>ok_programs, :disallowed=>nil}
+casts << {:long=>'e_val',     :short=>'e', :type=>'float',  :mandatory=>false, :default=>10,   :allowed=>nil,         :disallowed=>nil}
+casts << {:long=>'filter',    :short=>'f', :type=>'string', :mandatory=>false, :default=>'no', :allowed=>'yes,no',    :disallowed=>nil}
+casts << {:long=>'megablast', :short=>'m', :type=>'flag',   :mandatory=>false, :default=>nil,  :allowed=>nil,         :disallowed=>nil}
+casts << {:long=>'no_gaps',   :short=>'G', :type=>'flag',   :mandatory=>false, :default=>nil,  :allowed=>nil,         :disallowed=>nil}
+
+options = Biopieces.options_parse(ARGV, casts)
+
+tmpdir  = Biopieces.mktmpdir
+infile1 = File.join(tmpdir, "in1.fna")
+infile2 = File.join(tmpdir, "in2.fna")
+outfile = File.join(tmpdir, "blast.out")
+
+got1    = false
+got2    = false
+type1   = ""
+type2   = ""
+
+Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
+  input.each_record do |record|
+    output.puts record
+
+    if record.has_key? :SEQ_NAME and record.has_key? :SEQ
+      unless got1
+        Fasta.open(infile1, mode="w") do |fasta_io|
+          fasta_io.puts record
+        end
+
+        got1  = true
+        type1 = Seq.new(nil, record[:SEQ][0 ... 100]).type_guess
+      end
+
+      unless got2
+        Fasta.open(infile2, mode="w") do |fasta_io|
+          fasta_io.puts record
+        end
+
+        got2  = true
+        type2 = Seq.new(nil, record[:SEQ][0 ... 100]).type_guess
+      end
+    end
+  end
+
+  if got1 and got2
+    blast = Blast.new(infile1, infile2, outfile)
+    blast.program_set(type1, type2, options[:program])
+    blast.run(options)
+
+    blast.each do |record|
+      output.puts record
+    end
+  end
+end
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__