3 # Copyright (C) 2007-2011 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
21 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
23 # This program is part of the Biopieces framework (www.biopieces.org).
25 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
27 # Run Uclust on sequences in the stream.
29 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
32 require 'maasha/biopieces'
33 require 'maasha/fasta'
38 def initialize(infile1, infile2, outfile)
45 # Method to choose the appropriate program, unless already
46 # specified, for the given data types.
47 def program_set(type1, type2, program)
49 @program = program_choose(type1, type2)
55 # Method to run BLAST with given options.
58 commands << "nice -n 19"
60 commands << "-i #{@infile1}"
61 commands << "-j #{@infile2}"
62 commands << "-o #{@outfile}"
63 commands << "-p #{@program}"
64 commands << "-D 1" # tabular output
65 commands << "-e #{options[:e_val]}"
66 commands << (options[:megablast] ? "-m T" : "-m F")
67 commands << (options[:filter] == 'yes'.to_sym ? "-F T" : "-F F") # TODO fix this to_sym
68 commands << (options[:no_gaps] ? "-g F" : "-g T")
69 commands << "> /dev/null 2>&1" unless options[:verbose]
71 command = commands.join(" ")
72 $stderr.puts command if options[:verbose]
74 raise "Command failed: #{command}" unless $?.success?
77 # Method to parse a BLAST tabular output and for each line of data
78 # yield a Biopiece record.
82 File.open(@outfile, 'r') do |ios|
83 ios.each_line do |line|
85 fields = line.chomp.split("\t")
87 record[:REC_TYPE] = "BLAST"
88 record[:Q_ID] = fields[0]
89 record[:S_ID] = fields[1]
90 record[:IDENT] = fields[2].to_f
91 record[:ALIGN_LEN] = fields[3].to_i
92 record[:MISMATCHES] = fields[4].to_i
93 record[:GAPS] = fields[5].to_i
94 record[:Q_BEG] = fields[6].to_i - 1
95 record[:Q_END] = fields[7].to_i - 1
96 record[:S_BEG] = fields[8].to_i - 1
97 record[:S_END] = fields[9].to_i - 1
98 record[:E_VAL] = fields[10].to_f
99 record[:BIT_SCORE] = fields[11].to_f
101 if record[:S_BEG] > record[:S_END]
102 record[:STRAND] = '-'
104 record[:S_BEG], record[:S_END] = record[:S_END], record[:S_BEG]
106 record[:STRAND] = '+'
114 self # conventionally
119 # Method that chose the correct BLAST program to work
120 # with two given types of data.
121 def program_choose(type1, type2)
142 ok_programs = "blastn,blastp,tblastn,blastx,tblastx"
145 casts << {:long=>'program', :short=>'p', :type=>'string', :mandatory=>false, :default=>nil, :allowed=>ok_programs, :disallowed=>nil}
146 casts << {:long=>'e_val', :short=>'e', :type=>'float', :mandatory=>false, :default=>10.0, :allowed=>nil, :disallowed=>nil}
147 casts << {:long=>'filter', :short=>'f', :type=>'string', :mandatory=>false, :default=>'no', :allowed=>'yes,no', :disallowed=>nil}
148 casts << {:long=>'megablast', :short=>'m', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
149 casts << {:long=>'no_gaps', :short=>'G', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
151 options = Biopieces.options_parse(ARGV, casts)
153 tmpdir = Biopieces.mktmpdir
154 infile1 = File.join(tmpdir, "in1.fna")
155 infile2 = File.join(tmpdir, "in2.fna")
156 outfile = File.join(tmpdir, "blast.out")
163 Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
164 input.each_record do |record|
167 if record[:SEQ_NAME] and record[:SEQ]
168 seq = Seq.new_bp(record)
171 Fasta.open(infile1, "w") do |fasta_io|
172 fasta_io.puts seq.to_fasta
176 type1 = Seq.new(nil, record[:SEQ][0 ... 100]).type_guess
178 Fasta.open(infile2, "w") do |fasta_io|
179 fasta_io.puts seq.to_fasta
183 type2 = Seq.new(nil, record[:SEQ][0 ... 100]).type_guess
187 blast = Blast.new(infile1, infile2, outfile)
188 blast.program_set(type1, type2, options[:program])
191 blast.each do |new_record|
192 output.puts new_record
204 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<