3 # Copyright (C) 2007-2011 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
21 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
23 # This program is part of the Biopieces framework (www.biopieces.org).
25 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
27 # Run Uclust on sequences in the stream.
29 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
32 require 'maasha/biopieces'
33 require 'maasha/fasta'
38 def initialize(infile1, infile2, outfile)
45 # Method to choose the appropriate program, unless already
46 # specified, for the given data types.
47 def program_set(type1, type2, program)
49 @program = program_choose(type1, type2)
55 # Method to run BLAST with given options.
58 commands << "nice -n 19"
60 commands << "-i #{@infile1}"
61 commands << "-j #{@infile2}"
62 commands << "-o #{@outfile}"
63 commands << "-p #{@program}"
64 commands << "-W #{options[:word_size]}" if options[:word_size]
65 commands << "-D 1" # tabular output
66 commands << "-e #{options[:e_val]}"
67 commands << (options[:megablast] ? "-m T" : "-m F")
68 commands << (options[:filter] == 'yes'.to_sym ? "-F T" : "-F F") # TODO fix this to_sym
69 commands << (options[:no_gaps] ? "-g F" : "-g T")
70 commands << "> /dev/null 2>&1" unless options[:verbose]
72 command = commands.join(" ")
73 $stderr.puts command if options[:verbose]
75 raise "Command failed: #{command}" unless $?.success?
78 # Method to parse a BLAST tabular output and for each line of data
79 # yield a Biopiece record.
83 File.open(@outfile, 'r') do |ios|
84 ios.each_line do |line|
86 fields = line.chomp.split("\t")
88 record[:REC_TYPE] = "BLAST"
89 record[:Q_ID] = fields[0]
90 record[:S_ID] = fields[1]
91 record[:IDENT] = fields[2].to_f
92 record[:ALIGN_LEN] = fields[3].to_i
93 record[:MISMATCHES] = fields[4].to_i
94 record[:GAPS] = fields[5].to_i
95 record[:Q_BEG] = fields[6].to_i - 1
96 record[:Q_END] = fields[7].to_i - 1
97 record[:S_BEG] = fields[8].to_i - 1
98 record[:S_END] = fields[9].to_i - 1
99 record[:E_VAL] = fields[10].to_f
100 record[:BIT_SCORE] = fields[11].to_f
102 if record[:S_BEG] > record[:S_END]
103 record[:STRAND] = '-'
105 record[:S_BEG], record[:S_END] = record[:S_END], record[:S_BEG]
107 record[:STRAND] = '+'
115 self # conventionally
120 # Method that chose the correct BLAST program to work
121 # with two given types of data.
122 def program_choose(type1, type2)
143 ok_programs = "blastn,blastp,tblastn,blastx,tblastx"
146 casts << {:long=>'program', :short=>'p', :type=>'string', :mandatory=>false, :default=>nil, :allowed=>ok_programs, :disallowed=>nil}
147 casts << {:long=>'e_val', :short=>'e', :type=>'float', :mandatory=>false, :default=>10.0, :allowed=>nil, :disallowed=>nil}
148 casts << {:long=>'filter', :short=>'f', :type=>'string', :mandatory=>false, :default=>'no', :allowed=>'yes,no', :disallowed=>nil}
149 casts << {:long=>'megablast', :short=>'m', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
150 casts << {:long=>'no_gaps', :short=>'G', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
151 casts << {:long=>'word_size', :short=>'w', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
153 options = Biopieces.options_parse(ARGV, casts)
155 tmpdir = Biopieces.mktmpdir
156 infile1 = File.join(tmpdir, "in1.fna")
157 infile2 = File.join(tmpdir, "in2.fna")
158 outfile = File.join(tmpdir, "blast.out")
165 Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
166 input.each_record do |record|
169 if record[:SEQ_NAME] and record[:SEQ]
170 seq = Seq.new_bp(record)
173 Fasta.open(infile1, "w") do |fasta_io|
174 fasta_io.puts seq.to_fasta
178 type1 = Seq.new(seq: record[:SEQ][0 ... 100]).type_guess
180 Fasta.open(infile2, "w") do |fasta_io|
181 fasta_io.puts seq.to_fasta
185 type2 = Seq.new(seq: record[:SEQ][0 ... 100]).type_guess
189 blast = Blast.new(infile1, infile2, outfile)
190 blast.program_set(type1, type2, options[:program])
193 blast.each do |new_record|
194 output.puts new_record
206 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<