]> git.donarmstrong.com Git - biopieces.git/blob - bp_bin/hmmer_seq
fixed seq qual length check
[biopieces.git] / bp_bin / hmmer_seq
1 #!/usr/bin/env ruby
2
3 # Copyright (C) 2007-2012 Martin A. Hansen.
4
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
9
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 # GNU General Public License for more details.
14
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18
19 # http://www.gnu.org/copyleft/gpl.html
20
21 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
22
23 # This program is part of the Biopieces framework (www.biopieces.org).
24
25 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
26
27 # HMMER search sequences in the stream against a specified database.
28
29 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
30
31 require 'pp'
32 require 'maasha/biopieces'
33 require 'maasha/fasta'
34
35 class Hash
36   def to_bp
37     record         = self
38     bp             = {}
39     bp[:REC_TYPE]  = "HMMER" 
40     bp[:S_ID]      = record[:target_name]
41     bp[:S_AC]      = record[:target_accession]
42     bp[:Q_ID]      = record[:query_name]
43     bp[:Q_AC]      = record[:query_accession]
44     bp[:E_VAL_SEQ] = record[:e_value_sequence]
45     bp[:SCORE_SEQ] = record[:score_sequence]
46     bp[:BIAS_SEQ]  = record[:bias_sequence]
47     bp[:E_VAL_DOM] = record[:e_value_domain]
48     bp[:SCORE_DOM] = record[:score_domain]
49     bp[:BIAS_DOM]  = record[:bias_domain]
50     bp[:EXP]       = record[:exp]
51     bp[:REG]       = record[:reg]
52     bp[:CLU]       = record[:clu]
53     bp[:OV]        = record[:ov]
54     bp[:ENV]       = record[:env]
55     bp[:DOM]       = record[:dom]
56     bp[:REP]       = record[:rep]
57     bp[:INC]       = record[:inc]
58     bp[:DESC]      = record[:description_of_target]
59
60     bp
61   end
62 end
63
64 class Hmmer
65   def initialize(output_file, options)
66     @output_file = output_file
67     @options     = options
68     @command     = []
69   end
70
71   def search_db(query_file)
72     @command << "hmmsearch"
73     @command << "--tblout #{@output_file}"
74     @command << "--cpu #{@options[:cpus]}"
75     @command << @options[:database]
76     @command << query_file
77
78     execute
79   end
80
81   def each
82     records = []
83
84     File.open(@output_file, "r") do |ios|
85
86       # target name    accession  query name           accession    E-value  score  bias   E-value  score  bias   exp reg
87       # clu  ov env dom rep inc description of target
88       ios.each_line do |line|
89         next if line[0] == '#'
90         fields = line.chomp.split(" ")
91
92         record = {}
93         record[:target_name]           = fields[0]
94         record[:target_accession]      = fields[1]
95         record[:query_name]            = fields[2]
96         record[:query_accession]       = fields[3]
97         record[:e_value_sequence]      = fields[4].to_f
98         record[:score_sequence]        = fields[5].to_f
99         record[:bias_sequence]         = fields[6].to_f
100         record[:e_value_domain]        = fields[7].to_f
101         record[:score_domain]          = fields[8].to_f
102         record[:bias_domain]           = fields[9].to_f
103         record[:exp]                   = fields[10].to_f
104         record[:reg]                   = fields[11].to_i
105         record[:clu]                   = fields[12].to_i
106         record[:ov]                    = fields[13].to_i
107         record[:env]                   = fields[14].to_i
108         record[:dom]                   = fields[15].to_i
109         record[:rep]                   = fields[16].to_i
110         record[:inc]                   = fields[17].to_i
111         record[:description_of_target] = fields[18]
112
113         if block_given?
114           yield record
115         else
116           records << record
117         end
118       end
119     end
120
121     return records if block_given?
122   end
123
124   private
125
126   # Method to execute a command using a system() call.
127   # The command is composed of bits from the @command variable.
128   def execute
129     @command.unshift "nice -n 19"
130     @command << "> /dev/null 2>&1" unless @options[:verbose]
131
132     command = @command.join(" ")
133     $stderr.puts "Running command: #{command}" if @options[:verbose]
134     system(command)
135     raise "Command failed: #{command}" unless $?.success?
136
137     @command = []
138   end
139 end
140
141 casts = []
142 casts << {:long=>'database', :short=>'d', :type=>'file!', :mandatory=>true,  :default=>nil, :allowed=>nil, :disallowed=>nil}
143 casts << {:long=>'cpus',     :short=>'c', :type=>'uint',  :mandatory=>false, :default=>1,   :allowed=>nil, :disallowed=>"0"}
144
145 options     = Biopieces.options_parse(ARGV, casts)
146 tmpdir      = Biopieces.mktmpdir
147 output_file = File.join(tmpdir, "output.tab")
148 query_file  = File.join(tmpdir, "query.fna")
149
150 Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
151   Fasta.open(query_file, "w") do |fasta_io|
152     input.each_record do |record|
153       if record[:SEQ_NAME] and record[:SEQ]
154         entry = Seq.new_bp(record)
155
156         fasta_io.puts entry.to_fasta
157       end
158
159       output.puts record
160     end
161   end
162
163   hm = Hmmer.new(output_file, options)
164   hm.search_db(query_file)
165
166   hm.each do |hit|
167     output.puts hit.to_bp
168   end
169 end
170
171 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
172
173
174 __END__