]> git.donarmstrong.com Git - biopieces.git/blob - code_ruby/lib/maasha/genbank.rb
b1c9f8734a7a7f6dac1df4c7a18a3d59a62e2af1
[biopieces.git] / code_ruby / lib / maasha / genbank.rb
1 # Copyright (C) 2007-2011 Martin A. Hansen.
2
3 # This program is free software; you can redistribute it and/or
4 # modify it under the terms of the GNU General Public License
5 # as published by the Free Software Foundation; either version 2
6 # of the License, or (at your option) any later version.
7
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU General Public License for more details.
12
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16
17 # http://www.gnu.org/copyleft/gpl.html
18
19 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
20
21 # This software is part of the Biopieces framework (www.biopieces.org).
22
23 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
24
25 require 'maasha/locator'
26 require 'maasha/seq'
27 require 'maasha/filesys'
28 require 'pp'
29
30 # Error class for all exceptions to do with Genbank.
31 class GenbankError < StandardError; end
32
33 class Genbank < Filesys
34   def initialize(io)
35     @io      = io
36     @entry   = []
37   end
38
39   # Iterator method for parsing Genbank entries.
40   def each(hash_keys = nil, hash_feats = nil, hash_quals = nil)
41     while @entry = get_entry do
42       keys = get_keys(hash_keys)
43       seq  = get_seq
44
45       features = GenbankFeatures.new(@entry, hash_feats, hash_quals)
46
47       features.each do |record|
48         keys.each_pair { |key,val| record[key] = val }
49
50                                 loc = Locator.new(record[:LOCATOR], seq)
51                                 record[:SEQ]     = loc.subseq.seq
52         record[:SEQ_LEN] = loc.subseq.length
53                                 record[:STRAND]  = loc.strand
54                                 record[:S_BEG]   = loc.s_beg
55                                 record[:S_END]   = loc.s_end
56
57         yield record
58       end
59     end
60   end
61
62   private
63
64   # Method to get the next Genbank entry form an ios and return this.
65   def get_entry
66     block = @io.gets("//" + $/)
67     return nil if block.nil?
68
69     block.chomp!("//" + $/ )
70
71     entry = block.tr("\r", "\n").split $/
72
73     return nil if entry.empty?
74
75     entry
76   end
77
78   # Method to get the DNA sequence from a Genbank entry and return
79   # this as a Seq object.
80   def get_seq
81     i = @entry.size
82     j = i - 1
83
84     while @entry[j] and @entry[j] =~ /^\s+\d/
85       j -= 1
86     end
87
88     seq = @entry[j + 1 .. i].join.delete(" 0123456789")
89
90     Seq.new(nil, seq, :dna) if seq
91   end
92
93   # Method to get the base keys from Genbank entry and return these
94   # in a hash.
95   def get_keys(hash_keys)
96     keys = {}
97     i    = 0
98     j    = 0
99
100     while @entry[i] and @entry[i] !~ /^FEATURES/
101       if @entry[i] =~ /^\s{0,3}([A-Z]{2})/
102         if want_key?(hash_keys, $1)
103           j = i + 1
104
105           key, val = @entry[i].lstrip.split(/\s+/, 2)
106
107           while @entry[j] and @entry[j] !~ /^\s{0,3}[A-Z]/
108             val << @entry[j].lstrip
109             j += 1
110           end
111
112           if keys[key.to_sym]
113             keys[key.to_sym] << ";" + val
114           else
115             keys[key.to_sym] = val
116           end
117         end
118       end
119
120       j > i ? i = j : i += 1
121     end
122
123     keys
124   end
125
126   def want_key?(hash_keys, key)
127     if hash_keys
128       if hash_keys[key.to_sym]
129         return true
130       else
131         return false
132       end
133     else
134       return true
135     end
136   end
137 end
138
139 class GenbankFeatures
140   def initialize(entry, hash_feats, hash_quals)
141     @entry      = entry
142     @hash_feats = hash_feats
143     @hash_quals = hash_quals
144         @i          = 0
145         @j          = 0
146   end
147
148   def each
149     while @entry[@i] and @entry[@i] !~ /^ORIGIN/
150       if @entry[@i] =~ /^\s{5}([A-Za-z_-]+)/
151         if want_feat? $1
152           record = {}
153
154           feat, loc = @entry[@i].lstrip.split(/\s+/, 2)
155
156           @j = @i + 1
157
158           while @entry[@j] and @entry[@j] !~ /^(\s{21}\/|\s{5}[A-Za-z_-]|[A-Z])/
159             loc << @entry[@j].lstrip
160             @j += 1
161           end
162
163           get_quals.each_pair { |k,v|
164             record[k.upcase.to_sym] = v
165           }
166
167           record[:FEATURE] = feat
168           record[:LOCATOR] = loc
169
170           yield record
171         end
172       end
173
174       @j > @i ? @i = @j : @i += 1
175     end
176   end
177
178   private
179
180   def get_quals
181     quals = {}
182     k     = 0
183
184     while @entry[@j] and @entry[@j] !~ /^\s{5}[A-Za-z_-]|^[A-Z]/
185       if @entry[@j] =~ /^\s{21}\/([^=]+)="([^"]+)/
186         qual = $1
187         val  = $2
188
189         if want_qual? qual
190           k = @j + 1
191
192           while @entry[k] and @entry[k] !~ /^(\s{21}\/|\s{5}[A-Za-z_-]|[A-Z])/
193             val << @entry[k].lstrip.chomp('"')
194             k += 1
195           end
196
197           if quals[qual]
198             quals[qual] << ";" + val
199           else
200             quals[qual] = val
201           end
202         end
203       end
204
205       k > @j ? @j = k : @j += 1
206     end
207
208     quals
209   end
210
211   def want_feat?(feat)
212     if @hash_feats
213       if @hash_feats[feat.upcase.to_sym]
214         return true
215       else
216         return false
217       end
218     else
219       return true
220     end
221   end
222
223   def want_qual?(qual)
224     if @hash_quals
225       if @hash_quals[qual.upcase.to_sym]
226         return true
227       else
228         return false
229       end
230     else
231       return true
232     end
233   end
234 end
235
236 __END__