1 # Copyright (C) 2007-2011 Martin A. Hansen.
3 # This program is free software; you can redistribute it and/or
4 # modify it under the terms of the GNU General Public License
5 # as published by the Free Software Foundation; either version 2
6 # of the License, or (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 # http://www.gnu.org/copyleft/gpl.html
19 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
21 # This software is part of the Biopieces framework (www.biopieces.org).
23 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
25 require 'maasha/locator'
27 require 'maasha/filesys'
30 # Error class for all exceptions to do with Genbank.
# Empty subclass of StandardError; it adds no behaviour of its own.
31 class GenbankError < StandardError; end
# Reader for Genbank entries. Inherits from Filesys and reads its input
# from @io, one entry at a time; an entry is terminated by "//" followed
# by the record separator ($/).
33 class Genbank < Filesys
39 # Iterator method for parsing Genbank entries.
#
# For every entry returned by get_entry, the base keys are collected with
# get_keys and a GenbankFeatures object is built from the entry. Each
# feature record is then extended with the base keys and with sequence
# and location fields derived from its :LOCATOR value.
#
# hash_keys  - passed on to get_keys (default nil).
# hash_feats - passed on to GenbankFeatures.new (default nil).
# hash_quals - passed on to GenbankFeatures.new (default nil).
40 def each(hash_keys = nil, hash_feats = nil, hash_quals = nil)
# The assignment doubles as the loop condition: iteration stops as soon
# as get_entry returns nil.
41 while @entry = get_entry do
42 keys = get_keys(hash_keys)
45 features = GenbankFeatures.new(@entry, hash_feats, hash_quals)
47 features.each do |record|
# Copy every base key of the entry onto the feature record.
48 keys.each_pair { |key,val| record[key] = val }
# NOTE(review): `seq` is not assigned in the lines visible here --
# presumably it holds the entry's sequence; confirm where it is set.
50 loc = Locator.new(record[:LOCATOR], seq)
# Fields below are all read from the Locator built for this feature.
51 record[:SEQ] = loc.subseq.seq
52 record[:SEQ_LEN] = loc.subseq.length
53 record[:STRAND] = loc.strand
54 record[:S_BEG] = loc.s_beg
55 record[:S_END] = loc.s_end
64 # Method to get the next Genbank entry from the IO stream @io and return this.
#
# Returns nil when nothing more can be read from @io, or when no lines
# remain once the entry terminator has been removed.
#
# Read up to and including the next "//" followed by the record separator.
66 block = @io.gets("//" + $/)
67 return nil if block.nil?
# Drop the trailing entry terminator, if present.
69 block.chomp!("//" + $/ )
# Carriage returns are turned into newlines before splitting into lines.
71 entry = block.tr("\r", "\n").split $/
73 return nil if entry.empty?
78 # Method to get the DNA sequence from a Genbank entry and return
79 # this as a Seq object.
#
# NOTE(review): the initial values of i and j are not set in the lines
# visible here -- confirm them before relying on the slice bounds below.
#
# The condition holds while line j exists and starts with whitespace
# followed by a digit.
84 while @entry[j] and @entry[j] =~ /^\s+\d/
# Join lines j + 1 through i and strip every space and digit from the
# result.
88 seq = @entry[j + 1 .. i].join.delete(" 0123456789")
# Wrap the string in a Seq of type :dna; evaluates to nil if seq is nil.
90 Seq.new(seq: seq, type: :dna) if seq
93 # Method to get the base keys from Genbank entry and return these
# (values are stored in `keys` under the key name converted to a Symbol).
#
# hash_keys - passed on to want_key? to decide whether a key is collected.
95 def get_keys(hash_keys)
# Scan the entry lines until a line starting with FEATURES, or until the
# lines run out.
100 while @entry[i] and @entry[i] !~ /^FEATURES/
# A key line has at most three leading whitespace characters followed by
# at least two upper-case letters.
101 if @entry[i] =~ /^\s{0,3}([A-Z]{2})/
# NOTE(review): $1 holds only the first two upper-case letters matched
# above, not the whole key name -- confirm this is what want_key? expects.
102 if want_key?(hash_keys, $1)
# Split the line into the key name and the rest of the line.
105 key, val = @entry[i].lstrip.split(/\s+/, 2)
# Lines that do not look like a new key are treated as continuation lines
# and appended to the value without any separator.
107 while @entry[j] and @entry[j] !~ /^\s{0,3}[A-Z]/
108 val << @entry[j].lstrip
# Two storage forms: append to an existing value with a ";" separator, or
# store the value directly. NOTE(review): the condition choosing between
# them is not visible here -- presumably whether the key was seen before.
113 keys[key.to_sym] << ";" + val
115 keys[key.to_sym] = val
# Jump past any lines consumed through j, otherwise advance by one line.
120 j > i ? i = j : i += 1
# Predicate used by get_keys to decide whether a key should be collected.
#
# hash_keys - object indexed with the key name converted to a Symbol.
# key       - key name; must respond to to_sym.
#
# NOTE(review): only this one condition is visible here; the values
# returned for each outcome are not shown -- confirm before relying on them.
126 def want_key?(hash_keys, key)
128 if hash_keys[key.to_sym]
# Extracts feature records (feature name, locator and qualifiers) from the
# lines of a single Genbank entry.
139 class GenbankFeatures
# entry      - the Genbank entry to extract features from (the rest of the
#              class indexes @entry line by line).
# hash_feats - stored in @hash_feats; indexed with upcased feature names
#              converted to Symbols.
# hash_quals - stored in @hash_quals; indexed with upcased qualifier names
#              converted to Symbols.
#
# NOTE(review): the assignments of @entry, @i and @j are not visible in
# these lines -- confirm where they are initialised.
140 def initialize(entry, hash_feats, hash_quals)
142 @hash_feats = hash_feats
143 @hash_quals = hash_quals
# Walk the entry lines from @i until a line starting with ORIGIN, or until
# the lines run out, building one record per feature.
149 while @entry[@i] and @entry[@i] !~ /^ORIGIN/
# A feature line starts with exactly five whitespace characters followed by
# a feature name made of letters, "5", "3", "'", "_" or "-".
150 if @entry[@i] =~ /^\s{5}([53'A-Za-z_-]+)/
# Split the line into the feature name and the start of its locator.
154 feat, loc = @entry[@i].lstrip.split(/\s+/, 2)
# Append continuation lines to the locator until a qualifier line
# (21 whitespace characters then "/"), another feature line, or a line
# starting with an upper-case letter is reached.
158 while @entry[@j] and @entry[@j] !~ /^(\s{21}\/|\s{5}[53'A-Za-z_-]|[A-Z])/
159 loc << @entry[@j].lstrip
# Qualifiers are stored in the record under upcased Symbol keys.
163 get_quals.each_pair { |k,v|
164 record[k.upcase.to_sym] = v
167 record[:FEATURE] = feat
168 record[:LOCATOR] = loc
# Jump past any lines consumed through @j, otherwise advance by one line.
174 @j > @i ? @i = @j : @i += 1
# Collect the qualifiers of the current feature, starting at line @j and
# stopping at the next feature line or at a line starting with an
# upper-case letter.
184 while @entry[@j] and @entry[@j] !~ /^\s{5}[53'A-Za-z_-]|^[A-Z]/
# Only qualifiers written as /name="value are matched by this pattern:
# group 1 is the name (everything before "="), group 2 the value text up
# to the next double quote.
185 if @entry[@j] =~ /^\s{21}\/([^=]+)="([^"]+)/
# Continuation lines are appended to the value with leading whitespace and
# one trailing double quote removed.
# NOTE(review): the assignments of qual, val and k are not visible here --
# presumably they come from the captures above and from @j.
192 while @entry[k] and @entry[k] !~ /^(\s{21}\/|\s{5}[53'A-Za-z_-]|[A-Z])/
193 val << @entry[k].lstrip.chomp('"')
# Appends to an existing value using ";" as the separator.
# NOTE(review): the condition guarding this line is not visible here --
# presumably it runs when the qualifier was already seen.
198 quals[qual] << ";" + val
# Jump past any lines consumed through k, otherwise advance by one line.
205 k > @j ? @j = k : @j += 1
# Looks up the upcased feature name, converted to a Symbol, in @hash_feats.
# NOTE(review): the enclosing method definition and the values it returns
# are not visible here -- presumably a feature filter predicate; confirm.
213 if @hash_feats[feat.upcase.to_sym]
# Looks up the upcased qualifier name, converted to a Symbol, in @hash_quals.
# NOTE(review): the enclosing method definition and the values it returns
# are not visible here -- presumably a qualifier filter predicate; confirm.
225 if @hash_quals[qual.upcase.to_sym]