1 # Copyright (C) 2007-2011 Martin A. Hansen.
3 # This program is free software; you can redistribute it and/or
4 # modify it under the terms of the GNU General Public License
5 # as published by the Free Software Foundation; either version 2
6 # of the License, or (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 # http://www.gnu.org/copyleft/gpl.html
19 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
21 # This software is part of the Biopieces framework (www.biopieces.org).
23 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
25 require 'maasha/locator'
27 require 'maasha/filesys'
30 # Error class for all exceptions to do with EMBL.
31 class EMBLError < StandardError; end
39 # Iterator method for parsing EMBL entries.
40 def each(hash_keys = nil, hash_feats = nil, hash_quals = nil)
41 while @entry = get_entry do
42 keys = get_keys(hash_keys)
45 features = EMBLFeatures.new(@entry, hash_feats, hash_quals)
47 features.each do |record|
48 keys.each_pair { |key,val| record[key] = val }
50 loc = Locator.new(record[:LOCATOR], seq)
51 record[:SEQ] = loc.subseq.seq
52 record[:SEQ_LEN] = loc.subseq.length
53 record[:STRAND] = loc.strand
54 record[:S_BEG] = loc.s_beg
55 record[:S_END] = loc.s_end
64 # Method to get the next EMBL entry from an ios and return this.
66 block = @io.gets("//" + $/)
67 return nil if block.nil?
69 block.chomp!("//" + $/ )
71 entry = block.tr("\r", "\n").split $/
73 return nil if entry.empty?
78 # Method to get the DNA sequence from an EMBL entry and return
79 # this as a Seq object.
84 while @entry[j] and @entry[j] !~ /^[A-Z]/
88 seq = @entry[j + 1 .. i].join.delete(" 0123456789")
90 Seq.new(seq: seq, type: :dna) if seq
93 # Method to get the base keys from EMBL entry and return these
95 def get_keys(hash_keys)
101 key, val = @entry[i].split(/\s+/, 2)
104 if want_key?(hash_keys, key)
108 next_key, next_val = @entry[j].split(/\s+/, 2)
110 val << " " + next_val
118 keys[key.to_sym] << " " + val
120 keys[key.to_sym] = val
125 j > i ? i = j : i += 1
131 def want_key?(hash_keys, key)
133 if hash_keys[key.to_sym]
145 def initialize(entry, hash_feats, hash_quals)
147 @hash_feats = hash_feats
148 @hash_quals = hash_quals
154 while @entry[@i] and @entry[@i][0 ... 2] != "SQ"
155 if @entry[@i] =~ /^FT\s{3}([53'A-Za-z_-]+)/
159 key, feat, loc = @entry[@i].split(/\s+/, 3)
163 while @entry[@j] and @entry[@j][0 ... 2] == "FT" and @entry[@j] !~ /^FT(\s{19}\/|\s{3}[53'A-Za-z_-])/
164 loc << @entry[@j].split(/\s+/, 2).last
168 get_quals.each_pair { |k,v|
169 record[k.upcase.to_sym] = v
172 record[:FEATURE] = feat
173 record[:LOCATOR] = loc
179 @j > @i ? @i = @j : @i += 1
189 while @entry[@j] and @entry[@j][0 ... 2] == "FT" and @entry[@j] !~ /^FT\s{3}[53'A-Za-z_-]/
190 if @entry[@j] =~ /^FT\s{19}\/([^=]+)="([^"]+)/
197 while @entry[k] and @entry[k][0 ... 2] == "FT" and @entry[k] !~ /^FT(\s{19}\/|\s{3}[53'A-Za-z_-])/
198 val << @entry[k].split(/\s+/, 2).last.chomp('"')
203 quals[qual] << ";" + val
210 k > @j ? @j = k : @j += 1
218 if @hash_feats[feat.upcase.to_sym]
230 if @hash_quals[qual.upcase.to_sym]