# Provenance: biopieces.git commit 670f77776e42b5d4527b4cc8706bb8fbcfb015f2
# (git-svn-id: http://biopieces.googlecode.com/svn/trunk@1702), "added embl.rb",
# by martinahansen, Mon, 12 Dec 2011 11:17:20 +0000.
# File: code_ruby/lib/maasha/embl.rb

# Copyright (C) 2007-2011 Martin A. Hansen.

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

# http://www.gnu.org/copyleft/gpl.html

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

# This software is part of the Biopieces framework (www.biopieces.org).

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

require 'maasha/locator'
require 'maasha/seq'
require 'maasha/filesys'
require 'pp'

# Error class for all exceptions to do with EMBL.
class EMBLError < StandardError; end

# Parser for EMBL flat files. Entries are read one at a time from an IO
# and every feature of every entry is emitted as a flat hash record.
class EMBL < Filesys
  # Line codes that end the header part of an entry. "FH" is the regular
  # feature table header; "FT" and "SQ" are checked as well so that an
  # entry lacking the FH line does not get its feature table or sequence
  # collected as base keys.
  HEADER_END_KEYS = %w[FH FT SQ].freeze

  # Method to initialize an EMBL object from an io positioned at the
  # start of an entry.
  def initialize(io)
    @io    = io
    @entry = []
  end

  # Iterator method for parsing EMBL entries.
  #
  # hash_keys, hash_feats and hash_quals are optional filters: when given,
  # only base keys, feature keys and qualifier names with a truthy value
  # in the respective hash are reported. Base keys are looked up as
  # symbols as-is, feature keys and qualifier names as upcased symbols.
  # When a filter is nil everything is reported.
  #
  # One record (a hash) is yielded per wanted feature. It holds the base
  # keys of the entry, the upcased qualifiers of the feature, and
  # :FEATURE, :LOCATOR, :SEQ, :SEQ_LEN, :STRAND, :S_BEG and :S_END.
  def each(hash_keys = nil, hash_feats = nil, hash_quals = nil)
    while (@entry = get_entry)
      keys = get_keys(hash_keys)
      seq  = get_seq

      features = EMBLFeatures.new(@entry, hash_feats, hash_quals)

      features.each do |record|
        keys.each_pair { |key, val| record[key] = val }

        loc = Locator.new(record[:LOCATOR], seq)

        # Resolve the subsequence once and reuse it for both fields
        # (assumes Locator#subseq has no side effects).
        subseq = loc.subseq

        record[:SEQ]     = subseq.seq
        record[:SEQ_LEN] = subseq.length
        record[:STRAND]  = loc.strand
        record[:S_BEG]   = loc.s_beg
        record[:S_END]   = loc.s_end

        yield record
      end
    end
  end

  private

  # Method to get the next EMBL entry from the io and return this as an
  # array of lines without the terminating "//" line. Returns nil at end
  # of input or if the remaining block holds no lines.
  def get_entry
    terminator = "//" + $/

    block = @io.gets(terminator)
    return nil if block.nil?

    block.chomp!(terminator)

    entry = block.split $/

    return nil if entry.empty?

    entry
  end

  # Method to get the DNA sequence from an EMBL entry and return
  # this as a Seq object. The sequence is taken from the lines following
  # the last line that starts with an uppercase letter (normally the SQ
  # line); blanks and position counters are removed.
  def get_seq
    last_coded = @entry.rindex { |line| line =~ /^[A-Z]/ }
    first_seq  = last_coded ? last_coded + 1 : 0

    seq = (@entry[first_seq..-1] || []).join.delete(" 0123456789")

    Seq.new(nil, seq, "dna")
  end

  # Method to get the base keys from EMBL entry and return these
  # in a hash with the line codes as symbol keys. Values of repeated
  # line codes are joined with a single space. "XX" spacer lines and
  # lines without a line code are skipped, and parsing stops at the
  # first feature table or sequence line.
  def get_keys(hash_keys)
    keys = {}

    @entry.each do |line|
      key, val = line.split(/\s+/, 2)

      next if key.nil? || key.empty?   # blank or indented line - no line code
      break if HEADER_END_KEYS.include? key
      next if key == "XX"
      next unless want_key?(hash_keys, key)

      val = val.to_s   # a line holding only a line code has no value
      sym = key.to_sym

      if keys.key? sym
        keys[sym] << " " << val
      else
        keys[sym] = val
      end
    end

    keys
  end

  # Method that returns true if the given key is wanted, i.e. if no
  # filter was given or the filter holds a truthy value for the key.
  def want_key?(hash_keys, key)
    return true unless hash_keys

    hash_keys[key.to_sym] ? true : false
  end
end

# Iterator over the feature table (FT lines) of a single EMBL entry given
# as an array of lines.
class EMBLFeatures
  # Feature line: "FT", three blanks, then the feature key. Any non-blank
  # characters are accepted as key, since keys such as 5'UTR, 3'UTR and
  # -35_signal contain digits and apostrophes.
  FEATURE_START = /^FT\s{3}(\S+)/

  # Qualifier line with a quoted value: /name="value
  # NOTE: unquoted values (/codon_start=1) and value-less qualifiers
  # (/pseudo) are not matched by this pattern and hence not reported.
  QUALIFIER_START = /^FT\s{19}\/([^=]+)="([^"]+)/

  # Any line starting a new qualifier or a new feature, i.e. a line that
  # is not a continuation of the previous location or qualifier value.
  ITEM_START = /^FT(\s{19}\/|\s{3}\S)/

  # Method to initialize an EMBLFeatures object from the lines of an
  # entry and optional feature and qualifier filters.
  def initialize(entry, hash_feats, hash_quals)
    @entry      = entry
    @hash_feats = hash_feats
    @hash_quals = hash_quals
    @i          = 0   # index of the line currently inspected by each
    @j          = 0   # look-ahead index used for locations and qualifiers
  end

  # Iterator method yielding a record (hash) per wanted feature, holding
  # the wanted qualifiers under upcased symbol keys plus :FEATURE and
  # :LOCATOR. Scanning stops at the SQ line or the end of the entry.
  def each
    while (line = @entry[@i]) && !line.start_with?("SQ")
      if line =~ FEATURE_START && want_feat?($1)
        record = {}

        _code, feat, loc = line.split(/\s+/, 3)
        loc ||= ""   # tolerate a feature line lacking a location

        @j = @i + 1

        # Locations may wrap over several lines - glue them together.
        while continuation?(@entry[@j])
          loc << continuation_text(@entry[@j])
          @j += 1
        end

        get_quals.each_pair { |k, v| record[k.upcase.to_sym] = v }

        record[:FEATURE] = feat
        record[:LOCATOR] = loc

        yield record
      end

      # Jump past the lines consumed for this feature, or else step one.
      @i = [@j, @i + 1].max
    end
  end

  private

  # Method to collect the wanted qualifiers of the current feature,
  # starting at line @j and stopping at the next feature line or the
  # first non-FT line. Returns a hash with qualifier names as given in
  # the entry; values of repeated qualifiers are joined with ";".
  def get_quals
    quals = {}

    while (line = @entry[@j]) && line.start_with?("FT") && line !~ FEATURE_START
      nxt = @j + 1

      if (match = QUALIFIER_START.match(line)) && want_qual?(match[1])
        qual = match[1]
        val  = match[2]

        # Values may wrap over several lines - these are appended
        # verbatim with the closing quote removed.
        while continuation?(@entry[nxt])
          val << continuation_text(@entry[nxt]).chomp('"')
          nxt += 1
        end

        if quals[qual]
          quals[qual] << ";" + val
        else
          quals[qual] = val
        end
      end

      @j = nxt
    end

    quals
  end

  # Method that returns true if line is an FT line continuing the
  # previous location or qualifier value.
  def continuation?(line)
    return false unless line && line.start_with?("FT")

    line !~ ITEM_START
  end

  # Method that returns the text of a continuation line, i.e. everything
  # after the line code and the blanks following it. A bare "FT" line
  # yields an empty string.
  def continuation_text(line)
    line.split(/\s+/, 2)[1].to_s
  end

  # Method that returns true if the given feature key is wanted, i.e. if
  # no filter was given or the filter holds a truthy value for the
  # upcased key.
  def want_feat?(feat)
    return true unless @hash_feats

    @hash_feats[feat.upcase.to_sym] ? true : false
  end

  # Method that returns true if the given qualifier name is wanted, i.e.
  # if no filter was given or the filter holds a truthy value for the
  # upcased name.
  def want_qual?(qual)
    return true unless @hash_quals

    @hash_quals[qual.upcase.to_sym] ? true : false
  end
end

__END__