]> git.donarmstrong.com Git - biopieces.git/commitdiff
added embl.rb
authormartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Mon, 12 Dec 2011 11:17:20 +0000 (11:17 +0000)
committermartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Mon, 12 Dec 2011 11:17:20 +0000 (11:17 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1702 74ccb610-7750-0410-82ae-013aeee3265d

code_ruby/lib/maasha/embl.rb [new file with mode: 0644]

diff --git a/code_ruby/lib/maasha/embl.rb b/code_ruby/lib/maasha/embl.rb
new file mode 100644 (file)
index 0000000..d3532ae
--- /dev/null
@@ -0,0 +1,241 @@
+# Copyright (C) 2007-2011 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This software is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+require 'maasha/locator'
+require 'maasha/seq'
+require 'maasha/filesys'
+require 'pp'
+
+# Error class for all exceptions to do with EMBL.
+class EMBLError < StandardError; end
+
+class EMBL < Filesys
+  def initialize(io)
+    @io      = io
+    @entry   = []
+  end
+
+  # Iterator method for parsing EMBL entries.
+  def each(hash_keys = nil, hash_feats = nil, hash_quals = nil)
+    while @entry = get_entry do
+      keys = get_keys(hash_keys)
+      seq  = get_seq
+
+      features = EMBLFeatures.new(@entry, hash_feats, hash_quals)
+
+      features.each do |record|
+        keys.each_pair { |key,val| record[key] = val }
+
+                               loc = Locator.new(record[:LOCATOR], seq)
+                               record[:SEQ]     = loc.subseq.seq
+        record[:SEQ_LEN] = loc.subseq.length
+                               record[:STRAND]  = loc.strand
+                               record[:S_BEG]   = loc.s_beg
+                               record[:S_END]   = loc.s_end
+
+        yield record
+      end
+    end
+  end
+
+  private
+
+  # Method to get the next Genbank entry form an ios and return this.
+  def get_entry
+    block = @io.gets("//" + $/)
+    return nil if block.nil?
+
+    block.chomp!("//" + $/ )
+
+    entry = block.split $/
+
+    return nil if entry.empty?
+
+    entry
+  end
+
+  # Method to get the DNA sequence from an EMBL entry and return
+  # this as a Seq object.
+  def get_seq
+    i = @entry.size
+    j = i - 1
+
+    while @entry[j] and @entry[j] !~ /^[A-Z]/
+      j -= 1
+    end
+
+    seq = @entry[j + 1 .. i].join.delete(" 0123456789")
+
+    Seq.new(nil, seq, "dna") if seq
+  end
+
+  # Method to get the base keys from EMBL entry and return these
+  # in a hash.
+  def get_keys(hash_keys)
+    keys = {}
+    i    = 0
+    j    = 0
+
+    while @entry[i]
+      key, val = @entry[i].split(/\s+/, 2)
+      break if key == "FH"
+      unless key == "XX"
+        if want_key?(hash_keys, key)
+          j = i + 1
+
+          while @entry[j]
+            next_key, next_val = @entry[j].split(/\s+/, 2)
+            if key == next_key
+              val << " " + next_val
+              j += 1
+            else
+              break
+            end
+          end
+
+          if keys.has_key? key.to_sym
+            keys[key.to_sym] << " " + val
+          else
+            keys[key.to_sym] = val
+          end
+        end
+      end
+
+      j > i ? i = j : i += 1
+    end
+
+    keys
+  end
+
+  def want_key?(hash_keys, key)
+    if hash_keys
+      if hash_keys[key.to_sym]
+        return true
+      else
+        return false
+      end
+    else
+      return true
+    end
+  end
+end
+
+class EMBLFeatures
+  def initialize(entry, hash_feats, hash_quals)
+    @entry      = entry
+    @hash_feats = hash_feats
+    @hash_quals = hash_quals
+       @i          = 0
+       @j          = 0
+  end
+
+  def each
+    while @entry[@i] and @entry[@i][0 ... 2] != "SQ"
+      if @entry[@i] =~ /^FT\s{3}([A-Za-z_-]+)/
+        if want_feat? $1
+          record = {}
+
+          key, feat, loc = @entry[@i].split(/\s+/, 3)
+
+          @j = @i + 1
+
+          while @entry[@j] and @entry[@j][0 ... 2] == "FT" and @entry[@j] !~ /^FT(\s{19}\/|\s{3}[A-Za-z_-])/
+            loc << @entry[@j].split(/\s+/, 2).last
+            @j += 1
+          end
+
+          get_quals.each_pair { |k,v|
+            record[k.upcase.to_sym] = v
+          }
+
+          record[:FEATURE] = feat
+          record[:LOCATOR] = loc
+
+          yield record
+        end
+      end
+
+      @j > @i ? @i = @j : @i += 1
+    end
+  end
+
+  private
+
+  def get_quals
+    quals = {}
+    k     = 0
+
+    while @entry[@j] and @entry[@j][0 ... 2] == "FT" and @entry[@j] !~ /^FT\s{3}[A-Za-z_-]/
+      if @entry[@j] =~ /^FT\s{19}\/([^=]+)="([^"]+)/
+        qual = $1
+        val  = $2
+
+        if want_qual? qual
+          k = @j + 1
+
+          while @entry[k] and @entry[k][0 ... 2] == "FT" and @entry[k] !~ /^FT(\s{19}\/|\s{3}[A-Za-z_-])/
+            val << @entry[k].split(/\s+/, 2).last.chomp('"')
+            k += 1
+          end
+
+          if quals[qual]
+            quals[qual] << ";" + val
+          else
+            quals[qual] = val
+          end
+        end
+      end
+
+      k > @j ? @j = k : @j += 1
+    end
+
+    quals
+  end
+
+  def want_feat?(feat)
+    if @hash_feats
+      if @hash_feats[feat.upcase.to_sym]
+        return true
+      else
+        return false
+      end
+    else
+      return true
+    end
+  end
+
+  def want_qual?(qual)
+    if @hash_quals
+      if @hash_quals[qual.upcase.to_sym]
+        return true
+      else
+        return false
+      end
+    else
+      return true
+    end
+  end
+end
+
+__END__