-# Copyright (C) 2007-2010 Martin A. Hansen.
+# Copyright (C) 2007-2011 Martin A. Hansen.
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+require 'maasha/locator'
require 'maasha/seq'
require 'maasha/filesys'
require 'pp'
block.chomp!("//" + $/ )
- entry = block.split $/
+ entry = block.tr("\r", "\n").split $/
return nil if entry.empty?
i = @entry.size
j = i - 1
- while @entry[j] and @entry[j] !~ /^[A-Z]/
+ while @entry[j] and @entry[j] =~ /^\s+\d/
j -= 1
end
seq = @entry[j + 1 .. i].join.delete(" 0123456789")
- Seq.new(nil, seq, "dna") if seq
+ Seq.new(nil, seq, :dna) if seq
end
# Method to get the base keys from Genbank entry and return these
j += 1
end
- if keys.has_key? key.to_sym
+ if keys[key.to_sym]
keys[key.to_sym] << ";" + val
else
keys[key.to_sym] = val
def each
while @entry[@i] and @entry[@i] !~ /^ORIGIN/
- if @entry[@i] =~ /^\s{5}([A-Za-z_-]+)/
+ if @entry[@i] =~ /^\s{5}([53'A-Za-z_-]+)/
if want_feat? $1
record = {}
@j = @i + 1
- while @entry[@j] and @entry[@j] !~ /^(\s{21}\/|\s{5}[A-Za-z_-]|[A-Z])/
+ while @entry[@j] and @entry[@j] !~ /^(\s{21}\/|\s{5}[53'A-Za-z_-]|[A-Z])/
loc << @entry[@j].lstrip
@j += 1
end
quals = {}
k = 0
- while @entry[@j] and @entry[@j] !~ /^\s{5}[A-Za-z_-]|^[A-Z]/
+ while @entry[@j] and @entry[@j] !~ /^\s{5}[53'A-Za-z_-]|^[A-Z]/
if @entry[@j] =~ /^\s{21}\/([^=]+)="([^"]+)/
qual = $1
val = $2
if want_qual? qual
k = @j + 1
- while @entry[k] and @entry[k] !~ /^(\s{21}\/|\s{5}[A-Za-z_-]|[A-Z])/
+ while @entry[k] and @entry[k] !~ /^(\s{21}\/|\s{5}[53'A-Za-z_-]|[A-Z])/
val << @entry[k].lstrip.chomp('"')
k += 1
end
end
end
-
-# Error class for all exceptions related to Genbank/EMBL/DDBJ feature table locators.
-class LocatorError < StandardError; end # NOTE(review): no raise of this class is visible in this part of the file; unmatched locators only produce a warning on $stderr
-
-class Locator # Pairs a feature table locator string with a sequence and collects the located subsequence in @subseq.
- attr_accessor :locator, :seq, :subseq
-
- def initialize(locator, seq) # Stores both arguments, starts with an empty "dna" Seq and parses the locator immediately.
- @locator = locator
- @seq = seq # must respond to #seq with something indexable by Integer and Range (see parse_locator)
- @subseq = Seq.new(nil, "", "dna")
- parse_locator(locator) # fills @subseq as a side effect
- end
-
- def strand # Returns "-" when the locator text contains "complement", otherwise "+".
- if @locator.match("complement")
- return "-"
- else
- return "+"
- end
- end
-
- def s_beg # Returns the first run of digits in the locator minus 1; nil when the locator holds no digits.
- if @locator =~ /(\d+)/
- return $1.to_i - 1 # presumably converts a 1-based position to a 0-based index
- end
- end
-
- def s_end # Returns the last run of digits in the locator minus 1; nil when the locator holds no digits.
- if @locator.reverse =~ /(\d+)/ # matching on the reversed string finds the LAST digit run
- return $1.reverse.to_i - 1 # reverse the captured digits back into reading order
- end
- end
-
- private
-
- # Recursively parses a feature table locator string and appends the
- # matching subsequence(s) of @seq to @subseq. The operators join(),
- # complement() and order() are handled.
- # The locator is split on commas. If the parentheses of the first piece
- # do not balance, the outermost operator is stripped and the remainder is
- # parsed again; otherwise each piece is either unwrapped recursively or
- # sliced out of @seq as a range (N..M) or a single position, e.g. a SNP.
- # Returns @subseq.
- def parse_locator(locator, join = nil, comp = nil, order = nil) # NOTE(review): join is set and passed on but never read in this method
- intervals = locator.split(",")
-
- unless balance_parens?(intervals.first) # first piece has unbalanced parens: an operator wraps several comma separated pieces
- case locator
- when /^join\((.*)\)$/
- locator = $1 # keep only the text inside the outermost operator
- join = true
- when /^complement\((.*)\)$/
- locator = $1
- comp = true
- when /^order\((.*)\)$/
- locator = $1
- order = true
- end # NOTE(review): there is no else branch, so an unmatched locator is passed on unchanged and the recursion below would not terminate
-
- parse_locator(locator, join, comp, order)
- else
- intervals.each do |interval| # NOTE(review): join/comp/order are assigned inside this block, so a flag set by one interval stays set for the following ones
- case interval
- when /^join\((.*)\)$/
- locator = $1
- join = true
- parse_locator(locator, join, comp, order)
- when /^complement\((.*)\)$/
- locator = $1
- comp = true
- parse_locator(locator, join, comp, order)
- when /^order\((.*)\)$/
- locator = $1
- order = true
- parse_locator(locator, join, comp, order)
- when /^[<>]?(\d+)[^\d]+(\d+)$/ # a range: optional < or >, begin, any non-digit separator, end
- int_beg = $1.to_i - 1
- int_end = $2.to_i - 1
-
- newseq = Seq.new(nil, @seq.seq[int_beg...int_end], "dna") # NOTE(review): the three-dot range excludes index int_end, so the last base of the range is dropped; confirm whether ".." was intended
-
- unless newseq.seq.nil? # skip when the slice yielded nil
- newseq.revcomp if comp
-
- @subseq.seq << (order ? " " + newseq.seq : newseq.seq) # order() pieces are prefixed with a space, all others are concatenated directly
- end
- when /^(\d+)$/ # a single position
- pos = $1.to_i - 1
-
- newseq = Seq.new(nil, @seq.seq[pos], "dna")
-
- unless newseq.seq.nil? # skip when the index yielded nil
- newseq.revcomp if comp
-
- @subseq.seq << (order ? " " + newseq.seq : newseq.seq)
- end
- else
- $stderr.puts "WARNING: Could not match locator -> #{locator}"; # reports the locator variable, not the individual interval that failed to match
- @subseq.seq << "" # appends nothing; @subseq is left as it was
- end
- end
- end
-
- return @subseq
- end
-
- def balance_parens?(locator) # Returns true when the string holds equally many "(" and ")" characters; nesting order is not checked.
- parens = 0 # running count: +1 per "(", -1 per ")"
-
- locator.each_char do |char|
- case char
- when '(' then parens += 1
- when ')' then parens -= 1
- end
- end
-
- if parens == 0
- return true
- else
- return false
- end
- end
-end
-
-
__END__