1 # Copyright (C) 2007-2011 Martin A. Hansen.
3 # This program is free software; you can redistribute it and/or
4 # modify it under the terms of the GNU General Public License
5 # as published by the Free Software Foundation; either version 2
6 # of the License, or (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 # http://www.gnu.org/copyleft/gpl.html
19 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
21 # This software is part of the Biopieces framework (www.biopieces.org).
23 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
27 # Error class for all exceptions to do with Genbank/EMBL/DDBJ feature table locators.
28 class LocatorError < StandardError; end
30 # Class that handles Genbank/EMBL/DDBJ feature table locators.
32 attr_accessor :locator, :seq, :subseq
34 # Method to initialize a locator object.
35 def initialize(locator, seq)
38 @subseq = Seq.new(nil, "", "dna")
39 parse_locator(locator)
42 # Method that returns the strand (+ or -) of the locator.
44 if @locator.match("complement")
51 # Method that returns the begin position of the locator (0-based).
53 if @locator =~ /(\d+)/
58 # Method that returns the end position of the locator (0-based).
60 if @locator.reverse =~ /(\d+)/
61 return $1.reverse.to_i - 1
67 # Method that uses recursion to parse a locator string from a feature table
68 # and fetches the appropriate subsequence. The operators join(), complement(),
69 # and order() are handled. The locator string is broken into a comma separated
70 # list, and modified if the parens do not balance. Otherwise the comma
71 # separated list of ranges is stripped of operators, and the subsequences
72 # are fetched and handled according to the operators. SNP locators are also
73 # dealt with (single positions).
74 def parse_locator(locator, join = nil, comp = nil, order = nil)
75 intervals = locator.split(",")
77 unless balance_parens?(intervals.first) # locator includes a join/comp/order of several ranges
82 when /^complement\((.*)\)$/
85 when /^order\((.*)\)$/
90 parse_locator(locator, join, comp, order)
92 intervals.each do |interval|
97 parse_locator(locator, join, comp, order)
98 when /^complement\((.*)\)$/
101 parse_locator(locator, join, comp, order)
102 when /^order\((.*)\)$/
105 parse_locator(locator, join, comp, order)
106 when /^[<>]?(\d+)[^\d]+(\d+)$/
107 int_beg = $1.to_i - 1
108 int_end = $2.to_i - 1
110 newseq = Seq.new(nil, @seq.seq[int_beg..int_end], "dna")
112 unless newseq.seq.nil?
113 newseq.revcomp if comp
115 @subseq.seq << (order ? " " + newseq.seq : newseq.seq)
120 newseq = Seq.new(nil, @seq.seq[pos], "dna")
122 unless newseq.seq.nil?
123 newseq.revcomp if comp
125 @subseq.seq << (order ? " " + newseq.seq : newseq.seq)
128 $stderr.puts "WARNING: Could not match locator -> #{locator}";
137 # Method that checks if the parens in a locator string balance.
138 def balance_parens?(locator)
141 locator.each_char do |char|
143 when '(' then parens += 1
144 when ')' then parens -= 1