From: martinahansen Date: Mon, 12 Dec 2011 08:18:01 +0000 (+0000) Subject: split off locator stuff from genbank.rb to locator.rb X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=471889816720cc10e68f9b9a31a5fc9b6732a23b;p=biopieces.git split off locator stuff from genbank.rb to locator.rb git-svn-id: http://biopieces.googlecode.com/svn/trunk@1701 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/code_ruby/lib/maasha/genbank.rb b/code_ruby/lib/maasha/genbank.rb index 3dd202a..d444b4b 100644 --- a/code_ruby/lib/maasha/genbank.rb +++ b/code_ruby/lib/maasha/genbank.rb @@ -1,4 +1,4 @@ -# Copyright (C) 2007-2010 Martin A. Hansen. +# Copyright (C) 2007-2011 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -22,6 +22,7 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +require 'maasha/locator' require 'maasha/seq' require 'maasha/filesys' require 'pp' @@ -232,130 +233,4 @@ class GenbankFeatures end end - -# Error class for all exceptions to do with Genbank/EMBL/DDBJ feature table locators. -class LocatorError < StandardError; end - -class Locator - attr_accessor :locator, :seq, :subseq - - def initialize(locator, seq) - @locator = locator - @seq = seq - @subseq = Seq.new(nil, "", "dna") - parse_locator(locator) - end - - def strand - if @locator.match("complement") - return "-" - else - return "+" - end - end - - def s_beg - if @locator =~ /(\d+)/ - return $1.to_i - 1 - end - end - - def s_end - if @locator.reverse =~ /(\d+)/ - return $1.reverse.to_i - 1 - end - end - - private - - # Method that uses recursion to parse a locator string from a feature - # table and fetches the appropriate subsequence. the operators - # join(), complement(), and order() are handled. - # the locator string is broken into a comma separated lists, and - # modified if the parens donnot balance. 
otherwise the comma separated - # list of ranges are stripped from operators, and the subsequence are - # fetched and handled according to the operators. - # SNP locators are also dealt with (single positions). - def parse_locator(locator, join = nil, comp = nil, order = nil) - intervals = locator.split(",") - - unless balance_parens?(intervals.first) # locator includes a join/comp/order of several ranges - case locator - when /^join\((.*)\)$/ - locator = $1 - join = true - when /^complement\((.*)\)$/ - locator = $1 - comp = true - when /^order\((.*)\)$/ - locator = $1 - order = true - end - - parse_locator(locator, join, comp, order) - else - intervals.each do |interval| - case interval - when /^join\((.*)\)$/ - locator = $1 - join = true - parse_locator(locator, join, comp, order) - when /^complement\((.*)\)$/ - locator = $1 - comp = true - parse_locator(locator, join, comp, order) - when /^order\((.*)\)$/ - locator = $1 - order = true - parse_locator(locator, join, comp, order) - when /^[<>]?(\d+)[^\d]+(\d+)$/ - int_beg = $1.to_i - 1 - int_end = $2.to_i - 1 - - newseq = Seq.new(nil, @seq.seq[int_beg...int_end], "dna") - - unless newseq.seq.nil? - newseq.revcomp if comp - - @subseq.seq << (order ? " " + newseq.seq : newseq.seq) - end - when /^(\d+)$/ - pos = $1.to_i - 1 - - newseq = Seq.new(nil, @seq.seq[pos], "dna") - - unless newseq.seq.nil? - newseq.revcomp if comp - - @subseq.seq << (order ? 
" " + newseq.seq : newseq.seq) - end - else - $stderr.puts "WARNING: Could not match locator -> #{locator}"; - @subseq.seq << "" - end - end - end - - return @subseq - end - - def balance_parens?(locator) - parens = 0 - - locator.each_char do |char| - case char - when '(' then parens += 1 - when ')' then parens -= 1 - end - end - - if parens == 0 - return true - else - return false - end - end -end - - __END__ diff --git a/code_ruby/lib/maasha/locator.rb b/code_ruby/lib/maasha/locator.rb new file mode 100644 index 0000000..a2a0db0 --- /dev/null +++ b/code_ruby/lib/maasha/locator.rb @@ -0,0 +1,158 @@ +# Copyright (C) 2007-2011 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# This software is part of the Biopieces framework (www.biopieces.org). + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +require 'maasha/seq' + +# Error class for all exceptions to do with Genbank/EMBL/DDBJ feature table locators. +class LocatorError < StandardError; end + +# Class that handles Genbank/EMBL/DDBJ feature table locators. +class Locator + attr_accessor :locator, :seq, :subseq + + # Method to initialize a locator object. 
+ def initialize(locator, seq) + @locator = locator + @seq = seq + @subseq = Seq.new(nil, "", "dna") + parse_locator(locator) + end + + # Method that returns the strand (+ or - ) of the locator. + def strand + if @locator.match("complement") + return "-" + else + return "+" + end + end + + # Method that returns the begin position of the locator (0-based). + def s_beg + if @locator =~ /(\d+)/ + return $1.to_i - 1 + end + end + + # Method that returns the end position of the locator (0-based). + def s_end + if @locator.reverse =~ /(\d+)/ + return $1.reverse.to_i - 1 + end + end + + private + + # Method that uses recursion to parse a locator string from a feature table + # and fetches the appropriate subsequence. The operators join(), complement(), + # and order() are handled. The locator string is broken into a comma separated + # lists, and modified if the parens donnot balance. Otherwise the comma + # separated list of ranges are stripped from operators, and the subsequence + # are fetched and handled according to the operators. SNP locators are also + # dealt with (single positions). 
+ def parse_locator(locator, join = nil, comp = nil, order = nil) + intervals = locator.split(",") + + unless balance_parens?(intervals.first) # locator includes a join/comp/order of several ranges + case locator + when /^join\((.*)\)$/ + locator = $1 + join = true + when /^complement\((.*)\)$/ + locator = $1 + comp = true + when /^order\((.*)\)$/ + locator = $1 + order = true + end + + parse_locator(locator, join, comp, order) + else + intervals.each do |interval| + case interval + when /^join\((.*)\)$/ + locator = $1 + join = true + parse_locator(locator, join, comp, order) + when /^complement\((.*)\)$/ + locator = $1 + comp = true + parse_locator(locator, join, comp, order) + when /^order\((.*)\)$/ + locator = $1 + order = true + parse_locator(locator, join, comp, order) + when /^[<>]?(\d+)[^\d]+(\d+)$/ + int_beg = $1.to_i - 1 + int_end = $2.to_i - 1 + + newseq = Seq.new(nil, @seq.seq[int_beg...int_end], "dna") + + unless newseq.seq.nil? + newseq.revcomp if comp + + @subseq.seq << (order ? " " + newseq.seq : newseq.seq) + end + when /^(\d+)$/ + pos = $1.to_i - 1 + + newseq = Seq.new(nil, @seq.seq[pos], "dna") + + unless newseq.seq.nil? + newseq.revcomp if comp + + @subseq.seq << (order ? " " + newseq.seq : newseq.seq) + end + else + $stderr.puts "WARNING: Could not match locator -> #{locator}"; + @subseq.seq << "" + end + end + end + + return @subseq + end + + # Method that checks if the parans in a locater string balances. 
+ def balance_parens?(locator) + parens = 0 + + locator.each_char do |char| + case char + when '(' then parens += 1 + when ')' then parens -= 1 + end + end + + if parens == 0 + return true + else + return false + end + end +end + + +__END__ + diff --git a/code_ruby/test/maasha/test_genbank.rb b/code_ruby/test/maasha/test_genbank.rb deleted file mode 100755 index 8f36440..0000000 --- a/code_ruby/test/maasha/test_genbank.rb +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env ruby -$:.unshift File.join(File.dirname(__FILE__),'..','lib') - -# Copyright (C) 2007-2010 Martin A. Hansen. - -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -# http://www.gnu.org/copyleft/gpl.html - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -# This software is part of the Biopieces framework (www.biopieces.org). 
- -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - - -require 'maasha/genbank' -require 'maasha/seq' -require 'test/unit' -require 'pp' - -class TestGenbank < Test::Unit::TestCase - def setup - @seq = Seq.new(nil, "tcatgatcaagatctaacagcagaagtacacttctattta", "dna") - end - - def test_Locator_with_single_position_returns_correctly - loc = Locator.new("10", @seq) - assert_equal("a", loc.subseq.seq) - end - - def test_Locator_with_single_interval_returns_correctly - loc = Locator.new("5..10", @seq) - assert_equal("gatca", loc.subseq.seq) - end - - def test_Locator_with_multiple_intervals_return_correctly - loc = Locator.new("5..10,15..20", @seq) - assert_equal("gatcataaca", loc.subseq.seq) - end - - def test_Locator_with_join_multiple_intervals_return_correctly - loc = Locator.new("join(5..10,15..20)", @seq) - assert_equal("gatcataaca", loc.subseq.seq) - end - - def test_Locator_with_complement_and_single_interval_return_correctly - loc = Locator.new("complement(5..10)", @seq) - assert_equal("tgatc", loc.subseq.seq) - end -end - - -__END__ diff --git a/code_ruby/test/maasha/test_locator.rb b/code_ruby/test/maasha/test_locator.rb new file mode 100755 index 0000000..2340578 --- /dev/null +++ b/code_ruby/test/maasha/test_locator.rb @@ -0,0 +1,65 @@ +#!/usr/bin/env ruby +$:.unshift File.join(File.dirname(__FILE__),'..','lib') + +# Copyright (C) 2007-2011 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

# http://www.gnu.org/copyleft/gpl.html

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

# This software is part of the Biopieces framework (www.biopieces.org).

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

require 'maasha/seq'
require 'maasha/locator'
require 'test/unit'
require 'pp'

# Unit tests for Locator: each test builds a Locator over the same 40 base
# DNA sequence and checks the subsequence that was fetched.
class TestLocator < Test::Unit::TestCase
  def setup
    @seq = Seq.new(nil, "tcatgatcaagatctaacagcagaagtacacttctattta", "dna")
  end

  def test_Locator_with_single_position_returns_correctly
    assert_equal("a", subseq_of("10"))
  end

  def test_Locator_with_single_interval_returns_correctly
    assert_equal("gatca", subseq_of("5..10"))
  end

  def test_Locator_with_multiple_intervals_return_correctly
    assert_equal("gatcataaca", subseq_of("5..10,15..20"))
  end

  def test_Locator_with_join_multiple_intervals_return_correctly
    assert_equal("gatcataaca", subseq_of("join(5..10,15..20)"))
  end

  def test_Locator_with_complement_and_single_interval_return_correctly
    assert_equal("tgatc", subseq_of("complement(5..10)"))
  end

  private

  # Helper that returns the sequence string fetched by a Locator built from
  # the given locator string and the sequence created in setup.
  def subseq_of(locator_string)
    Locator.new(locator_string, @seq).subseq.seq
  end
end


__END__