--- /dev/null
+require 'getoptlong'
+
+class BP_optparse
+ attr_accessor :options
+
+ def initialize( options = [] )
+ @options = options
+ end
+
+ # Method that adds default options to the option list.
+ def add_default_options
+ @options << { :long => 'help', :short => '?', :type => :flag, :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil }
+ @options << { :long => 'stream_in', :short => 'I', :type => :file!, :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil }
+ @options << { :long => 'stream_out', :short => 'O', :type => :file, :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil }
+ @options << { :long => 'verbose', :short => 'v', :type => :flag, :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil }
+ end
+
+ # Method to check if the option list contains duplicate long options.
+ def check_duplicate_long
+ hash = {}
+
+ @options.each do |opt|
+ raise ArgumentError, "Duplicate long argument" if hash.include?( opt[ :long ] )
+
+ hash[ opt[ :long ] ] = true
+ end
+ end
+
+ # Method to check if the option list contains duplicate short options.
+ def check_duplicate_short
+ hash = {}
+
+ @options.each do |opt|
+ raise ArgumentError, "Duplicate short argument" if hash.include?( opt[ :short ] )
+
+ hash[ opt[ :short ] ] = true
+ end
+ end
+
+ # Method to prepare the option list into the format of GetoptLong.
+ def compile_option_list
+ option_list = []
+
+ @options.each do |opt|
+ if opt[ :type ] == :flag
+ option_list << [ "--#{ opt[ :long ] }", "-#{ opt[ :short ] }", GetoptLong::NO_ARGUMENT ]
+ else
+ option_list << [ "--#{ opt[ :long ] }", "-#{ opt[ :short ] }", GetoptLong::REQUIRED_ARGUMENT ]
+ end
+ end
+
+ option_list
+ end
+
+ def parse_options( option_list )
+ opts = GetoptLong.new()
+ end
+end
+require 'Maasha/lib/seq'
+require 'pp'
+
class Match
attr_accessor :q_beg, :s_beg, :len
+ # Method to initialize a Match with query begin (q_beg) and subject begin (s_beg)
+ # positions and the match length (len).
def initialize( q_beg, s_beg, len )
+ raise ArgumentError, "q_beg must be a positive integer." if q_beg < 0
+ raise ArgumentError, "s_beg must be a positive integer." if s_beg < 0
+ raise ArgumentError, "len must be a positive integer." if len <= 0
@q_beg = q_beg
@s_beg = s_beg
@len = len
end
+ # Method to convert a match to a string.
+ def to_s
+ "q_beg: #{ @q_beg } s_beg: #{ @s_beg } len: #{ len }"
+ end
+
+ # Method that includes all begin positions of a match in a given lookup hash which is returned.
+ # This allows discrimination of redundant matches with the redundant? method. TODO: q_beg or s_beg position?
+ def redundant_index( lookup_hash )
+ @q_beg.upto( @q_beg + @len - 1 ) { |pos| lookup_hash[ pos ] = true }
+ end
+
+ # Method that determines if a match is already included in an array of matches.
+ # This is done by querying a lookup hash of the begin positions. TODO: q_beg or s_beg position?
+ def redundant?( lookup_hash )
+ if lookup_hash.include? self.q_beg
+ return true
+ else
+ return false
+ end
+ end
+
# Method that expands a match forwards and backwards given two strings.
def expand( q_seq, s_seq )
self.expand_forward( q_seq, s_seq )
protected :expand_forward, :expand_backward
end
+
+
+class Seq
+ # Method to create a word index from a sequence. The positions of all sequence words of
+ # a given size are saved in a hash of arrays that is returned. Words containing N or n
+ # are disregarded.
+ def word_index( size )
+ raise ArgumentError, "Size must be a positive integer." if size <= 0
+
+ index = {}
+
+ 0.upto( self.length - size ) do |i|
+ word = self[ i .. i + size - 1 ].upcase
+ if word.count( "N" ) == 0
+ word = word.to_sym
+
+ if index.include? word
+ index[ word ] << i
+ else
+ index[ word ] = [ i ]
+ end
+ end
+ end
+
+ index
+ end
+end
+
+
+class FindMatches
+ attr_accessor :q_seq, :s_seq, :word_size
+
+ # Method to initialize an object with two sequences and a word size as arguments.
+ def initialize( q_seq, s_seq, word_size )
+ raise ArgumentError, "q_seq is empty." if q_seq.empty?
+ raise ArgumentError, "s_seq is empty." if s_seq.empty?
+ raise ArgumentError, "word_size must be a positive integer." if word_size <= 0
+ @q_seq = q_seq
+ @s_seq = s_seq
+ @word_size = word_size
+ end
+
+ # Find all maximum expanded matches between two sequences and return
+ # these in an array.
+ def find_matches
+ matches = []
+ lookup_hash = {}
+
+ index = @q_seq.word_index( @word_size )
+
+ 0.upto( @s_seq.length - @word_size ) do |s_beg|
+ word = @s_seq[ s_beg .. s_beg + @word_size - 1 ].upcase
+ if word.count( "N" ) == 0
+ word = word.to_sym
+
+ if index.include? word
+ index[ word ].each do |q_beg|
+ match = Match.new( q_beg, s_beg, word_size )
+
+ unless match.redundant?( lookup_hash )
+ match.expand( @q_seq, @s_seq )
+ match.redundant_index( lookup_hash )
+
+ matches << match
+ end
+ end
+ end
+ end
+ end
+
+ matches
+ end
+
+ # private :find_matches
+end
# Class containing generic sequence methods and nucleic acid and amino acid subclasses.
class Seq < String
- attr_accessor :seq, :offset
-
- def initialize ( seq = "", offset = 0 )
- super( seq )
- @offset = offset
- end
-
# Guess the sequence type by analyzing the first 100 residues allowing for ambiguity codes.
def guess_type
raise ArgumentError, "No sequence." if self.empty?
# Method to wrap a sequence to a given width using a given delimiter.
def wrap( width = 80, delimit = "\n" )
- raise ArgumentError, "Wrap width must be an integer." unless width.is_a? Fixnum
raise ArgumentError, "Cannot wrap sequence to negative width: #{ width }." if width <= 0
self.delete!( " \t\n\r" )
# Method that generates a random sequence of a given length.
def generate( length )
- raise ArgumentError, "Length must be an integer." unless length.is_a? Fixnum
raise ArgumentError, "Cannot generate negative sequence length: #{ length }." if length <= 0
alph = self.residues
self.replace( self.generate( length ) )
end
- # Method that returns the next word from a given offset and size.
- def next_word( size, step = 1 )
- return nil if @offset + size > self.length
- word = self[ @offset .. @offset + size - 1 ]
- @offset += step
-
- word
- end
-
- # Method that creates a list of words from a string.
- def to_words( size, step = 1 )
- words = []
-
- while word = self.next_word( size, step )
- words << word
- end
-
- words
- end
-
# Class containing methods specific for amino acid (AA) sequences.
class AA < Seq
# Method that returns an array of amino acid residues.
--- /dev/null
+#!/usr/bin/env ruby
+
+require 'Maasha/lib/bp_optparse'
+require 'test/unit'
+require 'pp'
+
+class Test_bp_optparse < Test::Unit::TestCase
+ def test_add_default_options
+ new = BP_optparse.new()
+ assert_equal( new.add_default_options.last[ :long ], "verbose" )
+ end
+
+ def test_check_duplicate_long
+ new = BP_optparse.new( [ { :long => "long_option" }, { :long => "long_option" } ] )
+
+ assert_raise( ArgumentError ) { new.check_duplicate_long }
+ end
+
+ def test_check_duplicate_short
+ new = BP_optparse.new( [ { :short => "s" }, { :short => "s" } ] )
+
+ assert_raise( ArgumentError ) { new.check_duplicate_short }
+ end
+
+ def test_compile_option_list_has_4_elements
+ new = BP_optparse.new()
+ new.add_default_options
+ option_list = new.compile_option_list
+ assert_equal( option_list.count, 4 )
+ end
+
+ def test_compile_option_list_last_is_verbose
+ new = BP_optparse.new()
+ new.add_default_options
+ option_list = new.compile_option_list
+ assert_equal( option_list.last.first, "--verbose" )
+ end
+
+ def test_parse_options
+ n = BP_optparse.new()
+ n.add_default_options
+ option_list = n.compile_option_list
+
+ options = n.parse_options( option_list )
+
+ pp options
+ end
+end
require 'Maasha/lib/match'
require 'test/unit'
+require 'pp'
class TestMatch < Test::Unit::TestCase
+ # Testing Match#to_s
+
+ def test_to_s
+ match = Match.new( 0, 1, 2 )
+ assert_equal( "q_beg: 0 s_beg: 1 len: 2", match.to_s )
+ end
+
+ # Testing Match#redundant_index
+
+ # A match with q_beg 0 and len 2 marks query positions 0 and 1.
+ def test_redundant_index_true
+ match = Match.new( 0, 1, 2 )
+
+ lookup_hash = {}
+
+ match.redundant_index( lookup_hash )
+
+ assert_equal( true, lookup_hash[ 0 ] )
+ assert_equal( true, lookup_hash[ 1 ] )
+ end
+
+ # Position 2 is the first position past the match and must stay unmarked.
+ def test_redundant_index_nil
+ match = Match.new( 0, 1, 2 )
+
+ lookup_hash = {}
+
+ match.redundant_index( lookup_hash )
+
+ assert_equal( nil, lookup_hash[ 2 ] )
+ end
+
+ # Testing Match#redundant?
+
+ # match1 marks query positions 10..39; match2 begins on the first of them.
+ def test_redundant_begin_true?
+ match1 = Match.new( 10, 20, 30 )
+ match2 = Match.new( 10, 0, 1 )
+
+ lookup_hash = {}
+
+ match1.redundant_index( lookup_hash )
+
+ assert_equal( true, match2.redundant?( lookup_hash ) )
+ end
+
+ # match2 begins at 9, one position before the marked range 10..39.
+ def test_redundant_begin_false?
+ match1 = Match.new( 10, 20, 30 )
+ match2 = Match.new( 9, 0, 1 )
+
+ lookup_hash = {}
+
+ match1.redundant_index( lookup_hash )
+
+ assert_equal( false, match2.redundant?( lookup_hash ) )
+ end
+
+ # match1 marks query positions 10..11; match2 begins on the last of them.
+ def test_redundant_end_true?
+ match1 = Match.new( 10, 20, 2 )
+ match2 = Match.new( 11, 0, 1 )
+
+ lookup_hash = {}
+
+ match1.redundant_index( lookup_hash )
+
+ assert_equal( true, match2.redundant?( lookup_hash ) )
+ end
+
+ # match2 begins at 12, one position past the marked range 10..11.
+ def test_redundant_end_false?
+ match1 = Match.new( 10, 20, 2 )
+ match2 = Match.new( 12, 0, 1 )
+
+ lookup_hash = {}
+
+ match1.redundant_index( lookup_hash )
+
+ assert_equal( false, match2.redundant?( lookup_hash ) )
+ end
def test_expand_forward
match = Match.new( 1, 1, 2 )
assert_equal( 4, match.len )
end
+
+ # Testing Seq#word_index
+
+ # Word sizes of zero or less are rejected.
+ def test_word_index_bad_arg
+ seq = Seq.new( "ATCG" )
+
+ assert_raise( ArgumentError ) { seq.word_index( 0 ) }
+ assert_raise( ArgumentError ) { seq.word_index( -1 ) }
+ end
+
+ # The first occurrence of AT is at position 0.
+ def test_word_index_simple
+ seq = Seq.new( "ATCG" )
+
+ index = seq.word_index( 2 )
+
+ assert_equal( 0, index[ "AT".to_sym ].first )
+ end
+
+ # AT occurs at positions 0 and 4; the later one is stored last.
+ def test_word_index_non_simple
+ seq = Seq.new( "ATCGATCG" )
+
+ index = seq.word_index( 2 )
+
+ assert_equal( 4, index[ "AT".to_sym ].last )
+ end
+
+ # Words containing N are not indexed, so the lookup yields nil.
+ def test_word_index_skip_Ns
+ seq = Seq.new( "ATNG" )
+
+ index = seq.word_index( 2 )
+
+ assert_equal( nil, index[ "NG".to_sym ] )
+ end
+
+ # Testing FindMatches#find_matches
+
+ # Identical sequences: the first match is expected to start at 0 in both
+ # sequences with length 4 -- presumably the 2-residue seed grown by
+ # Match#expand, whose body is not visible here (TODO confirm).
+ def test_find_matches
+ q_seq = Seq.new( "ATCG" )
+ s_seq = Seq.new( "ATCG" )
+
+ matches = FindMatches.new( q_seq, s_seq, 2 ).find_matches # TODO: get rid of explicit find_matches call
+
+ assert_equal( 0, matches.first.q_beg )
+ assert_equal( 0, matches.first.s_beg )
+ assert_equal( 4, matches.first.len )
+ end
end
+
# Testing Seq#wrap
- def test_wrap_arg_is_a_number
- s = Seq.new
-
- assert_raise( ArgumentError ) { s.wrap( "FOO" ) }
- end
-
def test_wrap_arg_is_a_positive_number
s = Seq.new
# Testing Seq#generate
- def test_generate_arg_is_a_number
- s = Seq.new
-
- assert_raise( ArgumentError ) { s.generate( "FOO" ) }
- end
-
def test_generate_arg_is_a_positive_number
s = Seq.new
assert_equal( 40, s.length )
end
- # Testing Seq#next_word
-
- def test_next_word
- s = Seq::NA::DNA.new( "ATCG" )
-
- assert_equal( "AT", s.next_word( 2 ) )
- end
-
- # Testing Seq#to_words
- def test_to_words
- s = Seq::NA::DNA.new( "ATCG" )
-
- assert_equal( "AT", s.to_words( 2 ).first )
- end
-
# Testing Seq::AA#residues
def test_Seq_AA_residues