From: martinahansen Date: Sun, 19 Jul 2009 19:20:18 +0000 (+0000) Subject: adding to ruby source X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=872aaf77494551588665b1fa89ec74e86e37edab;p=biopieces.git adding to ruby source git-svn-id: http://biopieces.googlecode.com/svn/trunk@567 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/code_ruby/Maasha/lib/bp_optparse.rb b/code_ruby/Maasha/lib/bp_optparse.rb new file mode 100644 index 0000000..1946fda --- /dev/null +++ b/code_ruby/Maasha/lib/bp_optparse.rb @@ -0,0 +1,58 @@ +require 'getoptlong' + +class BP_optparse + attr_accessor :options + + def initialize( options = [] ) + @options = options + end + + # Method that adds default options to the option list. + def add_default_options + @options << { :long => 'help', :short => '?', :type => :flag, :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil } + @options << { :long => 'stream_in', :short => 'I', :type => :file!, :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil } + @options << { :long => 'stream_out', :short => 'O', :type => :file, :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil } + @options << { :long => 'verbose', :short => 'v', :type => :flag, :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil } + end + + # Method to check if the option list contains duplicate long options. + def check_duplicate_long + hash = {} + + @options.each do |opt| + raise ArgumentError, "Duplicate long argument" if hash.include?( opt[ :long ] ) + + hash[ opt[ :long ] ] = true + end + end + + # Method to check if the option list contains duplicate short options. + def check_duplicate_short + hash = {} + + @options.each do |opt| + raise ArgumentError, "Duplicate short argument" if hash.include?( opt[ :short ] ) + + hash[ opt[ :short ] ] = true + end + end + + # Method to prepare the option list into the format of GetoptLong. + def compile_option_list + option_list = [] + + @options.each do |opt| + if opt[ :type ] == :flag + option_list << [ "--#{ opt[ :long ] }", "-#{ opt[ :short ] }", GetoptLong::NO_ARGUMENT ] + else + option_list << [ "--#{ opt[ :long ] }", "-#{ opt[ :short ] }", GetoptLong::REQUIRED_ARGUMENT ] + end + end + + option_list + end + + def parse_options( option_list ) + opts = GetoptLong.new() + end +end diff --git a/code_ruby/Maasha/lib/match.rb b/code_ruby/Maasha/lib/match.rb index 2467b93..6853971 100644 --- a/code_ruby/Maasha/lib/match.rb +++ b/code_ruby/Maasha/lib/match.rb @@ -1,12 +1,41 @@ +require 'Maasha/lib/seq' +require 'pp' + class Match attr_accessor :q_beg, :s_beg, :len + # Method to initialize a Match with query begin (q_beg) and subject begin (s_beg) + # positions and the match length (len). def initialize( q_beg, s_beg, len ) + raise ArgumentError, "q_beg must be a positive integer." if q_beg < 0 + raise ArgumentError, "s_beg must be a positive integer." if s_beg < 0 + raise ArgumentError, "len must be a positive integer." if len <= 0 @q_beg = q_beg @s_beg = s_beg @len = len end + # Method to convert a match to a string. + def to_s + "q_beg: #{ @q_beg } s_beg: #{ @s_beg } len: #{ len }" + end + + # Method that includes all begin positions of a match in a given lookup hash which is returned. + # This allows discrimination of redundant matches with the redundant? method. TODO: q_beg or s_beg position? + def redundant_index( lookup_hash ) + @q_beg.upto( @q_beg + @len - 1 ) { |pos| lookup_hash[ pos ] = true } + end + + # Method that determines if a match is already included in an array of matches. + # This is done by querying a lookup hash of the begin positions. TODO: q_beg or s_beg position? + def redundant?( lookup_hash ) + if lookup_hash.include? self.q_beg + return true + else + return false + end + end + # Method that expands a match forwards and backwards given two strings. def expand( q_seq, s_seq ) self.expand_forward( q_seq, s_seq ) @@ -31,3 +60,78 @@ class Match protected :expand_forward, :expand_backward end + + +class Seq + # Method to create a word index from a sequence. The positions of all sequence words of + # a given size are saved in a hash of arrays that is returned. Words containing N or n + # are disregarded. + def word_index( size ) + raise ArgumentError, "Size must be a positive integer." if size <= 0 + + index = {} + + 0.upto( self.length - size ) do |i| + word = self[ i .. i + size - 1 ].upcase + if word.count( "N" ) == 0 + word = word.to_sym + + if index.include? word + index[ word ] << i + else + index[ word ] = [ i ] + end + end + end + + index + end +end + + +class FindMatches + attr_accessor :q_seq, :s_seq, :word_size + + # Method to initialize an object with two sequences and a word size as arguments. + def initialize( q_seq, s_seq, word_size ) + raise ArgumentError, "q_seq is empty." if q_seq.empty? + raise ArgumentError, "s_seq is empty." if s_seq.empty? + raise ArgumentError, "word_size must be a positive integer." if word_size <= 0 + @q_seq = q_seq + @s_seq = s_seq + @word_size = word_size + end + + # Find all maximum expanded matches between two sequences and return + # these in an array. + def find_matches + matches = [] + lookup_hash = {} + + index = @q_seq.word_index( @word_size ) + + 0.upto( @s_seq.length - @word_size ) do |s_beg| + word = @s_seq[ s_beg .. s_beg + @word_size - 1 ].upcase + if word.count( "N" ) == 0 + word = word.to_sym + + if index.include? word + index[ word ].each do |q_beg| + match = Match.new( q_beg, s_beg, word_size ) + + unless match.redundant?( lookup_hash ) + match.expand( @q_seq, @s_seq ) + match.redundant_index( lookup_hash ) + + matches << match + end + end + end + end + end + + matches + end + + # private :find_matches +end diff --git a/code_ruby/Maasha/lib/seq.rb b/code_ruby/Maasha/lib/seq.rb index 99f2124..ff5bd44 100644 --- a/code_ruby/Maasha/lib/seq.rb +++ b/code_ruby/Maasha/lib/seq.rb @@ -1,12 +1,5 @@ # Class containing generic sequence methods and nucleic acid and amino acid subclasses. class Seq < String - attr_accessor :seq, :offset - - def initialize ( seq = "", offset = 0 ) - super( seq ) - @offset = offset - end - # Guess the sequence type by analyzing the first 100 residues allowing for ambiguity codes. def guess_type raise ArgumentError, "No sequence." if self.empty? @@ -24,7 +17,6 @@ class Seq < String # Method to wrap a sequence to a given width using a given delimiter. def wrap( width = 80, delimit = "\n" ) - raise ArgumentError, "Wrap width must be an integer." unless width.is_a? Fixnum raise ArgumentError, "Cannot wrap sequence to negative width: #{ width }." if width <= 0 self.delete!( " \t\n\r" ) @@ -38,7 +30,6 @@ class Seq < String # Method that generates a random sequence of a given length. def generate( length ) - raise ArgumentError, "Length must be an integer." unless length.is_a? Fixnum raise ArgumentError, "Cannot generate negative sequence length: #{ length }." if length <= 0 alph = self.residues @@ -50,26 +41,6 @@ class Seq < String self.replace( self.generate( length ) ) end - # Method that returns the next word from a given offset and size. - def next_word( size, step = 1 ) - return nil if @offset + size > self.length - word = self[ @offset .. @offset + size - 1 ] - @offset += step - - word - end - - # Method that creates a list of words from a string. - def to_words( size, step = 1 ) - words = [] - - while word = self.next_word( size, step ) - words << word - end - - words - end - # Class containing methods specific for amino acid (AA) sequences. class AA < Seq # Method that returns an array of amino acid residues. diff --git a/code_ruby/Maasha/test/test_bp_optparse.rb b/code_ruby/Maasha/test/test_bp_optparse.rb new file mode 100755 index 0000000..eff2699 --- /dev/null +++ b/code_ruby/Maasha/test/test_bp_optparse.rb @@ -0,0 +1,48 @@ +#!/usr/bin/env ruby + +require 'Maasha/lib/bp_optparse' +require 'test/unit' +require 'pp' + +class Test_bp_optparse < Test::Unit::TestCase + def test_add_default_options + new = BP_optparse.new() + assert_equal( new.add_default_options.last[ :long ], "verbose" ) + end + + def test_check_duplicate_long + new = BP_optparse.new( [ { :long => "long_option" }, { :long => "long_option" } ] ) + + assert_raise( ArgumentError ) { new.check_duplicate_long } + end + + def test_check_duplicate_short + new = BP_optparse.new( [ { :short => "s" }, { :short => "s" } ] ) + + assert_raise( ArgumentError ) { new.check_duplicate_short } + end + + def test_compile_option_list_has_4_elements + new = BP_optparse.new() + new.add_default_options + option_list = new.compile_option_list + assert_equal( option_list.count, 4 ) + end + + def test_compile_option_list_last_is_verbose + new = BP_optparse.new() + new.add_default_options + option_list = new.compile_option_list + assert_equal( option_list.last.first, "--verbose" ) + end + + def test_parse_options + n = BP_optparse.new() + n.add_default_options + option_list = n.compile_option_list + + options = n.parse_options( option_list ) + + pp options + end +end diff --git a/code_ruby/Maasha/test/test_match.rb b/code_ruby/Maasha/test/test_match.rb index bec9e4f..6f08bb1 100755 --- a/code_ruby/Maasha/test/test_match.rb +++ b/code_ruby/Maasha/test/test_match.rb @@ -2,8 +2,78 @@ require 'Maasha/lib/match' require 'test/unit' +require 'pp' class TestMatch < Test::Unit::TestCase + def test_to_s + match = Match.new( 0, 1, 2 ) + assert_equal( "q_beg: 0 s_beg: 1 len: 2", match.to_s ) + end + + def test_redundant_index_true + match = Match.new( 0, 1, 2 ) + + lookup_hash = {} + + match.redundant_index( lookup_hash ) + + assert_equal( true, lookup_hash[ 0 ] ) + assert_equal( true, lookup_hash[ 1 ] ) + end + + def test_redundant_index_nil + match = Match.new( 0, 1, 2 ) + + lookup_hash = {} + + match.redundant_index( lookup_hash ) + + assert_equal( nil, lookup_hash[ 2 ] ) + end + + def test_redundant_begin_true? + match1 = Match.new( 10, 20, 30 ) + match2 = Match.new( 10, 0, 1 ) + + lookup_hash = {} + + match1.redundant_index( lookup_hash ) + + assert_equal( true, match2.redundant?( lookup_hash ) ) + end + + def test_redundant_begin_false? + match1 = Match.new( 10, 20, 30 ) + match2 = Match.new( 9, 0, 1 ) + + lookup_hash = {} + + match1.redundant_index( lookup_hash ) + + assert_equal( false, match2.redundant?( lookup_hash ) ) + end + + def test_redundant_end_true? + match1 = Match.new( 10, 20, 2 ) + match2 = Match.new( 11, 0, 1 ) + + lookup_hash = {} + + match1.redundant_index( lookup_hash ) + + assert_equal( true, match2.redundant?( lookup_hash ) ) + end + + def test_redundant_end_false? + match1 = Match.new( 10, 20, 2 ) + match2 = Match.new( 12, 0, 1 ) + + lookup_hash = {} + + match1.redundant_index( lookup_hash ) + + assert_equal( false, match2.redundant?( lookup_hash ) ) + end def test_expand_forward match = Match.new( 1, 1, 2 ) @@ -36,4 +106,51 @@ class TestMatch < Test::Unit::TestCase assert_equal( 4, match.len ) end + + # Testing Seq#word_index + + def test_word_index_bad_arg + seq = Seq.new( "ATCG" ) + + assert_raise( ArgumentError ) { seq.word_index( 0 ) } + assert_raise( ArgumentError ) { seq.word_index( -1 ) } + end + + def test_word_index_simple + seq = Seq.new( "ATCG" ) + + index = seq.word_index( 2 ) + + assert_equal( 0, index[ "AT".to_sym ].first ) + end + + def test_word_index_non_simple + seq = Seq.new( "ATCGATCG" ) + + index = seq.word_index( 2 ) + + assert_equal( 4, index[ "AT".to_sym ].last ) + end + + def test_word_index_skip_Ns + seq = Seq.new( "ATNG" ) + + index = seq.word_index( 2 ) + + assert_equal( nil, index[ "NG".to_sym ] ) + end + + # Testing FindMatches#find_matches + + def test_find_matches + q_seq = Seq.new( "ATCG" ) + s_seq = Seq.new( "ATCG" ) + + matches = FindMatches.new( q_seq, s_seq, 2 ).find_matches # TODO: get rid of explicit find_matches call + + assert_equal( 0, matches.first.q_beg ) + assert_equal( 0, matches.first.s_beg ) + assert_equal( 4, matches.first.len ) + end end + diff --git a/code_ruby/Maasha/test/test_seq.rb b/code_ruby/Maasha/test/test_seq.rb index 39f4714..7eb8d23 100755 --- a/code_ruby/Maasha/test/test_seq.rb +++ b/code_ruby/Maasha/test/test_seq.rb @@ -78,12 +78,6 @@ class TestSeq < Test::Unit::TestCase # Testing Seq#wrap - def test_wrap_arg_is_a_number - s = Seq.new - - assert_raise( ArgumentError ) { s.wrap( "FOO" ) } - end - def test_wrap_arg_is_a_positive_number s = Seq.new @@ -145,12 +139,6 @@ class TestSeq < Test::Unit::TestCase # Testing Seq#generate - def test_generate_arg_is_a_number - s = Seq.new - - assert_raise( ArgumentError ) { s.generate( "FOO" ) } - end - def test_generate_arg_is_a_positive_number s = Seq.new @@ -184,21 +172,6 @@ class TestSeq < Test::Unit::TestCase assert_equal( 40, s.length ) end - # Testing Seq#next_word - - def test_next_word - s = Seq::NA::DNA.new( "ATCG" ) - - assert_equal( "AT", s.next_word( 2 ) ) - end - - # Testing Seq#to_words - def test_to_words - s = Seq::NA::DNA.new( "ATCG" ) - - assert_equal( "AT", s.to_words( 2 ).first ) - end - # Testing Seq::AA#residues def test_Seq_AA_residues