From: martinahansen Date: Sat, 25 Jul 2009 08:45:37 +0000 (+0000) Subject: fixed feature in analyze_vals, issue 3 X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=ee3bad350dd7f0b98ef97086e0ed78c343020e4e;p=biopieces.git fixed feature in analyze_vals, issue 3 git-svn-id: http://biopieces.googlecode.com/svn/trunk@595 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/bp_bin/analyze_vals b/bp_bin/analyze_vals index 91da15a..d1d9ede 100755 --- a/bp_bin/analyze_vals +++ b/bp_bin/analyze_vals @@ -36,19 +36,21 @@ use Data::Dumper; # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -my ( $options, $in, $out, $record, $analysis, $key, $len, +my ( $options, $in, $out, $record, $analysis, $key, $len, %skip_hash, %key_hash, $skip, $keys, $types, $counts, $mins, $maxs, $sums, $means ); $options = Maasha::Biopieces::parse_options( [ { long => 'no_stream', short => 'x', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, { long => 'keys', short => 'k', type => 'list', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'no_keys', short => 'K', type => 'list', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, ] ); $in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); $out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); +map { $skip_hash{ $_ } = 1 } @{ $options->{ "no_keys" } }; map { $key_hash{ $_ } = 1; $skip = 1 } @{ $options->{ "keys" } }; while ( $record = Maasha::Biopieces::get_record( $in ) ) @@ -56,6 +58,7 @@ while ( $record = Maasha::Biopieces::get_record( $in ) ) foreach $key ( keys %{ $record } ) { next if $skip and not exists $key_hash{ $key }; + next if $skip_hash{ $key }; if ( Maasha::Calc::is_a_number( $record->{ $key } ) ) { @@ -108,7 +111,7 @@ $maxs = "MAX "; $sums = "SUM "; $means = "MEAN "; -foreach $key ( keys %{ $analysis } ) +foreach $key ( sort keys %{ $analysis } ) { $keys .= sprintf "% 15s", $key; $types .= sprintf "% 15s", $analysis->{ $key }->{ "TYPE" }; diff --git a/code_ruby/Maasha/lib/match.rb b/code_ruby/Maasha/lib/match.rb index 6853971..01654fd 100644 --- a/code_ruby/Maasha/lib/match.rb +++ b/code_ruby/Maasha/lib/match.rb @@ -20,20 +20,16 @@ class Match "q_beg: #{ @q_beg } s_beg: #{ @s_beg } len: #{ len }" end - # Method that includes all begin positions of a match in a given lookup hash which is returned. - # This allows discrimination of redundant matches with the redundant? method. TODO: q_beg or s_beg position? - def redundant_index( lookup_hash ) - @q_beg.upto( @q_beg + @len - 1 ) { |pos| lookup_hash[ pos ] = true } - end - - # Method that determines if a match is already included in an array of matches. - # This is done by querying a lookup hash of the begin positions. TODO: q_beg or s_beg position? - def redundant?( lookup_hash ) - if lookup_hash.include? self.q_beg - return true - else - return false + # Method to determine if a match is already included in an array of matches. + def redundant?( matches ) + matches.each do |match| + if self.q_beg >= match.q_beg and self.q_beg + self.len <= match.q_beg + match.len and + self.s_beg >= match.s_beg and self.s_beg + self.len <= match.s_beg + match.len and + return true + end end + + return false end # Method that expands a match forwards and backwards given two strings. @@ -89,7 +85,7 @@ class Seq end -class FindMatches +class Matches attr_accessor :q_seq, :s_seq, :word_size # Method to initialize an object with two sequences and a word size as arguments. @@ -102,11 +98,10 @@ class FindMatches @word_size = word_size end - # Find all maximum expanded matches between two sequences and return + # Locate all maximum expanded matches between two sequences and return # these in an array. - def find_matches - matches = [] - lookup_hash = {} + def locate + matches = [] index = @q_seq.word_index( @word_size ) @@ -119,9 +114,8 @@ class FindMatches index[ word ].each do |q_beg| match = Match.new( q_beg, s_beg, word_size ) - unless match.redundant?( lookup_hash ) + unless match.redundant?( matches ) match.expand( @q_seq, @s_seq ) - match.redundant_index( lookup_hash ) matches << match end @@ -132,6 +126,5 @@ class FindMatches matches end - - # private :find_matches end + diff --git a/code_ruby/Maasha/test/test_match.rb b/code_ruby/Maasha/test/test_match.rb index 6f08bb1..2d298e1 100755 --- a/code_ruby/Maasha/test/test_match.rb +++ b/code_ruby/Maasha/test/test_match.rb @@ -10,73 +10,38 @@ class TestMatch < Test::Unit::TestCase assert_equal( "q_beg: 0 s_beg: 1 len: 2", match.to_s ) end - def test_redundant_index_true - match = Match.new( 0, 1, 2 ) - - lookup_hash = {} + def test_redundant? + match1 = Match.new( 0, 0, 2 ) + match2 = Match.new( 0, 0, 2 ) - match.redundant_index( lookup_hash ) + matches = [] + matches << match1 - assert_equal( true, lookup_hash[ 0 ] ) - assert_equal( true, lookup_hash[ 1 ] ) + assert_equal( true, match2.redundant?( matches ) ) end - def test_redundant_index_nil - match = Match.new( 0, 1, 2 ) - - lookup_hash = {} + def test_not_redundant_1? + match1 = Match.new( 0, 0, 2 ) + match2 = Match.new( 1, 1, 2 ) - match.redundant_index( lookup_hash ) + matches = [] + matches << match1 - assert_equal( nil, lookup_hash[ 2 ] ) + assert_equal( false, match2.redundant?( matches ) ) end - def test_redundant_begin_true? - match1 = Match.new( 10, 20, 30 ) - match2 = Match.new( 10, 0, 1 ) - - lookup_hash = {} - - match1.redundant_index( lookup_hash ) - - assert_equal( true, match2.redundant?( lookup_hash ) ) - end - - def test_redundant_begin_false? - match1 = Match.new( 10, 20, 30 ) - match2 = Match.new( 9, 0, 1 ) - - lookup_hash = {} - - match1.redundant_index( lookup_hash ) - - assert_equal( false, match2.redundant?( lookup_hash ) ) - end - - def test_redundant_end_true? - match1 = Match.new( 10, 20, 2 ) - match2 = Match.new( 11, 0, 1 ) - - lookup_hash = {} + def test_not_redundant_2? + match1 = Match.new( 1, 1, 2 ) + match2 = Match.new( 0, 0, 2 ) - match1.redundant_index( lookup_hash ) + matches = [] + matches << match1 - assert_equal( true, match2.redundant?( lookup_hash ) ) - end - - def test_redundant_end_false? - match1 = Match.new( 10, 20, 2 ) - match2 = Match.new( 12, 0, 1 ) - - lookup_hash = {} - - match1.redundant_index( lookup_hash ) - - assert_equal( false, match2.redundant?( lookup_hash ) ) + assert_equal( false, match2.redundant?( matches ) ) end def test_expand_forward - match = Match.new( 1, 1, 2 ) + match = Match.new( 1, 1, 1 ) class << match public :expand_forward @@ -84,11 +49,13 @@ class TestMatch < Test::Unit::TestCase match.expand_forward( "ATCG", "ATCG" ) + assert_equal( 1, match.q_beg ) + assert_equal( 1, match.s_beg ) assert_equal( 3, match.len ) end def test_expand_backward - match = Match.new( 1, 1, 2 ) + match = Match.new( 1, 1, 1 ) class << match public :expand_backward @@ -96,7 +63,9 @@ class TestMatch < Test::Unit::TestCase match.expand_backward( "ATCG", "ATCG" ) - assert_equal( 3, match.len ) + assert_equal( 0, match.q_beg ) + assert_equal( 0, match.s_beg ) + assert_equal( 2, match.len ) end def test_expand @@ -104,6 +73,8 @@ class TestMatch < Test::Unit::TestCase match.expand( "ATCG", "ATCG" ) + assert_equal( 0, match.q_beg ) + assert_equal( 0, match.s_beg ) assert_equal( 4, match.len ) end @@ -140,17 +111,34 @@ class TestMatch < Test::Unit::TestCase assert_equal( nil, index[ "NG".to_sym ] ) end - # Testing FindMatches#find_matches + # Testing Matches#find_matches - def test_find_matches + def test_find_matches_1_match q_seq = Seq.new( "ATCG" ) s_seq = Seq.new( "ATCG" ) - matches = FindMatches.new( q_seq, s_seq, 2 ).find_matches # TODO: get rid of explicit find_matches call + m = Matches.new( q_seq, s_seq, 2 ) + matches = m.locate assert_equal( 0, matches.first.q_beg ) assert_equal( 0, matches.first.s_beg ) assert_equal( 4, matches.first.len ) end + + def test_find_matches_2_matches + q_seq = Seq.new( "ATCG_GCTA" ) + s_seq = Seq.new( "GCTA.ATCG" ) + + m = Matches.new( q_seq, s_seq, 2 ) + matches = m.locate + + assert_equal( 2, matches.count ) + assert_equal( 0, matches.first.s_beg ) + assert_equal( 5, matches.first.q_beg ) + assert_equal( 4, matches.first.len ) + assert_equal( 5, matches.last.s_beg ) + assert_equal( 0, matches.last.q_beg ) + assert_equal( 4, matches.last.len ) + end end