# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-my ( $options, $in, $out, $record, $analysis, $key, $len,
+my ( $options, $in, $out, $record, $analysis, $key, $len, %skip_hash,
%key_hash, $skip, $keys, $types, $counts, $mins, $maxs, $sums, $means );
$options = Maasha::Biopieces::parse_options(
[
{ long => 'no_stream', short => 'x', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
{ long => 'keys', short => 'k', type => 'list', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
+ { long => 'no_keys', short => 'K', type => 'list', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
]
);
$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } );
$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } );
# Remember every key named in the 'no_keys' option so it can be excluded in the record loop.
+map { $skip_hash{ $_ } = 1 } @{ $options->{ "no_keys" } };
# Remember every key named in the 'keys' option; $skip is set only if at least one key was given.
map { $key_hash{ $_ } = 1; $skip = 1 } @{ $options->{ "keys" } };
while ( $record = Maasha::Biopieces::get_record( $in ) )
foreach $key ( keys %{ $record } )
{
# With a 'keys' list in effect, ignore any record key that is not on it.
next if $skip and not exists $key_hash{ $key };
# Ignore any record key that was listed in 'no_keys'.
+ next if $skip_hash{ $key };
if ( Maasha::Calc::is_a_number( $record->{ $key } ) )
{
$sums = "SUM ";
$means = "MEAN ";
# The replacement line walks the analyzed keys in sorted order instead of hash order.
-foreach $key ( keys %{ $analysis } )
+foreach $key ( sort keys %{ $analysis } )
{
$keys .= sprintf "% 15s", $key;
$types .= sprintf "% 15s", $analysis->{ $key }->{ "TYPE" };
"q_beg: #{ @q_beg } s_beg: #{ @s_beg } len: #{ len }"
end
- # Method that includes all begin positions of a match in a given lookup hash which is returned.
- # This allows discrimination of redundant matches with the redundant? method. TODO: q_beg or s_beg position?
- def redundant_index( lookup_hash )
- @q_beg.upto( @q_beg + @len - 1 ) { |pos| lookup_hash[ pos ] = true }
- end
-
- # Method that determines if a match is already included in an array of matches.
- # This is done by querying a lookup hash of the begin positions. TODO: q_beg or s_beg position?
- def redundant?( lookup_hash )
- if lookup_hash.include? self.q_beg
- return true
- else
- return false
+ # Method to determine if a match is already included in an array of matches.
+ def redundant?( matches )
+ matches.each do |match|
+ if self.q_beg >= match.q_beg and self.q_beg + self.len <= match.q_beg + match.len and
+ self.s_beg >= match.s_beg and self.s_beg + self.len <= match.s_beg + match.len and
+ return true
+ end
end
+
+ return false
end
# Method that expands a match forwards and backwards given two strings.
end
-class FindMatches
+class Matches
attr_accessor :q_seq, :s_seq, :word_size
# Method to initialize an object with two sequences and a word size as arguments.
@word_size = word_size
end
- # Find all maximum expanded matches between two sequences and return
+ # Locate all maximum expanded matches between two sequences and return
# these in an array.
- def find_matches
- matches = []
- lookup_hash = {}
+ def locate
+ matches = []
index = @q_seq.word_index( @word_size )
index[ word ].each do |q_beg|
# Each candidate starts as a seed of length word_size at (q_beg, s_beg).
match = Match.new( q_beg, s_beg, word_size )
# Seeds already covered by a previously accepted match are dropped; only the
# remaining seeds are expanded and collected.
- unless match.redundant?( lookup_hash )
+ unless match.redundant?( matches )
match.expand( @q_seq, @s_seq )
- match.redundant_index( lookup_hash )
matches << match
end
matches
end
-
- # private :find_matches
end
+
assert_equal( "q_beg: 0 s_beg: 1 len: 2", match.to_s )
end
- def test_redundant_index_true
- match = Match.new( 0, 1, 2 )
-
- lookup_hash = {}
# A match with the same coordinates as one already in the list must be reported as redundant.
+ def test_redundant?
+ match1 = Match.new( 0, 0, 2 )
+ match2 = Match.new( 0, 0, 2 )
- match.redundant_index( lookup_hash )
+ matches = []
+ matches << match1
- assert_equal( true, lookup_hash[ 0 ] )
- assert_equal( true, lookup_hash[ 1 ] )
+ assert_equal( true, match2.redundant?( matches ) )
end
- def test_redundant_index_nil
- match = Match.new( 0, 1, 2 )
-
- lookup_hash = {}
# A match that starts inside the listed match but runs past its end is not redundant.
+ def test_not_redundant_1?
+ match1 = Match.new( 0, 0, 2 )
+ match2 = Match.new( 1, 1, 2 )
- match.redundant_index( lookup_hash )
+ matches = []
+ matches << match1
- assert_equal( nil, lookup_hash[ 2 ] )
+ assert_equal( false, match2.redundant?( matches ) )
end
- def test_redundant_begin_true?
- match1 = Match.new( 10, 20, 30 )
- match2 = Match.new( 10, 0, 1 )
-
- lookup_hash = {}
-
- match1.redundant_index( lookup_hash )
-
- assert_equal( true, match2.redundant?( lookup_hash ) )
- end
-
- def test_redundant_begin_false?
- match1 = Match.new( 10, 20, 30 )
- match2 = Match.new( 9, 0, 1 )
-
- lookup_hash = {}
-
- match1.redundant_index( lookup_hash )
-
- assert_equal( false, match2.redundant?( lookup_hash ) )
- end
-
- def test_redundant_end_true?
- match1 = Match.new( 10, 20, 2 )
- match2 = Match.new( 11, 0, 1 )
-
- lookup_hash = {}
# A match that starts before the listed match is not redundant.
+ def test_not_redundant_2?
+ match1 = Match.new( 1, 1, 2 )
+ match2 = Match.new( 0, 0, 2 )
- match1.redundant_index( lookup_hash )
+ matches = []
+ matches << match1
- assert_equal( true, match2.redundant?( lookup_hash ) )
- end
-
- def test_redundant_end_false?
- match1 = Match.new( 10, 20, 2 )
- match2 = Match.new( 12, 0, 1 )
-
- lookup_hash = {}
-
- match1.redundant_index( lookup_hash )
-
- assert_equal( false, match2.redundant?( lookup_hash ) )
+ assert_equal( false, match2.redundant?( matches ) )
end
def test_expand_forward
- match = Match.new( 1, 1, 2 )
+ match = Match.new( 1, 1, 1 )
class << match
public :expand_forward
match.expand_forward( "ATCG", "ATCG" )
+ assert_equal( 1, match.q_beg )
+ assert_equal( 1, match.s_beg )
assert_equal( 3, match.len )
end
def test_expand_backward
- match = Match.new( 1, 1, 2 )
+ match = Match.new( 1, 1, 1 )
class << match
public :expand_backward
match.expand_backward( "ATCG", "ATCG" )
- assert_equal( 3, match.len )
+ assert_equal( 0, match.q_beg )
+ assert_equal( 0, match.s_beg )
+ assert_equal( 2, match.len )
end
def test_expand
match.expand( "ATCG", "ATCG" )
+ assert_equal( 0, match.q_beg )
+ assert_equal( 0, match.s_beg )
assert_equal( 4, match.len )
end
assert_equal( nil, index[ "NG".to_sym ] )
end
- # Testing FindMatches#find_matches
+ # Testing Matches#locate
- def test_find_matches
# Two identical sequences with word size 2: the first match returned by
# Matches#locate is expected to span the whole sequence (q_beg 0, s_beg 0, len 4).
+ def test_find_matches_1_match
q_seq = Seq.new( "ATCG" )
s_seq = Seq.new( "ATCG" )
- matches = FindMatches.new( q_seq, s_seq, 2 ).find_matches # TODO: get rid of explicit find_matches call
+ m = Matches.new( q_seq, s_seq, 2 )
+ matches = m.locate
assert_equal( 0, matches.first.q_beg )
assert_equal( 0, matches.first.s_beg )
assert_equal( 4, matches.first.len )
end
+
# The two 4-letter segments appear in swapped order in query and subject, so
# Matches#locate is expected to return exactly two matches of length 4:
# first query 5 / subject 0, last query 0 / subject 5.
+ def test_find_matches_2_matches
+ q_seq = Seq.new( "ATCG_GCTA" )
+ s_seq = Seq.new( "GCTA.ATCG" )
+
+ m = Matches.new( q_seq, s_seq, 2 )
+ matches = m.locate
+
+ assert_equal( 2, matches.count )
+ assert_equal( 0, matches.first.s_beg )
+ assert_equal( 5, matches.first.q_beg )
+ assert_equal( 4, matches.first.len )
+ assert_equal( 5, matches.last.s_beg )
+ assert_equal( 0, matches.last.q_beg )
+ assert_equal( 4, matches.last.len )
+ end
end