From f5acf465aca28be318d232ca04c7fa63c4d40a29 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Fri, 29 Jun 2012 18:27:56 +0000 Subject: [PATCH] clean-up of ruby code to remove rake warnings git-svn-id: http://biopieces.googlecode.com/svn/trunk@1852 74ccb610-7750-0410-82ae-013aeee3265d --- bp_test/out/mean_scores.out.4 | 49 ------ code_ruby/lib/maasha/align.rb | 145 +++--------------- code_ruby/lib/maasha/biopieces.rb | 24 +-- code_ruby/lib/maasha/fastq.rb | 2 +- code_ruby/lib/maasha/seq/patternmatcher.rb | 2 +- .../test/maasha/seq/test_patternmatcher.rb | 12 +- 6 files changed, 42 insertions(+), 192 deletions(-) delete mode 100644 bp_test/out/mean_scores.out.4 diff --git a/bp_test/out/mean_scores.out.4 b/bp_test/out/mean_scores.out.4 deleted file mode 100644 index 9b40030..0000000 --- a/bp_test/out/mean_scores.out.4 +++ /dev/null @@ -1,49 +0,0 @@ -SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh -SCORES_LOCAL_MEAN: 40.00 -SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -SCORES_LOCAL_POS: -1 -SEQ_LEN: 39 -SEQ_NAME: test1 ---- -SCORES: hhhhBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh -SCORES_LOCAL_MEAN: 21.00 -SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -SCORES_LOCAL_POS: -1 -SEQ_LEN: 39 -SEQ_NAME: test2 ---- -SCORES: hhhhBBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhh -SCORES_LOCAL_MEAN: 2.00 -SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -SCORES_LOCAL_POS: 4 -SEQ_LEN: 39 -SEQ_NAME: test3 ---- -SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBB -SCORES_LOCAL_MEAN: 21.00 -SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -SCORES_LOCAL_POS: -1 -SEQ_LEN: 39 -SEQ_NAME: test4 ---- -SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBBBBBBB -SCORES_LOCAL_MEAN: 2.00 -SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -SCORES_LOCAL_POS: 29 -SEQ_LEN: 39 -SEQ_NAME: test5 ---- -SCORES: BBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh -SCORES_LOCAL_MEAN: 21.00 -SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -SCORES_LOCAL_POS: -1 -SEQ_LEN: 39 -SEQ_NAME: test6 ---- -SCORES: BBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhh -SCORES_LOCAL_MEAN: 2.00 -SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -SCORES_LOCAL_POS: 0 -SEQ_LEN: 39 -SEQ_NAME: test7 ---- diff --git a/code_ruby/lib/maasha/align.rb b/code_ruby/lib/maasha/align.rb index a828721..572e9ab 100755 --- a/code_ruby/lib/maasha/align.rb +++ b/code_ruby/lib/maasha/align.rb @@ -26,6 +26,7 @@ require 'pp' require 'open3' require 'narray' require 'maasha/fasta' +require 'maasha/mum' class AlignError < StandardError; end; @@ -141,7 +142,7 @@ class Align q_entry.seq.downcase! s_entry.seq.downcase! - ap = AlignPair.new(q_entry.seq, s_entry.seq) + ap = AlignPair.new(q_entry, s_entry) ap.align(q_space_beg, q_space_end, s_space_beg, s_space_end, 15, []) @@ -231,10 +232,10 @@ class Align class AlignPair attr_reader :matches - # Method to initialize an AlignPair object given two sequence strings. - def initialize(q_seq, s_seq) - @q_seq = q_seq - @s_seq = s_seq + # Method to initialize an AlignPair object given two Seq objects. + def initialize(q_entry, s_entry) + @q_entry = q_entry + @s_entry = s_entry @matches = [] end @@ -260,15 +261,16 @@ class Align end while new_matches.empty? and kmer_size > 0 - if @q_seq.length >= kmer_size and @s_seq.length >= kmer_size - q_index = index_seq(q_space_beg, q_space_end, @q_seq, kmer_size, kmer_size) - s_index = index_seq(s_space_beg, s_space_end, @s_seq, kmer_size, 1) - new_matches = find_matches(q_index, s_index, kmer_size) + if @q_entry.length >= kmer_size and @s_entry.length >= kmer_size + new_matches = MUMs.find(@q_entry, @s_entry, {"q_space_beg" => q_space_beg, + "q_space_end" => q_space_end, + "s_space_beg" => s_space_beg, + "s_space_end" => s_space_end, + "kmer_size" => kmer_size} ) end unless new_matches.empty? longest_match = new_matches.sort_by { |match| match.length }.pop - longest_match.expand(@q_seq, @s_seq, q_space_beg, q_space_end, s_space_beg, s_space_end) # pp longest_match @matches << longest_match @@ -280,8 +282,8 @@ class Align q_space_beg_right = longest_match.q_end + 1 s_space_beg_right = longest_match.s_end + 1 - q_space_end_right = (q_space_end == @q_seq.length) ? @q_seq.length : q_space_end - 1 - s_space_end_right = (s_space_end == @s_seq.length) ? @s_seq.length : s_space_end - 1 + q_space_end_right = (q_space_end == @q_entry.length) ? @q_entry.length : q_space_end - 1 + s_space_end_right = (s_space_end == @s_entry.length) ? @s_entry.length : s_space_end - 1 if q_space_end_left - q_space_beg_left > 0 and s_space_end_left - s_space_beg_left > 0 align(q_space_beg_left, q_space_end_left, s_space_beg_left, s_space_end_left, kmer_size, new_matches) @@ -300,8 +302,8 @@ class Align # sequence is kept in lower case. def upcase_match @matches.each do |match| - @q_seq[match.q_beg .. match.q_end] = @q_seq[match.q_beg .. match.q_end].upcase - @s_seq[match.s_beg .. match.s_end] = @s_seq[match.s_beg .. match.s_end].upcase + @q_entry.seq[match.q_beg .. match.q_end] = @q_entry.seq[match.q_beg .. match.q_end].upcase + @s_entry.seq[match.s_beg .. match.s_end] = @s_entry.seq[match.s_beg .. match.s_end].upcase end end @@ -320,20 +322,20 @@ class Align #pp "q_gaps #{q_gaps} s_gaps #{s_gaps} diff #{diff}" if diff < 0 - @q_seq.insert(match.q_beg + q_gaps, "-" * diff.abs) + @q_entry.seq.insert(match.q_beg + q_gaps, "-" * diff.abs) q_gaps += diff.abs elsif diff > 0 - @s_seq.insert(match.s_beg + s_gaps, "-" * diff.abs) + @s_entry.seq.insert(match.s_beg + s_gaps, "-" * diff.abs) s_gaps += diff.abs end end - diff = @q_seq.length - @s_seq.length + diff = @q_entry.length - @s_entry.length if diff < 0 - @q_seq << ("-" * diff.abs) + @q_entry.seq << ("-" * diff.abs) else - @s_seq << ("-" * diff.abs) + @s_entry.seq << ("-" * diff.abs) end end @@ -351,111 +353,6 @@ class Align puts "---" } end - - private - - # Method for indexing a sequence given a search space, oligo size - # and step size. All oligo positions are saved in a lookup hash - # where the oligo is the key and the position is the value. Non- - # unique oligos are removed and the index returned. - def index_seq(space_beg, space_end, seq, kmer_size, step_size) - index = Hash.new - index_count = Hash.new(0) - - i = space_beg - - while i < space_end - kmer_size + 1 - oligo = seq[i ... i + kmer_size] - - index[oligo] = i - index_count[oligo] += 1 - i += step_size - end - - index_count.each do |oligo, count| - index.delete(oligo) if count > 1 - end - - index - end - - # Method for locating matches between two indexes where one is created - # using non-overlapping unique oligos and their positions and one is - # using overlapping unique oligos and positions. Thus iterating over the - # non-overlapping oligos and checking last match, this can be extended. - def find_matches(q_index, s_index, kmer_size) - matches = [] - - q_index.each do |oligo, q_pos| - if s_pos = s_index[oligo] - - last_match = matches.last - new_match = Match.new(q_pos, s_pos, kmer_size) - - if last_match and - last_match.q_beg + last_match.length == new_match.q_beg and - last_match.s_beg + last_match.length == new_match.s_beg - - last_match.length += kmer_size - else - matches << new_match - end - end - end - - matches - end - - # Class for Match objects. - class Match - attr_accessor :length - attr_reader :q_beg, :s_beg - - def initialize(q_beg, s_beg, length) - @q_beg = q_beg - @s_beg = s_beg - @length = length - end - - def q_end - @q_beg + @length - 1 - end - - def s_end - @s_beg + @length - 1 - end - - def to_s - "q_beg=#{q_beg} q_end=#{q_end} s_beg=#{s_beg} s_end=#{s_end} length=#{length}" - end - - # Method to expand a match as far as possible to the left and right - # within a given search space. - def expand(q_seq, s_seq, q_space_beg, q_space_end, s_space_beg, s_space_end) - expand_left(q_seq, s_seq, q_space_beg, s_space_beg) - expand_right(q_seq, s_seq, q_space_end, s_space_end) - end - - private - - # Method to expand a match as far as possible to the left within a given - # search space. - def expand_left(q_seq, s_seq, q_space_beg, s_space_beg) - while @q_beg > q_space_beg and @s_beg > s_space_beg and q_seq[@q_beg - 1] == s_seq[@s_beg - 1] - @q_beg -= 1 - @s_beg -= 1 - @length += 1 - end - end - - # Method to expand a match as far as possible to the right within a given - # search space. - def expand_right(q_seq, s_seq, q_space_end, s_space_end) - while q_end + 1 <= q_space_end and s_end + 1 <= s_space_end and q_seq[q_end + 1] == s_seq[s_end + 1] - @length += 1 - end - end - end end class Consensus diff --git a/code_ruby/lib/maasha/biopieces.rb b/code_ruby/lib/maasha/biopieces.rb index e994963..6c54658 100644 --- a/code_ruby/lib/maasha/biopieces.rb +++ b/code_ruby/lib/maasha/biopieces.rb @@ -153,10 +153,10 @@ class Biopieces if STDIN.tty? input = self.new(StringIO.new) else - input = self.new(STDIN, stdio = true) + input = self.new(STDIN, true) end elsif File.exists? input - ios = File.open(input, mode='r') + ios = File.open(input, 'r') begin ios = Zlib::GzipReader.new(ios) @@ -174,9 +174,9 @@ class Biopieces # Records are written to STDOUT (default) or file. def self.open_output(output) if output.nil? - output = self.new(STDOUT, stdio = true) + output = self.new(STDOUT, true) elsif not output.is_a? IO - output = self.new(File.open(output, mode='w')) + output = self.new(File.open(output, 'w')) end output @@ -377,7 +377,7 @@ class OptionHandler option_parser.parse!(@argv) if print_usage_full? - print_usage_and_exit(full=true) + print_usage_and_exit(true) elsif print_usage_short? print_usage_and_exit end @@ -566,7 +566,7 @@ class Status def set time0 = Time.new.strftime("%Y-%m-%d %X") - File.open(path, mode="w") do |fh| + File.open(path, "w") do |fh| fh.puts [time0, ARGV.join(" ")].join(";") end end @@ -575,13 +575,13 @@ class Status def set_tmpdir(tmpdir_path) status = "" - File.open(path, mode="r") do |fh| + File.open(path, "r") do |fh| status = fh.read.chomp end status = "#{status};#{tmpdir_path}\n" - File.open(path, mode="w") do |fh| + File.open(path, "w") do |fh| fh << status end end @@ -589,7 +589,7 @@ class Status # Extract the temporary directory path from the status file, # and return this or nil if not found. def get_tmpdir - File.open(path, mode="r") do |fh| + File.open(path, "r") do |fh| tmpdir_path = fh.read.chomp.split(";").last return tmpdir_path if File.directory?(tmpdir_path) end @@ -604,14 +604,14 @@ class Status script = File.basename($0) stream = File.open(path) - time0, args, tmp_dir = stream.first.split(";") + time0, args = stream.first.split(";") stream.close elap = time_diff(time0, time1) command = [script, args].join(" ") log_file = File.join(ENV["BP_LOG"], "biopieces.log") - File.open(log_file, mode = "a") { |file| file.puts [time0, time1, elap, user, exit_status, command].join("\t") } + File.open(log_file, "a") { |file| file.puts [time0, time1, elap, user, exit_status, command].join("\t") } end # Delete status file. @@ -627,6 +627,8 @@ class Status script = File.basename($0) pid = $$ path = File.join(ENV["BP_TMP"], [user, script, pid, "status"].join(".")) + + path end # Get the elapsed time from the difference between two time stamps. diff --git a/code_ruby/lib/maasha/fastq.rb b/code_ruby/lib/maasha/fastq.rb index ac97be0..c57f4eb 100644 --- a/code_ruby/lib/maasha/fastq.rb +++ b/code_ruby/lib/maasha/fastq.rb @@ -36,7 +36,7 @@ class Fastq < Filesys begin seq_name = @io.gets.chomp! seq = @io.gets.chomp! - qual_name = @io.gets.chomp! + @io.gets qual = @io.gets.chomp! entry = Seq.new diff --git a/code_ruby/lib/maasha/seq/patternmatcher.rb b/code_ruby/lib/maasha/seq/patternmatcher.rb index a07fd3a..ba83c15 100644 --- a/code_ruby/lib/maasha/seq/patternmatcher.rb +++ b/code_ruby/lib/maasha/seq/patternmatcher.rb @@ -134,7 +134,7 @@ class Vector @vector = [] (0 ... @pattern.length + 1).each do |i| - @vector[i] = Score.new(matches = 0, mismatches = 0, insertions = i, deletions = 0, edit_distance = i) + @vector[i] = Score.new(0, 0, i, 0, i) end end diff --git a/code_ruby/test/maasha/seq/test_patternmatcher.rb b/code_ruby/test/maasha/seq/test_patternmatcher.rb index 9a7a50e..03b0926 100755 --- a/code_ruby/test/maasha/seq/test_patternmatcher.rb +++ b/code_ruby/test/maasha/seq/test_patternmatcher.rb @@ -65,7 +65,7 @@ class TestPatternMatcher < Test::Unit::TestCase end def test_PatternMatcher_match_with_one_mismatch_and_edit_dist_one_returns_correctly - m = @p.match("aCcg", pos = 0, edit_distance = 1) + m = @p.match("aCcg", 0, 1) assert_equal(0, m.pos) assert_equal("atcg", m.match) assert_equal(3, m.matches) @@ -76,7 +76,7 @@ class TestPatternMatcher < Test::Unit::TestCase end def test_PatternMatcher_match_with_two_mismatch_and_edit_dist_one_returns_nil - assert_nil(@p.match("aGcA", pos = 0, edit_distance = 1)) + assert_nil(@p.match("aGcA", 0, 1)) end def test_PatternMatcher_match_with_one_insertion_and_edit_dist_zero_returns_nil @@ -84,7 +84,7 @@ class TestPatternMatcher < Test::Unit::TestCase end def test_PatternMatcher_match_with_one_insertion_and_edit_dist_one_returns_correctly - m = @p.match("atGcg", pos = 0, edit_distance = 1) + m = @p.match("atGcg", 0, 1) assert_equal(0, m.pos) assert_equal("atcg", m.match) assert_equal(4, m.matches) @@ -95,11 +95,11 @@ class TestPatternMatcher < Test::Unit::TestCase end def test_PatternMatcher_match_with_two_insertions_and_edit_dist_one_returns_nil - assert_nil(@p.match("atGcTg", pos = 0, edit_distance = 1)) + assert_nil(@p.match("atGcTg", 0, 1)) end def test_PatternMatcher_match_with_two_insertions_and_edit_dist_two_returns_correctly - m = @p.match("atGcTg", pos = 0, edit_distance = 2) + m = @p.match("atGcTg", 0, 2) assert_equal(0, m.pos) assert_equal("atcg", m.match) assert_equal(4, m.matches) @@ -114,7 +114,7 @@ class TestPatternMatcher < Test::Unit::TestCase end def test_PatternMatcher_match_with_one_deletion_and_edit_distance_one_returns_correctly - m = @p.match("acg", pos = 0, edit_distance = 1) + m = @p.match("acg", 0, 1) assert_equal(0, m.pos) assert_equal("atcg", m.match) assert_equal(3, m.matches) -- 2.39.2