+++ /dev/null
-SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
-SCORES_LOCAL_MEAN: 40.00
-SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-SCORES_LOCAL_POS: -1
-SEQ_LEN: 39
-SEQ_NAME: test1
----
-SCORES: hhhhBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
-SCORES_LOCAL_MEAN: 21.00
-SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-SCORES_LOCAL_POS: -1
-SEQ_LEN: 39
-SEQ_NAME: test2
----
-SCORES: hhhhBBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhh
-SCORES_LOCAL_MEAN: 2.00
-SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-SCORES_LOCAL_POS: 4
-SEQ_LEN: 39
-SEQ_NAME: test3
----
-SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBB
-SCORES_LOCAL_MEAN: 21.00
-SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-SCORES_LOCAL_POS: -1
-SEQ_LEN: 39
-SEQ_NAME: test4
----
-SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBBBBBBB
-SCORES_LOCAL_MEAN: 2.00
-SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-SCORES_LOCAL_POS: 29
-SEQ_LEN: 39
-SEQ_NAME: test5
----
-SCORES: BBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
-SCORES_LOCAL_MEAN: 21.00
-SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-SCORES_LOCAL_POS: -1
-SEQ_LEN: 39
-SEQ_NAME: test6
----
-SCORES: BBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
-SCORES_LOCAL_MEAN: 2.00
-SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-SCORES_LOCAL_POS: 0
-SEQ_LEN: 39
-SEQ_NAME: test7
----
require 'open3'
require 'narray'
require 'maasha/fasta'
+require 'maasha/mum'
class AlignError < StandardError; end;
q_entry.seq.downcase!
s_entry.seq.downcase!
- ap = AlignPair.new(q_entry.seq, s_entry.seq)
+ ap = AlignPair.new(q_entry, s_entry)
ap.align(q_space_beg, q_space_end, s_space_beg, s_space_end, 15, [])
class AlignPair
attr_reader :matches
- # Method to initialize an AlignPair object given two sequence strings.
- def initialize(q_seq, s_seq)
- @q_seq = q_seq
- @s_seq = s_seq
+ # Method to initialize an AlignPair object given two Seq objects.
+ def initialize(q_entry, s_entry)
+ @q_entry = q_entry
+ @s_entry = s_entry
@matches = []
end
end
while new_matches.empty? and kmer_size > 0
- if @q_seq.length >= kmer_size and @s_seq.length >= kmer_size
- q_index = index_seq(q_space_beg, q_space_end, @q_seq, kmer_size, kmer_size)
- s_index = index_seq(s_space_beg, s_space_end, @s_seq, kmer_size, 1)
- new_matches = find_matches(q_index, s_index, kmer_size)
+ if @q_entry.length >= kmer_size and @s_entry.length >= kmer_size
+ new_matches = MUMs.find(@q_entry, @s_entry, {"q_space_beg" => q_space_beg,
+ "q_space_end" => q_space_end,
+ "s_space_beg" => s_space_beg,
+ "s_space_end" => s_space_end,
+ "kmer_size" => kmer_size} )
end
unless new_matches.empty?
longest_match = new_matches.sort_by { |match| match.length }.pop
- longest_match.expand(@q_seq, @s_seq, q_space_beg, q_space_end, s_space_beg, s_space_end)
# pp longest_match
@matches << longest_match
q_space_beg_right = longest_match.q_end + 1
s_space_beg_right = longest_match.s_end + 1
- q_space_end_right = (q_space_end == @q_seq.length) ? @q_seq.length : q_space_end - 1
- s_space_end_right = (s_space_end == @s_seq.length) ? @s_seq.length : s_space_end - 1
+ q_space_end_right = (q_space_end == @q_entry.length) ? @q_entry.length : q_space_end - 1
+ s_space_end_right = (s_space_end == @s_entry.length) ? @s_entry.length : s_space_end - 1
if q_space_end_left - q_space_beg_left > 0 and s_space_end_left - s_space_beg_left > 0
align(q_space_beg_left, q_space_end_left, s_space_beg_left, s_space_end_left, kmer_size, new_matches)
# sequence is kept in lower case.
def upcase_match
@matches.each do |match|
- @q_seq[match.q_beg .. match.q_end] = @q_seq[match.q_beg .. match.q_end].upcase
- @s_seq[match.s_beg .. match.s_end] = @s_seq[match.s_beg .. match.s_end].upcase
+ @q_entry.seq[match.q_beg .. match.q_end] = @q_entry.seq[match.q_beg .. match.q_end].upcase
+ @s_entry.seq[match.s_beg .. match.s_end] = @s_entry.seq[match.s_beg .. match.s_end].upcase
end
end
#pp "q_gaps #{q_gaps} s_gaps #{s_gaps} diff #{diff}"
if diff < 0
- @q_seq.insert(match.q_beg + q_gaps, "-" * diff.abs)
+ @q_entry.seq.insert(match.q_beg + q_gaps, "-" * diff.abs)
q_gaps += diff.abs
elsif diff > 0
- @s_seq.insert(match.s_beg + s_gaps, "-" * diff.abs)
+ @s_entry.seq.insert(match.s_beg + s_gaps, "-" * diff.abs)
s_gaps += diff.abs
end
end
- diff = @q_seq.length - @s_seq.length
+ diff = @q_entry.length - @s_entry.length
if diff < 0
- @q_seq << ("-" * diff.abs)
+ @q_entry.seq << ("-" * diff.abs)
else
- @s_seq << ("-" * diff.abs)
+ @s_entry.seq << ("-" * diff.abs)
end
end
puts "---"
}
end
-
- private
-
- # Method for indexing a sequence given a search space, oligo size
- # and step size. All oligo positions are saved in a lookup hash
- # where the oligo is the key and the position is the value. Non-
- # unique oligos are removed and the index returned.
- def index_seq(space_beg, space_end, seq, kmer_size, step_size)
- index = Hash.new
- index_count = Hash.new(0)
-
- i = space_beg
-
- while i < space_end - kmer_size + 1
- oligo = seq[i ... i + kmer_size]
-
- index[oligo] = i
- index_count[oligo] += 1
- i += step_size
- end
-
- index_count.each do |oligo, count|
- index.delete(oligo) if count > 1
- end
-
- index
- end
-
- # Method for locating matches between two indexes where one is created
- # using non-overlapping unique oligos and their positions and one is
- # using overlapping unique oligos and positions. Thus iterating over the
- # non-overlapping oligos and checking last match, this can be extended.
- def find_matches(q_index, s_index, kmer_size)
- matches = []
-
- q_index.each do |oligo, q_pos|
- if s_pos = s_index[oligo]
-
- last_match = matches.last
- new_match = Match.new(q_pos, s_pos, kmer_size)
-
- if last_match and
- last_match.q_beg + last_match.length == new_match.q_beg and
- last_match.s_beg + last_match.length == new_match.s_beg
-
- last_match.length += kmer_size
- else
- matches << new_match
- end
- end
- end
-
- matches
- end
-
- # Class for Match objects.
- class Match
- attr_accessor :length
- attr_reader :q_beg, :s_beg
-
- def initialize(q_beg, s_beg, length)
- @q_beg = q_beg
- @s_beg = s_beg
- @length = length
- end
-
- def q_end
- @q_beg + @length - 1
- end
-
- def s_end
- @s_beg + @length - 1
- end
-
- def to_s
- "q_beg=#{q_beg} q_end=#{q_end} s_beg=#{s_beg} s_end=#{s_end} length=#{length}"
- end
-
- # Method to expand a match as far as possible to the left and right
- # within a given search space.
- def expand(q_seq, s_seq, q_space_beg, q_space_end, s_space_beg, s_space_end)
- expand_left(q_seq, s_seq, q_space_beg, s_space_beg)
- expand_right(q_seq, s_seq, q_space_end, s_space_end)
- end
-
- private
-
- # Method to expand a match as far as possible to the left within a given
- # search space.
- def expand_left(q_seq, s_seq, q_space_beg, s_space_beg)
- while @q_beg > q_space_beg and @s_beg > s_space_beg and q_seq[@q_beg - 1] == s_seq[@s_beg - 1]
- @q_beg -= 1
- @s_beg -= 1
- @length += 1
- end
- end
-
- # Method to expand a match as far as possible to the right within a given
- # search space.
- def expand_right(q_seq, s_seq, q_space_end, s_space_end)
- while q_end + 1 <= q_space_end and s_end + 1 <= s_space_end and q_seq[q_end + 1] == s_seq[s_end + 1]
- @length += 1
- end
- end
- end
end
class Consensus
if STDIN.tty?
input = self.new(StringIO.new)
else
- input = self.new(STDIN, stdio = true)
+ input = self.new(STDIN, true)
end
elsif File.exists? input
- ios = File.open(input, mode='r')
+ ios = File.open(input, 'r')
begin
ios = Zlib::GzipReader.new(ios)
# Records are written to STDOUT (default) or file.
def self.open_output(output)
if output.nil?
- output = self.new(STDOUT, stdio = true)
+ output = self.new(STDOUT, true)
elsif not output.is_a? IO
- output = self.new(File.open(output, mode='w'))
+ output = self.new(File.open(output, 'w'))
end
output
option_parser.parse!(@argv)
if print_usage_full?
- print_usage_and_exit(full=true)
+ print_usage_and_exit(true)
elsif print_usage_short?
print_usage_and_exit
end
def set
time0 = Time.new.strftime("%Y-%m-%d %X")
- File.open(path, mode="w") do |fh|
+ File.open(path, "w") do |fh|
fh.puts [time0, ARGV.join(" ")].join(";")
end
end
def set_tmpdir(tmpdir_path)
status = ""
- File.open(path, mode="r") do |fh|
+ File.open(path, "r") do |fh|
status = fh.read.chomp
end
status = "#{status};#{tmpdir_path}\n"
- File.open(path, mode="w") do |fh|
+ File.open(path, "w") do |fh|
fh << status
end
end
# Extract the temporary directory path from the status file,
# and return this or nil if not found.
def get_tmpdir
- File.open(path, mode="r") do |fh|
+ File.open(path, "r") do |fh|
tmpdir_path = fh.read.chomp.split(";").last
return tmpdir_path if File.directory?(tmpdir_path)
end
script = File.basename($0)
stream = File.open(path)
- time0, args, tmp_dir = stream.first.split(";")
+ time0, args = stream.first.split(";")
stream.close
elap = time_diff(time0, time1)
command = [script, args].join(" ")
log_file = File.join(ENV["BP_LOG"], "biopieces.log")
- File.open(log_file, mode = "a") { |file| file.puts [time0, time1, elap, user, exit_status, command].join("\t") }
+ File.open(log_file, "a") { |file| file.puts [time0, time1, elap, user, exit_status, command].join("\t") }
end
# Delete status file.
script = File.basename($0)
pid = $$
path = File.join(ENV["BP_TMP"], [user, script, pid, "status"].join("."))
+
+ path
end
# Get the elapsed time from the difference between two time stamps.
begin
seq_name = @io.gets.chomp!
seq = @io.gets.chomp!
- qual_name = @io.gets.chomp!
+ @io.gets
qual = @io.gets.chomp!
entry = Seq.new
@vector = []
(0 ... @pattern.length + 1).each do |i|
- @vector[i] = Score.new(matches = 0, mismatches = 0, insertions = i, deletions = 0, edit_distance = i)
+ @vector[i] = Score.new(0, 0, i, 0, i)
end
end
end
def test_PatternMatcher_match_with_one_mismatch_and_edit_dist_one_returns_correctly
- m = @p.match("aCcg", pos = 0, edit_distance = 1)
+ m = @p.match("aCcg", 0, 1)
assert_equal(0, m.pos)
assert_equal("atcg", m.match)
assert_equal(3, m.matches)
end
def test_PatternMatcher_match_with_two_mismatch_and_edit_dist_one_returns_nil
- assert_nil(@p.match("aGcA", pos = 0, edit_distance = 1))
+ assert_nil(@p.match("aGcA", 0, 1))
end
def test_PatternMatcher_match_with_one_insertion_and_edit_dist_zero_returns_nil
end
def test_PatternMatcher_match_with_one_insertion_and_edit_dist_one_returns_correctly
- m = @p.match("atGcg", pos = 0, edit_distance = 1)
+ m = @p.match("atGcg", 0, 1)
assert_equal(0, m.pos)
assert_equal("atcg", m.match)
assert_equal(4, m.matches)
end
def test_PatternMatcher_match_with_two_insertions_and_edit_dist_one_returns_nil
- assert_nil(@p.match("atGcTg", pos = 0, edit_distance = 1))
+ assert_nil(@p.match("atGcTg", 0, 1))
end
def test_PatternMatcher_match_with_two_insertions_and_edit_dist_two_returns_correctly
- m = @p.match("atGcTg", pos = 0, edit_distance = 2)
+ m = @p.match("atGcTg", 0, 2)
assert_equal(0, m.pos)
assert_equal("atcg", m.match)
assert_equal(4, m.matches)
end
def test_PatternMatcher_match_with_one_deletion_and_edit_distance_one_returns_correctly
- m = @p.match("acg", pos = 0, edit_distance = 1)
+ m = @p.match("acg", 0, 1)
assert_equal(0, m.pos)
assert_equal("atcg", m.match)
assert_equal(3, m.matches)