X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=code_ruby%2Flib%2Fmaasha%2Fseq%2Fassemble.rb;h=dffbf11ee43f25b1fcf684d72c86d7a95ead62f4;hb=ee14fa4dfd3f52f487a34e5c43ef6bbaacdcd1cb;hp=7745b5bed7141bd1ad4d500c71b9ac585b2ed0b2;hpb=256548158f770fa6b518a5cc86858655bdb910fe;p=biopieces.git diff --git a/code_ruby/lib/maasha/seq/assemble.rb b/code_ruby/lib/maasha/seq/assemble.rb index 7745b5b..dffbf11 100644 --- a/code_ruby/lib/maasha/seq/assemble.rb +++ b/code_ruby/lib/maasha/seq/assemble.rb @@ -22,12 +22,15 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +# Class containing methods to assemble two overlapping sequences into a single sequence. class Assemble + # Class method to assemble two Seq objects. def self.pair(entry1, entry2, options = {}) assemble = self.new(entry1, entry2, options) assemble.match end + # Method to initialize an Assemble object. def initialize(entry1, entry2, options) @entry1 = entry1 @entry2 = entry2 @@ -38,6 +41,7 @@ class Assemble @options[:overlap_max] = [@options[:overlap_max], entry1.length, entry2.length].min end + # Method to locate overlapping matches between two sequences. def match overlap = @options[:overlap_max] @@ -47,7 +51,7 @@ class Assemble while overlap >= @options[:overlap_min] hamming_dist = (na_seq1[-1 * overlap .. -1] ^ na_seq2[0 ... overlap]).count_true - if hamming_dist <= percent2real(overlap, @options[:mismatches_max]) + if hamming_dist <= (overlap * @options[:mismatches_max] * 0.01).round entry_left = @entry1[0 ... @entry1.length - overlap] entry_right = @entry2[overlap .. -1] @@ -70,10 +74,9 @@ class Assemble end end - def percent2real(length, percent) - (length * percent * 0.01).round - end - + # Method to merge sequence and quality scores in an overlap. + # The residue with the highest score at mismatch positions is selected. + # The quality scores of the overlap are the mean of the two sequences. 
def merge_overlap(entry_overlap1, entry_overlap2) na_seq = NArray.byte(entry_overlap1.length, 2) na_seq[true, 0] = NArray.to_na(entry_overlap1.seq.downcase, "byte")