X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=code_ruby%2Flib%2Fmaasha%2Fseq%2Fassemble.rb;h=7745b5bed7141bd1ad4d500c71b9ac585b2ed0b2;hb=8baaff296bcb2be5030906d02a36e5a27cb9d064;hp=e59e178b73df8ba45689836db02c852d5632f637;hpb=db292cc5d7f63299f8f2234a6150d3f16ac324ae;p=biopieces.git diff --git a/code_ruby/lib/maasha/seq/assemble.rb b/code_ruby/lib/maasha/seq/assemble.rb index e59e178..7745b5b 100644 --- a/code_ruby/lib/maasha/seq/assemble.rb +++ b/code_ruby/lib/maasha/seq/assemble.rb @@ -48,22 +48,22 @@ class Assemble hamming_dist = (na_seq1[-1 * overlap .. -1] ^ na_seq2[0 ... overlap]).count_true if hamming_dist <= percent2real(overlap, @options[:mismatches_max]) - merged = @entry1 + @entry2[overlap .. -1] + entry_left = @entry1[0 ... @entry1.length - overlap] + entry_right = @entry2[overlap .. -1] if @entry1.qual and @entry2.qual - qual1 = @entry1.qual[@entry1.length - overlap .. -1] - qual2 = @entry2.qual[0 ... overlap] + entry_overlap1 = @entry1[-1 * overlap .. -1] + entry_overlap2 = @entry2[0 ... overlap] - na_qual1 = NArray.to_na(qual1, "byte") - na_qual2 = NArray.to_na(qual2, "byte") - - qual = ((na_qual1 + na_qual2) / 2).to_s - - merged.seq_name = @entry1.seq_name + ":overlap=#{overlap}:hamming=#{hamming_dist}" - merged.qual[@entry1.length - overlap ... @entry1.length] = qual + entry_overlap = merge_overlap(entry_overlap1, entry_overlap2) + else + entry_overlap = @entry1[-1 * overlap .. -1] end - return merged + entry_merged = entry_left + entry_overlap + entry_right + entry_merged.seq_name = @entry1.seq_name + ":overlap=#{overlap}:hamming=#{hamming_dist}" + + return entry_merged end overlap -= 1 @@ -71,7 +71,26 @@ class Assemble end def percent2real(length, percent) - (length * percent * 0.01).round + (length * percent * 0.01).round + end + + def merge_overlap(entry_overlap1, entry_overlap2) + na_seq = NArray.byte(entry_overlap1.length, 2) + na_seq[true, 0] = NArray.to_na(entry_overlap1.seq.downcase, "byte") + na_seq[true, 1] = NArray.to_na(entry_overlap2.seq.downcase, "byte") + + na_qual = NArray.byte(entry_overlap1.length, 2) + na_qual[true, 0] = NArray.to_na(entry_overlap1.qual, "byte") + na_qual[true, 1] = NArray.to_na(entry_overlap2.qual, "byte") + + mask_xor = na_seq[true, 0] ^ na_seq[true, 1] > 0 + mask_seq = ((na_qual * mask_xor).eq( (na_qual * mask_xor).max(1))) + + merged = Seq.new() + merged.seq = (na_seq * mask_seq).max(1).to_s + merged.qual = na_qual.mean(1).round.to_type("byte").to_s + + merged end end