X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=code_ruby%2Flib%2Fmaasha%2Fseq%2Fassemble.rb;h=de14b64c1fe080fdcf65c34dd61c6e3dafc9e51f;hb=48bea5c28b89dc5586d0bddb338ccd6ba23aa1f9;hp=016322727cf15eb824f9e3f0518cca9b2ecb0241;hpb=af282a65d141826c15944437b07a0353dd14e79c;p=biopieces.git diff --git a/code_ruby/lib/maasha/seq/assemble.rb b/code_ruby/lib/maasha/seq/assemble.rb index 0163227..de14b64 100644 --- a/code_ruby/lib/maasha/seq/assemble.rb +++ b/code_ruby/lib/maasha/seq/assemble.rb @@ -22,8 +22,13 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +require 'inline' +require 'maasha/seq/ambiguity' + # Class containing methods to assemble two overlapping sequences into a single. class Assemble + extend Ambiguity + # Class method to assemble two Seq objects. def self.pair(entry1, entry2, options = {}) assemble = self.new(entry1, entry2, options) @@ -41,17 +46,14 @@ class Assemble @options[:overlap_max] = [@options[:overlap_max], entry1.length, entry2.length].min end - # Method to locate overlapping matche between two sequences. + # Method to locate overlapping matches between two sequences. def match overlap = @options[:overlap_max] - na_seq1 = NArray.to_na(@entry1.seq.downcase, "byte") - na_seq2 = NArray.to_na(@entry2.seq.downcase, "byte") - while overlap >= @options[:overlap_min] - hamming_dist = (na_seq1[-1 * overlap .. -1] ^ na_seq2[0 ... overlap]).count_true + mismatches_max = (overlap * @options[:mismatches_max] * 0.01).round - if hamming_dist <= (overlap * @options[:mismatches_max] * 0.01).round + if match_C(@entry1.seq, @entry2.seq, @entry1.length - overlap, 0, overlap, mismatches_max) entry_left = @entry1[0 ... @entry1.length - overlap] entry_right = @entry2[overlap .. -1] @@ -68,7 +70,7 @@ class Assemble entry_overlap.seq.upcase! entry_right.seq.downcase! entry_merged = entry_left + entry_overlap + entry_right - entry_merged.seq_name = @entry1.seq_name + ":overlap=#{overlap}:hamming=#{hamming_dist}" + entry_merged.seq_name = @entry1.seq_name + ":overlap=#{overlap}" return entry_merged end @@ -98,6 +100,59 @@ class Assemble merged end + + inline do |builder| + add_ambiguity_macro(builder) + + # C method for determining if two strings of equal length match + # given a maximum allowed mismatches and allowing for IUPAC + # ambiguity codes. Returns true if match, else false. + builder.c %{ + VALUE match_C( + VALUE _string1, // String 1 + VALUE _string2, // String 2 + VALUE _offset1, // Offset 1 + VALUE _offset2, // Offset 2 + VALUE _length, // String length + VALUE _max_mismatch // Maximum mismatches + ) + { + char *string1 = StringValuePtr(_string1); + char *string2 = StringValuePtr(_string2); + unsigned int offset1 = FIX2UINT(_offset1); + unsigned int offset2 = FIX2UINT(_offset2); + unsigned int length = FIX2UINT(_length); + unsigned int max_mismatch = FIX2UINT(_max_mismatch); + + unsigned int max_match = length - max_mismatch; + unsigned int match = 0; + unsigned int mismatch = 0; + unsigned int i = 0; + + for (i = 0; i < length; i++) + { + if (MATCH(string1[i + offset1], string2[i + offset2])) + { + match++; + + if (match >= max_match) { + return Qtrue; + } + } + else + { + mismatch++; + + if (mismatch > max_mismatch) { + return Qfalse; + } + } + } + + return Qfalse; + } + } + end end