]> git.donarmstrong.com Git - biopieces.git/commitdiff
added comments to code
authormartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Wed, 13 Mar 2013 10:30:54 +0000 (10:30 +0000)
committermartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Wed, 13 Mar 2013 10:30:54 +0000 (10:30 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@2142 74ccb610-7750-0410-82ae-013aeee3265d

code_ruby/lib/maasha/seq/assemble.rb

index 7745b5bed7141bd1ad4d500c71b9ac585b2ed0b2..dffbf11ee43f25b1fcf684d72c86d7a95ead62f4 100644 (file)
 
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
+# Class containing methods to assemble two overlapping sequences into a single sequence.
 class Assemble
+  # Class method to assemble two Seq objects.
   def self.pair(entry1, entry2, options = {})
     assemble = self.new(entry1, entry2, options)
     assemble.match
   end
 
+  # Method to initialize an Assemble object.
   def initialize(entry1, entry2, options)
     @entry1  = entry1
     @entry2  = entry2
@@ -38,6 +41,7 @@ class Assemble
     @options[:overlap_max]      = [@options[:overlap_max], entry1.length, entry2.length].min
   end
 
+  # Method to locate overlapping matches between two sequences.
   def match
     overlap = @options[:overlap_max]
 
@@ -47,7 +51,7 @@ class Assemble
     while overlap >= @options[:overlap_min]
       hamming_dist = (na_seq1[-1 * overlap .. -1] ^ na_seq2[0 ... overlap]).count_true
 
-      if hamming_dist <= percent2real(overlap, @options[:mismatches_max])
+      if hamming_dist <= (overlap * @options[:mismatches_max] * 0.01).round
         entry_left  = @entry1[0 ... @entry1.length - overlap]
         entry_right = @entry2[overlap .. -1]
 
@@ -70,10 +74,9 @@ class Assemble
     end
   end
 
-  def percent2real(length, percent)
-    (length * percent * 0.01).round
-  end
-
+  # Method to merge sequence and quality scores in an overlap.
+  # The residue with the highest score at mismatch positions is selected.
+  # The quality scores of the overlap are the mean of the two sequences.
   def merge_overlap(entry_overlap1, entry_overlap2)
     na_seq = NArray.byte(entry_overlap1.length, 2)
     na_seq[true, 0] = NArray.to_na(entry_overlap1.seq.downcase, "byte")