git.donarmstrong.com Git - biopieces.git/commitdiff
added preservation of indels and case to align.rb
author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Thu, 12 Jan 2012 12:42:45 +0000 (12:42 +0000)
committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Thu, 12 Jan 2012 12:42:45 +0000 (12:42 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1725 74ccb610-7750-0410-82ae-013aeee3265d

code_ruby/lib/maasha/align.rb

index 13fb96fd4e66f72ba4e557b2e08857aa3938cb98..12cb5e011d5453c0131ae68c9e114b93c86fed77 100755 (executable)
@@ -91,25 +91,34 @@ class Align
   # Class method to align sequences in a list of Seq objects and
   # return these as a new list of Seq objects.
   def self.muscle(entries)
-    cmd     = "muscle"
-    aligned = []
+    has_qual = false
+    result   = []
+    index    = {}
+
+    Open3.popen3("muscle") do |stdin, stdout, stderr|
+      entries.each do |entry|
+        index[entry.seq_name] = entry
+
+        stdin.puts entry.to_fasta
+      end
 
-    Open3.popen3(cmd) do |stdin, stdout, stderr|
-      entries.each { |entry| stdin.puts entry.to_fasta }
       stdin.close
 
-      fa = Fasta.new(stdout)
+      aligned_entries = Fasta.new(stdout)
 
-      fa.each do |align|
-        if block_given?
-          yield align
-        else
-          aligned << align
+      aligned_entries.each do |fa_entry|
+        fq_entry = index[fa_entry.seq_name]
+
+        fa_entry.seq.scan(/-+/) do |m|
+          fq_entry.seq  = fq_entry.seq[0 ... $`.length]  + ('-' * m.length) + fq_entry.seq[$`.length .. -1]
+          fq_entry.qual = fq_entry.qual[0 ... $`.length] + ('@' * m.length) + fq_entry.qual[$`.length .. -1]
         end
+
+        result << fq_entry
       end
     end
 
-    return aligned unless block_given?
+    result
   end
 
   # Class method to create a consensus sequence from a list of Seq objects and