git.donarmstrong.com Git - biopieces.git/commitdiff
added ustar and each_alignment methods to usearch.rb
author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Fri, 3 Feb 2012 07:10:13 +0000 (07:10 +0000)
committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Fri, 3 Feb 2012 07:10:13 +0000 (07:10 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1740 74ccb610-7750-0410-82ae-013aeee3265d

code_ruby/lib/maasha/usearch.rb

index 9f09751bc2900069a0025416c8abcbcf5fc4cc75..1618f5023eda51fccf86dfe64e7937162fe51e51 100644 (file)
@@ -107,7 +107,7 @@ class Usearch
   def each_cluster
     record = {}
 
-    File.open(@outfile, mode="r") do |ios|
+    File.open(@outfile, "r") do |ios|
       ios.each_line do |line|
         if line !~ /^#/
           fields = line.chomp.split("\t")
@@ -132,7 +132,7 @@ class Usearch
   def each_hit
     record = {}
 
-    File.open(@outfile, mode="r") do |ios|
+    File.open(@outfile, "r") do |ios|
       ios.gets   # skip comment line
       ios.each_line do |line|
         fields = line.chomp.split("\t")
@@ -152,8 +152,49 @@ class Usearch
     self # conventionally
   end
 
+  # Method to parse a FASTA file with Ustar alignments and for each alignment
+  # yield an Align object.
+  def each_alignment
+    # Reads @outfile as FASTA via Fasta.open and yields one Align object per
+    # run of consecutive entries sharing a cluster number. The cluster number
+    # is the first '|'-separated field of the sequence name. Returns self.
+    old_cluster = nil
+    entries     = []
+
+    Fasta.open(@outfile, "r") do |ios|
+      ios.each do |entry|
+        entry.seq.tr!('.', '-')   # Replace . with - in Ustar alignments.
+        cluster = entry.seq_name.split('|').first.to_i
+
+        # A new cluster number closes the alignment collected so far. The
+        # empty check avoids yielding an empty alignment before the first entry.
+        if cluster != old_cluster && !entries.empty?
+          fix_alignment(entries)
+
+          yield Align.new(entries)
+
+          entries = []
+        end
+
+        old_cluster = cluster
+        entries << entry
+      end
+    end
+
+    # Flush the last cluster: no cluster change follows it in the file, so
+    # the loop above never yields it.
+    unless entries.empty?
+      fix_alignment(entries)
+
+      yield Align.new(entries)
+    end
+
+    self # conventionally
+  end
+
   private
 
+  # Method that fixes a Ustar bug resulting in alignments with uneven
+  # sequence lengths by padding the shorter sequences with trailing '-'.
+  def fix_alignment(entries)
+    # Pads each entry's seq in place with trailing '-' up to the length of
+    # the longest entry (assumes entry.length reports the length of entry.seq).
+    # A single entry (or none) has nothing to be compared against.
+    return if entries.size <= 1
+
+    longest = entries.map { |entry| entry.length }.max
+
+    # Nothing to pad when all entries already share the same length.
+    return if entries.all? { |entry| entry.length == longest }
+
+    entries.each do |entry|
+      missing = longest - entry.length
+      entry.seq << '-' * missing
+    end
+  end
+
        # Method to execute a command using a system() call.
        # The command is composed of bits from the @command variable.
        def execute