]> git.donarmstrong.com Git - biopieces.git/blobdiff - code_ruby/lib/maasha/usearch.rb
fixed indentation in biopieces.rb
[biopieces.git] / code_ruby / lib / maasha / usearch.rb
index 75da3f7ac6f0dd58c88f7b4a05e035b1e48a75ab..7aa14b43c879a0d019a3012468915833b604ea79 100644 (file)
@@ -67,6 +67,7 @@ class Usearch
        # Method to execute clustering de novo.
   def cluster
     @command << "usearch --cluster #{@infile} --uc #{@outfile} --id #{@options[:identity]}"
+    # Append --rev when the :comp option is set (added here, not in execute).
+    @command << "--rev" if @options[:comp]
 
                 execute
   end
@@ -82,12 +83,31 @@ class Usearch
                execute
   end
 
+  # Method to execute ustar alignment.
+  def ustar
+    command = %Q{grep "^[SH]" #{@outfile} > #{@outfile}.sub}
+    system(command)
+    raise "Command failed: #{command}" unless $?.success?
+
+    File.rename "#{@outfile}.sub", @outfile
+
+    @command << "usearch --uc2fastax #{@outfile} --input #{@infile} --output #{@infile}.sub"
+
+    execute
+
+    @command << "usearch --staralign #{@infile}.sub --output #{@outfile}"
+
+    execute
+
+    File.delete "#{@infile}.sub"
+  end
+
        # Method to parse a Uclust .uc file and for each line of data
        # yield a Biopiece record.
   def each_cluster
     record = {}
 
-    File.open(@outfile, mode="r") do |ios|
+    File.open(@outfile, "r") do |ios|
       ios.each_line do |line|
         if line !~ /^#/
           fields = line.chomp.split("\t")
@@ -112,7 +132,7 @@ class Usearch
   def each_hit
     record = {}
 
-    File.open(@outfile, mode="r") do |ios|
+    File.open(@outfile, "r") do |ios|
       ios.gets   # skip comment line
       ios.each_line do |line|
         fields = line.chomp.split("\t")
@@ -132,15 +152,56 @@ class Usearch
     self # conventionally
   end
 
+  # Method to parse a FASTA file with Ustar alignments and for each alignment
+  # yield an Align object.
+  def each_alignment
+    old_cluster = 0
+    entries     = []
+
+    Fasta.open(@outfile, "r") do |ios|
+      ios.each do |entry|
+        entry.seq.tr!('.', '-')   # Replace . with - in Ustar alignments.
+        cluster, identity, name = entry.seq_name.split('|')
+        cluster = cluster.to_i
+
+        if cluster == old_cluster
+          entries << entry
+        else
+          fix_alignment(entries)
+
+          yield Align.new(entries)
+
+          old_cluster = cluster
+          entries     = []
+          entries << entry
+        end
+      end
+    end
+  end
+
   private
 
+  # Method that fixes a Ustar bug resulting in alignments with uneven
+  # sequence lengths.
+  def fix_alignment(entries)
+    if entries.size > 1
+      min, max = entries.minmax { |a, b| a.length <=> b.length } 
+
+      if min.length != max.length
+        entries.each do |entry|
+          entry.seq << '-' * (max.length - entry.length)
+        end
+      end
+    end
+  end
+
        # Method to execute a command using a system() call.
        # The command is composed of bits from the @command variable.
        def execute
                @command.unshift "nice -n 19"
-    @command << "--rev" if @options[:comp]
-               @command << "> /dev/null 2>&1" unless @options[:verbose]
+               @command << "--quiet" unless @options[:verbose]
                command = @command.join(" ")
+    $stderr.puts "Running command: #{command}" if @options[:verbose]
     system(command)
     raise "Command failed: #{command}" unless $?.success?