]> git.donarmstrong.com Git - biopieces.git/blobdiff - code_ruby/lib/maasha/usearch.rb
finished refactoring s/has_key?/[]/
[biopieces.git] / code_ruby / lib / maasha / usearch.rb
index b0c8c2f8ce69f63234b87e9da7d8f1b71ba3acad..837585010f41fda3d18ea91292bdef452ba3076d 100644 (file)
@@ -37,6 +37,8 @@ class Usearch
     @outfile = outfile
     @options = options
     @command = []
+
+    raise UsearchError, %{Empty input file -> "#{@infile}"} if File.size(@infile) == 0
   end
 
   # Method that calls Usearch sorting for sorting a FASTA file
@@ -57,7 +59,6 @@ class Usearch
   # Method that calls Usearch sorting for sorting a FASTA file
   # according to cluster size.
   def sort_size
-    # usearch -sortsize seqs.fasta -output seqs.sorted.fasta
     @command << "usearch --sortsize #{@infile} --output #{@infile}.sort"
 
                execute
@@ -68,6 +69,7 @@ class Usearch
        # Method to execute clustering de novo.
   def cluster
     @command << "usearch --cluster #{@infile} --uc #{@outfile} --id #{@options[:identity]}"
+    @command << "--rev" if @options[:comp]
 
                execute
   end
@@ -76,19 +78,45 @@ class Usearch
   def usearch
     @command << "usearch --query #{@infile} --db #{@options[:database]} --userout #{@outfile}"
     @command << "--userfields target+tloz+thiz+query+bits+strand"
-    @command << "--id #{@options[:identity]}"  if @options.has_key? :identity
-    @command << "--evalue #{@options[:e_val]}" if @options.has_key? :e_val
+    @command << "--id #{@options[:identity]}"  if @options[:identity]
+    @command << "--evalue #{@options[:e_val]}" if @options[:e_val]
     @command << "--rev"
 
                execute
   end
 
+  # Method to execute uchime chimera detection.
+  def uchime
+    @command << "usearch --uchime #{@infile} --db #{@options[:database]} --uchimeout #{@outfile}"
+
+    execute
+  end
+
+  # Method to execute ustar alignment.
+  def ustar
+    command = %Q{grep "^[SH]" #{@outfile} > #{@outfile}.sub}
+    system(command)
+    raise "Command failed: #{command}" unless $?.success?
+
+    File.rename "#{@outfile}.sub", @outfile
+
+    @command << "usearch --uc2fastax #{@outfile} --input #{@infile} --output #{@infile}.sub"
+
+    execute
+
+    @command << "usearch --staralign #{@infile}.sub --output #{@outfile}"
+
+    execute
+
+    File.delete "#{@infile}.sub"
+  end
+
        # Method to parse a Uclust .uc file and for each line of data
        # yield a Biopiece record.
   def each_cluster
     record = {}
 
-    File.open(@outfile, mode="r") do |ios|
+    File.open(@outfile, "r") do |ios|
       ios.each_line do |line|
         if line !~ /^#/
           fields = line.chomp.split("\t")
@@ -113,7 +141,7 @@ class Usearch
   def each_hit
     record = {}
 
-    File.open(@outfile, mode="r") do |ios|
+    File.open(@outfile, "r") do |ios|
       ios.gets   # skip comment line
       ios.each_line do |line|
         fields = line.chomp.split("\t")
@@ -133,15 +161,58 @@ class Usearch
     self # conventionally
   end
 
+  # Method to parse a FASTA file with Ustar alignments and for each alignment
+  # yield an Align object.
+  def each_alignment
+    old_cluster = 0
+    entries     = []
+
+    Fasta.open(@outfile, "r") do |ios|
+      ios.each do |entry|
+        entry.seq.tr!('.', '-')   # Replace . with - in Ustar alignments.
+        cluster, identity, name = entry.seq_name.split('|')
+        cluster = cluster.to_i
+
+        if cluster == old_cluster
+          entries << entry
+        else
+          fix_alignment(entries)
+
+          yield Align.new(entries)
+
+          old_cluster = cluster
+          entries     = []
+          entries << entry
+        end
+      end
+
+      yield Align.new(entries) unless entries.empty?
+    end
+  end
+
   private
 
+  # Method that fixed Ustar bug resulting in alignments with uneven 
+  # sequence length.
+  def fix_alignment(entries)
+    if entries.size > 1
+      min, max = entries.minmax { |a, b| a.length <=> b.length } 
+
+      if min.length != max.length
+        entries.each do |entry|
+          entry.seq << '-' * (max.length - entry.length)
+        end
+      end
+    end
+  end
+
        # Method to execute a command using a system() call.
        # The command is composed of bits from the @command variable.
        def execute
                @command.unshift "nice -n 19"
-    @command << "--rev" if @options[:comp]
-               @command << "> /dev/null 2>&1" unless @options[:verbose]
+               @command << "--quiet" unless @options[:verbose]
                command = @command.join(" ")
+    $stderr.puts "Running command: #{command}" if @options[:verbose]
     system(command)
     raise "Command failed: #{command}" unless $?.success?