git.donarmstrong.com Git - biopieces.git/blobdiff - code_ruby/lib/maasha/usearch.rb
fixing usearch upgrade
[biopieces.git] / code_ruby / lib / maasha / usearch.rb
index 0bc72352d648c92d958a82aed390b3118518d4d2..63757911deebea8e62de71ea688e29311d5427b8 100644 (file)
@@ -43,7 +43,9 @@ class Usearch
   # according to descending sequence length.
   def sortbylength
     # usearch -sortbylength seqs.fasta -output seqs_sorted.fasta -minseqlength 64
-    @command << "usearch -sortbylength #{@infile} -output #{@infile}.sort"
+    @command << "usearch"
+    @command << "-sortbylength #{@infile}"
+    @command << "-output #{@infile}.sort"
 
                execute
 
@@ -54,7 +56,9 @@ class Usearch
   # according to cluster size.
   def sortbysize
     # usearch -sortbysize seqs.fasta -output seqs_sorted.fasta -minsize 4
-    @command << "usearch -sortbysize #{@infile} -output #{@infile}.sort"
+    @command << "usearch"
+    @command << "-sortbysize #{@infile}"
+    @command << "-output #{@infile}.sort"
 
                execute
 
@@ -64,7 +68,11 @@ class Usearch
   # Method to execute cluster_fast.
   def cluster_fast
     # usearch -cluster_fast query.fasta -id 0.9 -centroids nr.fasta -uc clusters.uc
-    @command << "usearch -cluster_fast #{@infile} -id #{@options[:identity]} -uc #{@outfile}"
+    @command << "usearch"
+    @command << "-cluster_fast #{@infile}"
+    @command << "-id #{@options[:identity]}"
+    @command << "-uc #{@outfile}"
+    @command << "-threads #{@options[:cpus]}"
 
     execute
   end
@@ -73,21 +81,49 @@ class Usearch
   # NB sequences must be sorted with sortbylength or sortbysize.
   def cluster_smallmem
     # usearch -cluster_smallmem query.fasta -id 0.9 -centroids nr.fasta -uc clusters.uc
-    @command << "usearch -cluster_smallmem #{@infile} -id #{@options[:identity]} -uc #{@outfile}"
+    @command << "usearch"
+    @command << "-cluster_smallmem #{@infile}"
+    @command << "-id #{@options[:identity]}"
+    @command << "-uc #{@outfile}"
     @command << "-strand both" if @options[:comp]
 
     execute
   end
 
-  # Method to execute database search.
-  def usearch
-    @command << "usearch --query #{@infile} --db #{@options[:database]} --userout #{@outfile}"
-    @command << "--userfields target+tloz+thiz+query+bits+strand"
-    @command << "--id #{@options[:identity]}"  if @options[:identity]
-    @command << "--evalue #{@options[:e_val]}" if @options[:e_val]
-    @command << "--rev"
+  # Method to execute database local search.
+  def usearch_local
+    # usearch -usearch_local query.fasta -db proteins.udb -id 0.8 -alnout results.aln
+    # usearch -usearch_local query.fasta -db ESTs.fasta -id 0.9 -evalue 1e-6 -blast6out results.m8 -strand plus -maxaccepts 8 -maxrejects 256
+    
+    @command << "usearch"
+    @command << "-usearch_local #{@infile}"
+    @command << "-db #{@options[:database]}"
+    @command << "-blast6out #{@outfile}"
+    @command << "-strand both"
+    @command << "-id #{@options[:identity]}"  if @options[:identity]
+    @command << "-evalue #{@options[:e_val]}" if @options[:e_val]
+    @command << "-maxaccepts #{@options[:maxaccepts]}"
+    @command << "-threads #{@options[:cpus]}"
 
-               execute
+    execute
+  end
+
+  # Method to execute database global search.
+  def usearch_global
+    # usearch -usearch_global query.fasta -db proteins.udb -id 0.8 -alnout results.aln
+    # usearch -usearch_global query.fasta -db ESTs.fasta -id 0.9 -blast6out results.m8 -strand plus -maxaccepts 8 -maxrejects 256
+
+    @command << "usearch"
+    @command << "-usearch_global #{@infile}"
+    @command << "-db #{@options[:database]}"
+    @command << "-blast6out #{@outfile}"
+    @command << "-strand both"
+    @command << "-id #{@options[:identity]}"
+    @command << "-evalue #{@options[:e_val]}" if @options[:e_val]
+    @command << "-maxaccepts #{@options[:maxaccepts]}"
+    @command << "-threads #{@options[:cpus]}"
+
+    execute
   end
 
   # Method to execute uchime chimera detection.
@@ -147,17 +183,24 @@ class Usearch
     record = {}
 
     File.open(@outfile, "r") do |ios|
-      ios.gets   # skip comment line
       ios.each_line do |line|
         fields = line.chomp.split("\t")
-
-        record[:REC_TYPE] = "USEARCH"
-        record[:S_ID]     = fields[0]
-        record[:S_BEG]    = fields[1].to_i
-        record[:S_END]    = fields[2].to_i
-        record[:Q_ID]     = fields[3]
-        record[:SCORE]    = fields[4].to_f
-        record[:STRAND]   = fields[5]
+        record[:REC_TYPE]   = "USEARCH"
+        record[:Q_ID]       = fields[0]
+        record[:S_ID]       = fields[1]
+        record[:IDENT]      = fields[2].to_f
+        record[:ALIGN_LEN]  = fields[3].to_i
+        record[:MISMATCHES] = fields[4].to_i
+        record[:GAPS]       = fields[5].to_i
+        record[:Q_BEG]      = fields[6].to_i - 1
+        record[:Q_END]      = fields[7].to_i - 1
+        record[:S_BEG]      = fields[8].to_i - 1
+        record[:S_END]      = fields[9].to_i - 1
+        record[:E_VAL]      = fields[10] == '*' ? '*' : fields[10].to_f
+        record[:SCORE]      = fields[11] == '*' ? '*' : fields[11].to_f
+        record[:STRAND]     = record[:S_BEG].to_i < record[:S_END].to_i ? '+' : '-'
+
+        record[:S_BEG], record[:S_END] = record[:S_END], record[:S_BEG] if record[:STRAND] == '-'
 
         yield record
       end