X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=code_ruby%2Flib%2Fmaasha%2Fusearch.rb;h=0bc72352d648c92d958a82aed390b3118518d4d2;hb=2949257922cc90a28af23386a72dbad8157e4fa8;hp=837585010f41fda3d18ea91292bdef452ba3076d;hpb=7029104c208f9013c0e8804737cc4f5da8bf7524;p=biopieces.git diff --git a/code_ruby/lib/maasha/usearch.rb b/code_ruby/lib/maasha/usearch.rb index 8375850..0bc7235 100644 --- a/code_ruby/lib/maasha/usearch.rb +++ b/code_ruby/lib/maasha/usearch.rb @@ -27,8 +27,6 @@ require 'maasha/fasta' # Error class for all exceptions to do with Usearch. class UsearchError < StandardError; end -SORT_LIMIT = 2_000_000_000 # use mergesort for files biggern than 2Gb. - class Usearch include Enumerable @@ -43,13 +41,9 @@ class Usearch # Method that calls Usearch sorting for sorting a FASTA file # according to decending sequence length. - def sort_length - # usearch -sort seqs.fasta -output seqs.sorted.fasta - if File.size(@infile) < SORT_LIMIT - @command << "usearch --sort #{@infile} --output #{@infile}.sort" - else - @command << "usearch --mergesort #{@infile} --output #{@infile}.sort" - end + def sortbylength + # usearch -sortbylength seqs.fasta -output seqs_sorted.fasta -minseqlength 64 + @command << "usearch -sortbylength #{@infile} -output #{@infile}.sort" execute @@ -58,20 +52,31 @@ class Usearch # Method that calls Usearch sorting for sorting a FASTA file # according to cluster size. - def sort_size - @command << "usearch --sortsize #{@infile} --output #{@infile}.sort" + def sortbysize + # usearch -sortbysize seqs.fasta -output seqs_sorted.fasta -minsize 4 + @command << "usearch -sortbysize #{@infile} -output #{@infile}.sort" execute File.rename "#{@infile}.sort", @infile end - # Method to execute clustering de novo. - def cluster - @command << "usearch --cluster #{@infile} --uc #{@outfile} --id #{@options[:identity]}" - @command << "--rev" if @options[:comp] + # Method to execute cluster_fast. + def cluster_fast + # usearch -cluster_fast query.fasta -id 0.9 -centroids nr.fasta -uc clusters.uc + @command << "usearch -cluster_fast #{@infile} -id #{@options[:identity]} -uc #{@outfile}" - execute + execute + end + + # Method to execute cluster_smallmem. + # NB sequences must be sorted with sortbylength or sortbysize. + def cluster_smallmem + # usearch -cluster_smallmem query.fasta -id 0.9 -centroids nr.fasta -uc clusters.uc + @command << "usearch -cluster_smallmem #{@infile} -id #{@options[:identity]} -uc #{@outfile}" + @command << "-strand both" if @options[:comp] + + execute end # Method to execute database search. @@ -188,6 +193,8 @@ class Usearch yield Align.new(entries) unless entries.empty? end + + self # conventionally end private @@ -209,7 +216,6 @@ class Usearch # Method to execute a command using a system() call. # The command is composed of bits from the @command variable. def execute - @command.unshift "nice -n 19" @command << "--quiet" unless @options[:verbose] command = @command.join(" ") $stderr.puts "Running command: #{command}" if @options[:verbose]