X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Fuclust_seq;h=b20a29c672673ce690870e7f1e31d54df24e30b0;hb=db8e4454c9c278ac274b9a997d95324218045d8d;hp=bd63df8e3935b64e99cbca600a335b6e2ccb9082;hpb=f40fdc8f55be97c491ad5d2caba71bab5a970248;p=biopieces.git diff --git a/bp_bin/uclust_seq b/bp_bin/uclust_seq index bd63df8..b20a29c 100755 --- a/bp_bin/uclust_seq +++ b/bp_bin/uclust_seq @@ -58,6 +58,13 @@ class Uclust File.rename "#{@infile}.sort", @infile end + # Method to execute clustering de novo. + def cluster + @command << "usearch --cluster #{@infile} --uc #{@outfile} --id #{@options[:identity]}" + + execute + end + # Method to execute database search. def usearch # usearch --query query.fasta --db db.fasta --uc results.uc --id 0.90 [--evalue E] @@ -67,13 +74,6 @@ class Uclust execute end - # Method to execute clustering de novo. - def cluster - @command << "usearch --cluster #{@infile} --uc #{@outfile} --id #{@options[:identity]}" - - execute - end - # Method to execute clustering to database plus de novo if not matched. def usearch_uclust # usearch --cluster seqs_sorted.fasta --db db.fasta --uc results.uc --id 0.90 @@ -129,7 +129,7 @@ class Uclust end end -ok_methods = "usearch,uclust,usearch_uclust" +ok_methods = "uclust,usearch,usearch_uclust" casts = [] casts << {:long=>'no_sort', :short=>'n', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} @@ -141,6 +141,10 @@ casts << {:long=>'e_val', :short=>'e', :type=>'float', :mandatory=>false, :d options = Biopieces.options_parse(ARGV, casts) +# At high identities, around 96% and above, compressed indexes are often more sensitive, faster +# and use less RAM. Compressed indexes are disabled by default, so I generally recommend that +# you specify the --slots and --w options when clustering at high identities. + tmpdir = Biopieces.mktmpdir infile = File.join(tmpdir, "in.fna") outfile = File.join(tmpdir, "out.uc") @@ -157,13 +161,12 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| end uclust = Uclust.new(infile, outfile, options) - uclust.sort unless options[:no_sort] + uclust.sort unless options[:no_sort] or options[:method] == "usearch" case options[:method].to_s - when "usearch" then uclust.usearch when "uclust" then uclust.cluster + when "usearch" then uclust.usearch when "usearch_uclust" then uclust.usearch_uclust - else raise "Unknown method: #{options[:method]}" end uclust.each do |record|