File.rename "#{@infile}.sort", @infile
end
+ # Method to execute clustering de novo: appends the usearch --cluster command (input @infile, --uc output @outfile, --id taken from @options[:identity]) to @command, then calls execute.
+ def cluster
+ @command << "usearch --cluster #{@infile} --uc #{@outfile} --id #{@options[:identity]}"
+
+ execute
+ end
+
# Method to execute database search.
#
# NOTE(review): unlike `cluster`, this method appends nothing to @command
# before calling execute; the usearch command line appears only in the comment
# inside the body. Looks unfinished -- confirm whether the --query command
# should be built here before execute is called.
def usearch
# usearch --query query.fasta --db db.fasta --uc results.uc --id 0.90 [--evalue E]
execute
end
- # Method to execute clustering de novo.
- def cluster
- @command << "usearch --cluster #{@infile} --uc #{@outfile} --id #{@options[:identity]}"
-
- execute
- end
-
# Method to execute clustering to database plus de novo if not matched.
#
# NOTE(review): the body holds only the comment below -- nothing is appended
# to @command and execute is not called, so the method returns nil without
# running anything. Presumably still to be implemented; confirm before relying
# on the "usearch_uclust" method choice.
def usearch_uclust
# usearch --cluster seqs_sorted.fasta --db db.fasta --uc results.uc --id 0.90
end
end
-ok_methods = "usearch,uclust,usearch_uclust"
+ok_methods = "uclust,usearch,usearch_uclust"
casts = []
casts << {:long=>'no_sort', :short=>'n', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
options = Biopieces.options_parse(ARGV, casts)
+# At high identities (around 96% and above), compressed indexes are often more sensitive and
+# faster, and they use less RAM. Compressed indexes are disabled by default, so it is generally
+# recommended to specify the --slots and --w options when clustering at high identities.
+
tmpdir = Biopieces.mktmpdir
infile = File.join(tmpdir, "in.fna")
outfile = File.join(tmpdir, "out.uc")
end
uclust = Uclust.new(infile, outfile, options)
- uclust.sort unless options[:no_sort]
+ uclust.sort unless options[:no_sort] or options[:method] == "usearch"
case options[:method].to_s
- when "usearch" then uclust.usearch
when "uclust" then uclust.cluster
+ when "usearch" then uclust.usearch
when "usearch_uclust" then uclust.usearch_uclust
- else raise "Unknown method: #{options[:method]}"
end
uclust.each do |record|