--- /dev/null
+#!/usr/bin/env ruby
+
+# Copyright (C) 2007-2010 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This program is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
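+# Run uclust (ublast, usearch, uclust or usearch_uclust) on the sequences in the stream and emit the results as UCLUST records.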
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+require 'biopieces'
+require 'fasta'
+
+class Uclust
+ include Enumerable
+
+ def initialize(infile, outfile)
+ @infile = infile
+ @outfile = outfile
+ end
+
+ # Method that calls Uclusts sorting for sorting a FASTA file
+ # according to decending sequence length.
+ def sort(options)
+ command = "nice -n 19"
+ command << " uclust --sort #{@infile} --output #{@infile}.sort"
+ command << " > /dev/null 2>&1" unless options[:verbose]
+ system(command)
+ raise "Failed sorting file: #{@infile}" unless $?.success?
+
+ File.rename "#{@infile}.sort", @infile
+ end
+
+ def ublast(options)
+ # uclust --ublast query_seqs.fasta --db database.fasta --blast6out filename --evalue E
+ options[:e_val] = 10 if options[:e_val].is_nil?
+ command = "nice -n 19"
+ command << " uclust --ublast #{@infile} --db #{options[:database]} --uc #{@outfile} --evalue #{options[:e_val]}"
+ command << " > /dev/null 2>&1" unless options[:verbose]
+ system(command)
+ raise "Command failed: #{command}" unless $?.success?
+ end
+
+ def usearch(options)
+ # uclust --query query.fasta --db db.fasta --uc results.uc --id 0.90 [--evalue E]
+ command = "nice -n 19"
+ command << " uclust --query #{@infile} --db #{options[:database]} --uc #{@outfile} --id #{options[:identity]}"
+ command << " --evalue #{options[:e_val]}" if options.has_key? :e_val
+ command << " > /dev/null 2>&1" unless options[:verbose]
+ system(command)
+ raise "Command failed: #{command}" unless $?.success?
+ end
+
+ def uclust(options)
+ # uclust --input seqs_sorted.fasta --uc results.uc --id 0.90
+ command = "nice -n 19"
+ command << " uclust --input #{@infile} --uc #{@outfile} --id #{options[:identity]}"
+ command << " > /dev/null 2>&1" unless options[:verbose]
+ system(command)
+ raise "Command failed: #{command}" unless $?.success?
+ end
+
+ def usearch_uclust(options)
+ # uclust --input seqs_sorted.fasta --lib db.fasta --uc results.uc --id 0.90
+ command = "nice -n 19"
+ command << " uclust --input #{@infile} --lib #{options[:database]} --uc #{@outfile} --id #{options[:identity]}"
+ command << " --lib #{options[:database]}" if options.has_key? :database
+ command << " > /dev/null 2>&1" unless options[:verbose]
+ system(command)
+ raise "Command failed: #{command}" unless $?.success?
+ end
+
+ def each
+ record = {}
+
+ File.open(@outfile, mode="r") do |ios|
+ ios.each_line do |line|
+ if line !~ /^#/
+ fields = line.split("\t")
+
+ record[:REC_TYPE] = "UCLUST"
+ record[:TYPE] = fields[0]
+ record[:CLUSTER] = fields[1]
+ record[:SEQ_LEN] = fields[2]
+ record[:IDENT] = fields[3]
+ record[:STRAND] = fields[4]
+ record[:Q_BEG] = fields[5]
+ record[:S_BEG] = fields[6]
+ record[:CIGAR] = fields[7]
+ record[:Q_ID] = fields[8]
+ record[:S_ID] = fields[9]
+
+ yield record
+ end
+ end
+ end
+
+ self # conventionally
+ end
+end
+
+ok_methods = "ublast,usearch,uclust,usearch_uclust"
+
+casts = []
+casts << {:long=>'no_sort', :short=>'n', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'method', :short=>'m', :type=>'string', :mandatory=>true, :default=>"uclust", :allowed=>ok_methods, :disallowed=>nil}
+casts << {:long=>'database', :short=>'d', :type=>'file!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'identity', :short=>'i', :type=>'float', :mandatory=>true, :default=>0.9, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'e_val', :short=>'e', :type=>'float', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+
+bp = Biopieces.new
+
+options = bp.parse(ARGV, casts)
+
+tmpdir = bp.mktmpdir
+infile = "#{tmpdir}/in.fna"
+outfile = "#{tmpdir}/out.uc"
+
+Fasta.open(infile, mode="w") do |fasta_io|
+ bp.each_record do |record|
+ bp.puts record
+ fasta_io.puts record
+ end
+end
+
+uclust = Uclust.new(infile, outfile)
+uclust.sort(options) unless options[:no_sort]
+
+case options[:method].to_s
+when "ublast" then uclust.ublast(options)
+when "usearch" then uclust.usearch(options)
+when "uclust" then uclust.uclust(options)
+when "usearch_uclust" then uclust.usearch_uclust(options)
+else raise "Unknown method: #{options[:method]}"
+end
+
+uclust.each do |record|
+ bp.puts record
+end
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__