@outfile = outfile
@options = options
@command = []
+
+ raise UsearchError, %{Empty input file -> "#{@infile}"} if File.size(@infile) == 0
end
# Method that calls Usearch sorting for sorting a FASTA file
# Method to execute clustering de novo.
def cluster
@command << "usearch --cluster #{@infile} --uc #{@outfile} --id #{@options[:identity]}"
+ @command << "--rev" if @options[:comp]
execute
end
def usearch
@command << "usearch --query #{@infile} --db #{@options[:database]} --userout #{@outfile}"
@command << "--userfields target+tloz+thiz+query+bits+strand"
- @command << "--id #{@options[:identity]}" if @options.has_key? :identity
- @command << "--evalue #{@options[:e_val]}" if @options.has_key? :e_val
+ @command << "--id #{@options[:identity]}" if @options[:identity]
+ @command << "--evalue #{@options[:e_val]}" if @options[:e_val]
@command << "--rev"
execute
end
+ # Method to execute uchime chimera detection.
+ def uchime
+ @command << "usearch --uchime #{@infile} --db #{@options[:database]} --uchimeout #{@outfile}"
+
+ execute
+ end
+
+ # Method to execute ustar alignment.
+ def ustar
+ command = %Q{grep "^[SH]" #{@outfile} > #{@outfile}.sub}
+ system(command)
+ raise "Command failed: #{command}" unless $?.success?
+
+ File.rename "#{@outfile}.sub", @outfile
+
+ @command << "usearch --uc2fastax #{@outfile} --input #{@infile} --output #{@infile}.sub"
+
+ execute
+
+ @command << "usearch --staralign #{@infile}.sub --output #{@outfile}"
+
+ execute
+
+ File.delete "#{@infile}.sub"
+ end
+
# Method to parse a Uclust .uc file and for each line of data
# yield a Biopiece record.
def each_cluster
record = {}
- File.open(@outfile, mode="r") do |ios|
+ File.open(@outfile, "r") do |ios|
ios.each_line do |line|
if line !~ /^#/
fields = line.chomp.split("\t")
def each_hit
record = {}
- File.open(@outfile, mode="r") do |ios|
+ File.open(@outfile, "r") do |ios|
ios.gets # skip comment line
ios.each_line do |line|
fields = line.chomp.split("\t")
self # conventionally
end
+ # Method to parse a FASTA file with Ustar alignments and for each alignment
+ # yield an Align object.
+ def each_alignment
+ old_cluster = 0
+ entries = []
+
+ Fasta.open(@outfile, "r") do |ios|
+ ios.each do |entry|
+ entry.seq.tr!('.', '-') # Replace . with - in Ustar alignments.
+ cluster, identity, name = entry.seq_name.split('|')
+ cluster = cluster.to_i
+
+ if cluster == old_cluster
+ entries << entry
+ else
+ fix_alignment(entries)
+
+ yield Align.new(entries)
+
+ old_cluster = cluster
+ entries = []
+ entries << entry
+ end
+ end
+
+ yield Align.new(entries) unless entries.empty?
+ end
+ end
+
private
+ # Method that fixed Ustar bug resulting in alignments with uneven
+ # sequence length.
+ def fix_alignment(entries)
+ if entries.size > 1
+ min, max = entries.minmax { |a, b| a.length <=> b.length }
+
+ if min.length != max.length
+ entries.each do |entry|
+ entry.seq << '-' * (max.length - entry.length)
+ end
+ end
+ end
+ end
+
# Method to execute a command using a system() call.
# The command is composed of bits from the @command variable.
def execute
@command.unshift "nice -n 19"
- @command << "--rev" if @options[:comp]
- @command << "> /dev/null 2>&1" unless @options[:verbose]
+ @command << "--quiet" unless @options[:verbose]
command = @command.join(" ")
+ $stderr.puts "Running command: #{command}" if @options[:verbose]
system(command)
raise "Command failed: #{command}" unless $?.success?