X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=code_ruby%2Flib%2Fmaasha%2Fusearch.rb;h=837585010f41fda3d18ea91292bdef452ba3076d;hb=c4b49c5ce1ed3b46ae37e6ebd73c21d67d6d4810;hp=75da3f7ac6f0dd58c88f7b4a05e035b1e48a75ab;hpb=9def10f098f8886f0a8a3358f6f18136cf727617;p=biopieces.git diff --git a/code_ruby/lib/maasha/usearch.rb b/code_ruby/lib/maasha/usearch.rb index 75da3f7..8375850 100644 --- a/code_ruby/lib/maasha/usearch.rb +++ b/code_ruby/lib/maasha/usearch.rb @@ -37,6 +37,8 @@ class Usearch @outfile = outfile @options = options @command = [] + + raise UsearchError, %{Empty input file -> "#{@infile}"} if File.size(@infile) == 0 end # Method that calls Usearch sorting for sorting a FASTA file @@ -67,6 +69,7 @@ class Usearch # Method to execute clustering de novo. def cluster @command << "usearch --cluster #{@infile} --uc #{@outfile} --id #{@options[:identity]}" + @command << "--rev" if @options[:comp] execute end @@ -75,19 +78,45 @@ class Usearch def usearch @command << "usearch --query #{@infile} --db #{@options[:database]} --userout #{@outfile}" @command << "--userfields target+tloz+thiz+query+bits+strand" - @command << "--id #{@options[:identity]}" if @options.has_key? :identity - @command << "--evalue #{@options[:e_val]}" if @options.has_key? :e_val + @command << "--id #{@options[:identity]}" if @options[:identity] + @command << "--evalue #{@options[:e_val]}" if @options[:e_val] @command << "--rev" execute end + # Method to execute uchime chimera detection. + def uchime + @command << "usearch --uchime #{@infile} --db #{@options[:database]} --uchimeout #{@outfile}" + + execute + end + + # Method to execute ustar alignment. + def ustar + command = %Q{grep "^[SH]" #{@outfile} > #{@outfile}.sub} + system(command) + raise "Command failed: #{command}" unless $?.success? + + File.rename "#{@outfile}.sub", @outfile + + @command << "usearch --uc2fastax #{@outfile} --input #{@infile} --output #{@infile}.sub" + + execute + + @command << "usearch --staralign #{@infile}.sub --output #{@outfile}" + + execute + + File.delete "#{@infile}.sub" + end + # Method to parse a Uclust .uc file and for each line of data # yield a Biopiece record. def each_cluster record = {} - File.open(@outfile, mode="r") do |ios| + File.open(@outfile, "r") do |ios| ios.each_line do |line| if line !~ /^#/ fields = line.chomp.split("\t") @@ -112,7 +141,7 @@ class Usearch def each_hit record = {} - File.open(@outfile, mode="r") do |ios| + File.open(@outfile, "r") do |ios| ios.gets # skip comment line ios.each_line do |line| fields = line.chomp.split("\t") @@ -132,15 +161,58 @@ class Usearch self # conventionally end + # Method to parse a FASTA file with Ustar alignments and for each alignment + # yield an Align object. + def each_alignment + old_cluster = 0 + entries = [] + + Fasta.open(@outfile, "r") do |ios| + ios.each do |entry| + entry.seq.tr!('.', '-') # Replace . with - in Ustar alignments. + cluster, identity, name = entry.seq_name.split('|') + cluster = cluster.to_i + + if cluster == old_cluster + entries << entry + else + fix_alignment(entries) + + yield Align.new(entries) + + old_cluster = cluster + entries = [] + entries << entry + end + end + + yield Align.new(entries) unless entries.empty? + end + end + private + # Method that fixed Ustar bug resulting in alignments with uneven + # sequence length. + def fix_alignment(entries) + if entries.size > 1 + min, max = entries.minmax { |a, b| a.length <=> b.length } + + if min.length != max.length + entries.each do |entry| + entry.seq << '-' * (max.length - entry.length) + end + end + end + end + # Method to execute a command using a system() call. # The command is composed of bits from the @command variable. def execute @command.unshift "nice -n 19" - @command << "--rev" if @options[:comp] - @command << "> /dev/null 2>&1" unless @options[:verbose] + @command << "--quiet" unless @options[:verbose] command = @command.join(" ") + $stderr.puts "Running command: #{command}" if @options[:verbose] system(command) raise "Command failed: #{command}" unless $?.success?