#!/usr/bin/env ruby # Copyright (C) 2007-2010 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # http://www.gnu.org/copyleft/gpl.html # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # This program is part of the Biopieces framework (www.biopieces.org). # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< require 'biopieces' require 'fasta' class Uclust include Enumerable def initialize(infile, outfile) @infile = infile @outfile = outfile end # Method that calls Uclusts sorting for sorting a FASTA file # according to decending sequence length. def sort(options) command = "nice -n 19" command << " uclust --sort #{@infile} --output #{@infile}.sort" command << " > /dev/null 2>&1" unless options[:verbose] system(command) raise "Failed sorting file: #{@infile}" unless $?.success? File.rename "#{@infile}.sort", @infile end def ublast(options) # uclust --ublast query_seqs.fasta --db database.fasta --blast6out filename --evalue E options[:e_val] = 10 if options[:e_val].is_nil? command = "nice -n 19" command << " uclust --ublast #{@infile} --db #{options[:database]} --uc #{@outfile} --evalue #{options[:e_val]}" command << " > /dev/null 2>&1" unless options[:verbose] system(command) raise "Command failed: #{command}" unless $?.success? end def usearch(options) # uclust --query query.fasta --db db.fasta --uc results.uc --id 0.90 [--evalue E] command = "nice -n 19" command << " uclust --query #{@infile} --db #{options[:database]} --uc #{@outfile} --id #{options[:identity]}" command << " --evalue #{options[:e_val]}" if options.has_key? :e_val command << " > /dev/null 2>&1" unless options[:verbose] system(command) raise "Command failed: #{command}" unless $?.success? end def uclust(options) # uclust --input seqs_sorted.fasta --uc results.uc --id 0.90 command = "nice -n 19" command << " uclust --input #{@infile} --uc #{@outfile} --id #{options[:identity]}" command << " > /dev/null 2>&1" unless options[:verbose] system(command) raise "Command failed: #{command}" unless $?.success? end def usearch_uclust(options) # uclust --input seqs_sorted.fasta --lib db.fasta --uc results.uc --id 0.90 command = "nice -n 19" command << " uclust --input #{@infile} --lib #{options[:database]} --uc #{@outfile} --id #{options[:identity]}" command << " --lib #{options[:database]}" if options.has_key? :database command << " > /dev/null 2>&1" unless options[:verbose] system(command) raise "Command failed: #{command}" unless $?.success? end def each record = {} File.open(@outfile, mode="r") do |ios| ios.each_line do |line| if line !~ /^#/ fields = line.split("\t") record[:REC_TYPE] = "UCLUST" record[:TYPE] = fields[0] record[:CLUSTER] = fields[1] record[:SEQ_LEN] = fields[2] record[:IDENT] = fields[3] record[:STRAND] = fields[4] record[:Q_BEG] = fields[5] record[:S_BEG] = fields[6] record[:CIGAR] = fields[7] record[:Q_ID] = fields[8] record[:S_ID] = fields[9] yield record end end end self # conventionally end end ok_methods = "ublast,usearch,uclust,usearch_uclust" casts = [] casts << {:long=>'no_sort', :short=>'n', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} casts << {:long=>'method', :short=>'m', :type=>'string', :mandatory=>true, :default=>"uclust", :allowed=>ok_methods, :disallowed=>nil} casts << {:long=>'database', :short=>'d', :type=>'file!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} casts << {:long=>'identity', :short=>'i', :type=>'float', :mandatory=>true, :default=>0.9, :allowed=>nil, :disallowed=>nil} casts << {:long=>'e_val', :short=>'e', :type=>'float', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} bp = Biopieces.new options = bp.parse(ARGV, casts) tmpdir = bp.mktmpdir infile = "#{tmpdir}/in.fna" outfile = "#{tmpdir}/out.uc" Fasta.open(infile, mode="w") do |fasta_io| bp.each_record do |record| bp.puts record fasta_io.puts record end end uclust = Uclust.new(infile, outfile) uclust.sort(options) unless options[:no_sort] case options[:method].to_s when "ublast" then uclust.ublast(options) when "usearch" then uclust.usearch(options) when "uclust" then uclust.uclust(options) when "usearch_uclust" then uclust.usearch_uclust(options) else raise "Unknown method: #{options[:method]}" end uclust.each do |record| bp.puts record end # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< __END__