+++ /dev/null
-#!/usr/bin/env perl
-
-# Copyright (C) 2007-2009 Martin A. Hansen.
-
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2
-# of the License, or (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-# http://www.gnu.org/copyleft/gpl.html
-
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-# Cluster sequences in the stream.
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
-use warnings;
-use strict;
-use Data::Dumper;
-use Maasha::Common;
-use Maasha::Biopieces;
-use Maasha::Fasta;
-use Maasha::Seq;
-use Maasha::Filesys;
-
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
-my ( $options, $in, $out, $tmp_dir, $tmp_file, $tmp_fh, $record, $entry, @args, $arg_str, @fields, $line );
-
-$options = Maasha::Biopieces::parse_options(
- [
- { long => 'identity', short => 'i', type => 'float', mandatory => 'no', default => "0.9", allowed => undef, disallowed => undef },
- { long => 'library', short => 'l', type => 'file!', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
- { long => 'no_sort', short => 'n', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
- ]
-);
-
-$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } );
-$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } );
-
-$tmp_dir = Maasha::Biopieces::get_tmpdir();
-$tmp_file = "$tmp_dir/uclust.fasta";
-$tmp_fh = Maasha::Filesys::file_write_open( $tmp_file );
-
-while ( $record = Maasha::Biopieces::get_record( $in ) )
-{
- if ( $entry = Maasha::Fasta::biopiece2fasta( $record ) ) {
- Maasha::Fasta::put_entry( $entry, $tmp_fh );
- }
-
- Maasha::Biopieces::put_record( $record, $out );
-}
-
-close $tmp_fh;
-
-uclust_sort( $tmp_file, $tmp_dir, $options->{ 'verbose' } ) if not $options->{ 'no_sort' };
-
-push @args, "--input $tmp_file";
-push @args, "--id $options->{ 'identity' }";
-push @args, "--tmpdir $tmp_dir";
-push @args, "--uc $tmp_file.out";
-push @args, "--lib $options->{ 'library' }" if $options->{ 'library' };
-push @args, "--libonly" if $options->{ 'library' };
-push @args, "--quiet" if not $options->{ 'verbose' };
-push @args, "> /dev/null 2>&1" if not $options->{ 'verbose' };
-
-$arg_str = join " ", @args;
-
-Maasha::Common::run( "uclust", $arg_str );
-
-$tmp_fh = Maasha::Filesys::file_read_open( "$tmp_file.out" );
-
-while ( $line = <$tmp_fh> )
-{
- next if $line =~ /^#/;
-
- chomp $line;
-
- @fields = split "\t", $line;
-
- $record = {
- REC_TYPE => 'UCLUST',
- TYPE => $fields[ 0 ],
- CLUSTER => $fields[ 1 ],
- SIZE => $fields[ 2 ],
- IDENT => $fields[ 3 ],
- STRAND => $fields[ 4 ],
- Q_BEG => $fields[ 5 ],
- S_BEG => $fields[ 6 ],
- CIGAR => $fields[ 7 ],
- SEQ_NAME => $fields[ 8 ],
- };
-
- Maasha::Biopieces::put_record( $record, $out );
-}
-
-close $tmp_fh;
-
-Maasha::Biopieces::close_stream( $in );
-Maasha::Biopieces::close_stream( $out );
-
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
-sub uclust_sort
-{
- # Martin A. Hansen, January 2010.
-
- # Sorts a FASTA file according to sequence length
- # - longest first - using uclust.
-
- my ( $file, # file to sort
- $tmp_dir, # temporary directory for sorting
- $verbose, # verbose flag - OPTIONAL
- ) = @_;
-
- # Returns nothing.
-
- my ( @args, $arg_str );
-
- push @args, "--mergesort $file";
- push @args, "--output $file.sort";
- push @args, "--tmpdir $tmp_dir";
- push @args, "--quiet" if not $verbose;
- push @args, "> /dev/null 2>&1" if not $verbose;
-
- $arg_str = join " ", @args;
-
- Maasha::Common::run( "uclust", $arg_str );
-
- rename "$file.sort", $file;
-}
-
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
-BEGIN
-{
- Maasha::Biopieces::status_set();
-}
-
-
-END
-{
- Maasha::Biopieces::status_log();
-}
-
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
-__END__
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each do |record|
if record.has_key? :SEQ
+ forward = false
+ reverse = false
seq = Seq.new_bp(record)
- pos = 0
- len = 0
- if forward = seq.patscan(options[:forward].to_s, pos, options[:mismatches], options[:insertions], options[:deletions])
- record[:FORWARD_POS] = forward.last.pos
- record[:FORWARD_LEN] = forward.last.length
- pos = forward.last.pos + forward.last.length
+ seq.patscan(options[:forward].to_s, 0, options[:mismatches], options[:insertions], options[:deletions]) do |match|
+ record[:FORWARD_POS] = match.pos
+ record[:FORWARD_LEN] = match.length
+ pos = match.pos + match.length
len = seq.length - pos
seq.subseq!(pos, len) if len > 0
+ forward = true
+ break
end
- if pos < seq.length && reverse = seq.patscan(options[:reverse].to_s, pos, options[:mismatches], options[:insertions], options[:deletions])
- record[:REVERSE_POS] = reverse.first.pos
- record[:REVERSE_LEN] = reverse.first.length
+ seq.patscan(options[:reverse].to_s, 0, options[:mismatches], options[:insertions], options[:deletions]) do |match|
+ record[:REVERSE_POS] = match.pos
+ record[:REVERSE_LEN] = match.length
pos = 0
- len = reverse.first.pos
+ len = match.pos
if len == 0
seq.seq = ""
seq.qual = "" if seq.qual
else
seq.subseq!(pos, len)
- end
+ end
+
+ reverse = true
+ break
end
if forward or reverse
execute
end
- # Method to execute database search.
- def usearch
- # usearch --query query.fasta --db db.fasta --uc results.uc --id 0.90 [--evalue E]
- @command << "usearch --query #{@infile} --db #{@options[:database]} --uc #{@outfile} --id #{@options[:identity]}"
- @command << "--evalue #{@options[:e_val]}" if @options.has_key? :e_val
-
- execute
- end
-
- # Method to execute clustering to database plus de novo if not matched.
- def usearch_uclust
- # usearch --cluster seqs_sorted.fasta --db db.fasta --uc results.uc --id 0.90
- @command << "usearch --cluster #{@infile} --db #{@options[:database]} --uc #{@outfile} --id #{@options[:identity]}"
-
- execute
- end
-
# Method to parse a Uclust .uc file and for each line of data
# yield a Biopiece record.
def each
if line !~ /^#/
fields = line.chomp.split("\t")
- record[:REC_TYPE] = "UCLUST"
+ next if fields[0] == 'C'
+
record[:TYPE] = fields[0]
record[:CLUSTER] = fields[1].to_i
- record[:SEQ_LEN] = fields[2].to_i
record[:IDENT] = fields[3].to_f
- record[:STRAND] = fields[4]
- record[:Q_BEG] = fields[5].to_i
- record[:S_BEG] = fields[6].to_i
- record[:S_END] = fields[6].to_i + fields[2].to_i
- record[:CIGAR] = fields[7]
record[:Q_ID] = fields[8]
- record[:S_ID] = fields[9]
yield record
end
end
end
-ok_methods = "uclust,usearch,usearch_uclust"
-
casts = []
casts << {:long=>'no_sort', :short=>'n', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'method', :short=>'m', :type=>'string', :mandatory=>true, :default=>"uclust", :allowed=>ok_methods, :disallowed=>nil}
-casts << {:long=>'database', :short=>'d', :type=>'file!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
casts << {:long=>'comp', :short=>'c', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
casts << {:long=>'identity', :short=>'i', :type=>'float', :mandatory=>true, :default=>0.9, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'e_val', :short=>'e', :type=>'float', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
options = Biopieces.options_parse(ARGV, casts)
-# At high identities, around 96% and above, compressed indexes are often more sensitive, faster
-# and use less RAM. Compressed indexes are disabled by default, so I generally recommend that
-# you specify the --slots and --w options when clustering at high identities.
-
-tmpdir = Biopieces.mktmpdir
-infile = File.join(tmpdir, "in.fna")
-outfile = File.join(tmpdir, "out.uc")
+tmpdir = Biopieces.mktmpdir
+file_records = File.join(tmpdir, "data.stream")
+file_fasta = File.join(tmpdir, "in.fna")
+file_uclust = File.join(tmpdir, "out.uc")
-Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
- Fasta.open(infile, mode="w") do |fasta_io|
+Biopieces.open(options[:stream_in], file_records) do |input, output|
+ Fasta.open(file_fasta, mode="w") do |fasta_io|
input.each_record do |record|
output.puts record
end
end
end
+end
- uclust = Uclust.new(infile, outfile, options)
- uclust.sort unless options[:no_sort] or options[:method] == "usearch"
+uc = Uclust.new(file_fasta, file_uclust, options)
+uc.sort unless options[:no_sort]
+uc.cluster
- case options[:method].to_s
- when "uclust" then uclust.cluster
- when "usearch" then uclust.usearch
- when "usearch_uclust" then uclust.usearch_uclust
- end
+hash = {}
+
+uc.each do |record|
+ hash[record[:Q_ID].to_sym] = record.dup
+end
+
+Biopieces.open(file_records, options[:stream_out]) do |input, output|
+ input.each_record do |record|
+ if record.has_key? :SEQ_NAME and record.has_key? :SEQ
+ if hash.has_key? record[:SEQ_NAME].to_sym
+ uc = hash[record[:SEQ_NAME].to_sym]
+ record[:CLUSTER] = uc[:CLUSTER].to_i
+ record[:IDENT] = uc[:IDENT].to_i
+ record[:IDENT] = '*' if uc[:TYPE] == 'S'
+ end
+ end
- uclust.each do |record|
output.puts record
end
end
-#!/usr/bin/env perl
+#!/usr/bin/env ruby
-# Copyright (C) 2007-2010 Martin A. Hansen.
+# Copyright (C) 2007-2011 Martin A. Hansen.
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# http://www.gnu.org/copyleft/gpl.html
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-# USEARCH sequences in the stream against a specified database or genome.
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
-use warnings;
-use strict;
-use Data::Dumper;
-use Maasha::Biopieces;
-use Maasha::Common;
-use Maasha::Seq;
-use Maasha::Fasta;
-
-
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+# This program is part of the Biopieces framework (www.biopieces.org).
-my ( $TYPE_HASH, $options, $in, $out, $tmp_dir, $tmp_in, $tmp_out, $q_type, $record, $entry, $fh_in, $fh_out );
-
-$TYPE_HASH = {
- 'L' => 'LibSeed',
- 'S' => 'NewSeed',
- 'H' => 'Hit',
- 'R' => 'Reject',
- 'D' => 'LibCluster',
- 'C' => 'NewCluster',
- 'N' => 'NoHit'
-};
-
-$options = Maasha::Biopieces::parse_options(
- [
- { long => 'database', short => 'd', type => 'file', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
- { long => 'genome', short => 'g', type => 'genome', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
- { long => 'id', short => 'i', type => 'float', mandatory => 'no', default => '0.90', allowed => undef, disallowed => undef },
- ]
-);
-
-Maasha::Common::error( qq(both --database and --genome specified) ) if $options->{ "genome" } and $options->{ "database" };
-Maasha::Common::error( qq(no --database or --genome specified) ) if not $options->{ "genome" } and not $options->{ "database" };
-
-$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } );
-$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } );
-
-$options->{ "database" } = "$ENV{ 'BP_DATA' }/genomes/$options->{ 'genome' }/fasta/$options->{ 'genome' }.fna" if $options->{ 'genome' };
-
-$tmp_dir = Maasha::Biopieces::get_tmpdir();
-$tmp_in = "$tmp_dir/usearch_query.seq";
-$tmp_out = "$tmp_dir/usearch.uc";
-
-$fh_out = Maasha::Filesys::file_write_open( $tmp_in );
-
-while ( $record = Maasha::Biopieces::get_record( $in ) )
-{
- if ( $entry = Maasha::Fasta::biopiece2fasta( $record ) )
- {
- $q_type = Maasha::Seq::seq_guess_type( $record->{ 'SEQ' } ) if not $q_type;
-
- Maasha::Fasta::put_entry( $entry, $fh_out );
- }
-
- Maasha::Biopieces::put_record( $record, $out );
-}
-
-close $fh_out;
-
-
-if ( $options->{ 'verbose' } )
-{
- Maasha::Common::run(
- "uclust",
- join( " ",
- "--input $tmp_in",
- "--lib $options->{ 'database' }",
- "--libonly",
- "--uc $tmp_out",
- "--id $options->{ 'id' }",
- "--rev",
- ),
- 1
- );
-}
-else
-{
- Maasha::Common::run(
- "uclust",
- join( " ",
- "--input $tmp_in",
- "--lib $options->{ 'database' }",
- "--libonly",
- "--uc $tmp_out",
- "--id $options->{ 'id' }",
- "--rev",
- "> /dev/null 2>&1"
- ),
- 1
- );
-}
-
-unlink $tmp_in;
-
-$fh_in = Maasha::Filesys::file_read_open( $tmp_out );
-
-while ( $entry = get_tab_entry( $fh_in ) )
-{
- $record = uclust_tab2biopiece( $entry );
-
- Maasha::Biopieces::put_record( $record, $out ) if $record->{ 'TYPE' } eq 'Hit';
-}
-
-close $fh_out;
-
-unlink $tmp_out;
-
-Maasha::Biopieces::close_stream( $in );
-Maasha::Biopieces::close_stream( $out );
-
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
-sub get_tab_entry
-{
- # Martin A. Hansen, May 2009.
-
- # Get the next tabular entry from a filehandle to a USEARCH file.
-
- my ( $fh, # filehandle
- ) = @_;
-
- # Returns a list
-
- my ( $line, @fields );
-
- while ( $line = <$fh> )
- {
- next if $line =~ /^#/;
-
- @fields = split /\s+/, $line;
-
- return wantarray ? @fields : \@fields;
- }
-
- return undef;
-}
-
-
-sub uclust_tab2biopiece
-{
- # Martin A. Hansen, May 2009.
-
- # Get the next USEARCH tabular entry and convert it to
- # a biopiece record that is returned.
-
- my ( $entry, # USEARCH tabular entry
- ) = @_;
-
- # Returns a hashref.
-
- my ( %record );
-
- $record{ "REC_TYPE" } = "USEARCH";
- $record{ "TYPE" } = $TYPE_HASH->{ $entry->[ 0 ] };
- $record{ "CLUSTER" } = $entry->[ 1 ];
- $record{ "ALIGN_LEN" } = $entry->[ 2 ];
- $record{ "ID" } = $entry->[ 3 ];
- $record{ "STRAND" } = $entry->[ 4 ];
- $record{ "Q_BEG" } = $entry->[ 5 ];
- $record{ "S_BEG" } = $entry->[ 6 ];
- $record{ "CIGAR" } = $entry->[ 7 ];
- $record{ "Q_ID" } = $entry->[ 8 ];
- $record{ "S_ID" } = $entry->[ 9 ];
-
- if ( $record{ 'TYPE' } eq 'Hit' )
- {
- $record{ "Q_END" } = $record{ "Q_BEG" } + $record{ "ALIGN_LEN" } - 1;
- $record{ "S_END" } = $record{ "S_BEG" } + $record{ "ALIGN_LEN" } - 1;
- }
-
- return wantarray ? %record : \%record;
-}
-
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+# Run Uclust on sequences in the stream.
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-BEGIN
-{
- Maasha::Biopieces::status_set();
-}
-
-
-END
-{
- Maasha::Biopieces::status_log();
-}
+require 'maasha/biopieces'
+require 'maasha/fasta'
+
+SORT_LIMIT = 2_000_000_000 # use mergesort for files bigger than 2 Gb.
+
+class Uclust
+ include Enumerable
+
+ def initialize(infile, outfile, options)
+ @infile = infile
+ @outfile = outfile
+ @options = options
+ @command = []
+ end
+
+  # Method that calls Usearch to sort a FASTA file
+  # according to descending sequence length.
+ def sort
+ # usearch -sort seqs.fasta -output seqs.sorted.fasta
+ if File.size(@infile) < SORT_LIMIT
+ @command << "usearch --sort #{@infile} --output #{@infile}.sort"
+ else
+ @command << "usearch --mergesort #{@infile} --output #{@infile}.sort"
+ end
+
+ execute
+
+ File.rename "#{@infile}.sort", @infile
+ end
+
+ # Method to execute clustering de novo.
+ def cluster
+ @command << "usearch --cluster #{@infile} --uc #{@outfile} --id #{@options[:identity]}"
+
+ execute
+ end
+
+ # Method to execute database search.
+ def usearch
+ # usearch --query query.fasta --db db.fasta --uc results.uc --id 0.90 [--evalue E]
+ @command << "usearch --query #{@infile} --db #{@options[:database]} --uc #{@outfile} --id #{@options[:identity]}"
+ @command << "--evalue #{@options[:e_val]}" if @options.has_key? :e_val
+
+ execute
+ end
+
+ # Method to execute clustering to database plus de novo if not matched.
+ def usearch_uclust
+ # usearch --cluster seqs_sorted.fasta --db db.fasta --uc results.uc --id 0.90
+ @command << "usearch --cluster #{@infile} --db #{@options[:database]} --uc #{@outfile} --id #{@options[:identity]}"
+
+ execute
+ end
+
+ # Method to parse a Uclust .uc file and for each line of data
+ # yield a Biopiece record.
+ def each
+ record = {}
+
+ File.open(@outfile, mode="r") do |ios|
+ ios.each_line do |line|
+ if line !~ /^#/
+ fields = line.chomp.split("\t")
+
+ record[:REC_TYPE] = "UCLUST"
+ record[:TYPE] = fields[0]
+ record[:CLUSTER] = fields[1].to_i
+ record[:SEQ_LEN] = fields[2].to_i
+ record[:IDENT] = fields[3].to_f
+ record[:STRAND] = fields[4]
+ record[:Q_BEG] = fields[5].to_i
+ record[:S_BEG] = fields[6].to_i
+ record[:S_END] = fields[6].to_i + fields[2].to_i
+ record[:CIGAR] = fields[7]
+ record[:Q_ID] = fields[8]
+ record[:S_ID] = fields[9]
+
+ yield record
+ end
+ end
+ end
+
+ self # conventionally
+ end
+
+ private
+
+ # Method to execute a command using a system() call.
+ # The command is composed of bits from the @command variable.
+ def execute
+ @command.unshift "nice -n 19"
+ @command << "--rev" if @options[:comp]
+ @command << "> /dev/null 2>&1" unless @options[:verbose]
+ command = @command.join(" ")
+ system(command)
+ raise "Command failed: #{command}" unless $?.success?
+
+ @command = []
+ end
+end
+
+ok_methods = "uclust,usearch,usearch_uclust"
+
+casts = []
+casts << {:long=>'no_sort', :short=>'n', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'method', :short=>'m', :type=>'string', :mandatory=>true, :default=>"uclust", :allowed=>ok_methods, :disallowed=>nil}
+casts << {:long=>'database', :short=>'d', :type=>'file!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'comp', :short=>'c', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'identity', :short=>'i', :type=>'float', :mandatory=>true, :default=>0.9, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'e_val', :short=>'e', :type=>'float', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+
+options = Biopieces.options_parse(ARGV, casts)
+
+# At high identities, around 96% and above, compressed indexes are often more sensitive, faster
+# and use less RAM. Compressed indexes are disabled by default, so I generally recommend that
+# you specify the --slots and --w options when clustering at high identities.
+
+tmpdir = Biopieces.mktmpdir
+infile = File.join(tmpdir, "in.fna")
+outfile = File.join(tmpdir, "out.uc")
+
+Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
+ Fasta.open(infile, mode="w") do |fasta_io|
+ input.each_record do |record|
+ output.puts record
+
+ if record.has_key? :SEQ_NAME and record.has_key? :SEQ
+ fasta_io.puts Seq.new_bp(record).to_fasta
+ end
+ end
+ end
+
+ uclust = Uclust.new(infile, outfile, options)
+ uclust.sort unless options[:no_sort] or options[:method] == "usearch"
+
+ case options[:method].to_s
+ when "uclust" then uclust.cluster
+ when "usearch" then uclust.usearch
+ when "usearch_uclust" then uclust.usearch_uclust
+ end
+
+ uclust.each do |record|
+ output.puts record
+ end
+end
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<