3 # Copyright (C) 2007-2009 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
24 # Cluster sequences in the stream.
26 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
33 use Maasha::Biopieces;
39 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
42 my ( $options, $in, $out, $tmp_dir, $tmp_file, $tmp_fh, $record, $entry, @args, $arg_str, @fields, $line );
44 $options = Maasha::Biopieces::parse_options(
46 { long => 'identity', short => 'i', type => 'float', mandatory => 'no', default => "0.9", allowed => undef, disallowed => undef },
47 { long => 'library', short => 'l', type => 'file!', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
48 { long => 'no_sort', short => 'n', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
52 $in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } );
53 $out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } );
55 $tmp_dir = Maasha::Biopieces::get_tmpdir();
56 $tmp_file = "$tmp_dir/uclust.fasta";
57 $tmp_fh = Maasha::Filesys::file_write_open( $tmp_file );
59 while ( $record = Maasha::Biopieces::get_record( $in ) )
61 if ( $entry = Maasha::Fasta::biopiece2fasta( $record ) ) {
62 Maasha::Fasta::put_entry( $entry, $tmp_fh );
65 Maasha::Biopieces::put_record( $record, $out );
70 uclust_sort( $tmp_file, $tmp_dir, $options->{ 'verbose' } ) if not $options->{ 'no_sort' };
72 push @args, "--input $tmp_file";
73 push @args, "--id $options->{ 'identity' }";
74 push @args, "--tmpdir $tmp_dir";
75 push @args, "--uc $tmp_file.out";
76 push @args, "--lib $options->{ 'library' }" if $options->{ 'library' };
77 push @args, "--libonly" if $options->{ 'library' };
78 push @args, "--quiet" if not $options->{ 'verbose' };
79 push @args, "> /dev/null 2>&1" if not $options->{ 'verbose' };
81 $arg_str = join " ", @args;
83 Maasha::Common::run( "uclust", $arg_str );
85 $tmp_fh = Maasha::Filesys::file_read_open( "$tmp_file.out" );
87 while ( $line = <$tmp_fh> )
89 next if $line =~ /^#/;
93 @fields = split "\t", $line;
98 CLUSTER => $fields[ 1 ],
100 IDENT => $fields[ 3 ],
101 STRAND => $fields[ 4 ],
102 Q_BEG => $fields[ 5 ],
103 S_BEG => $fields[ 6 ],
104 CIGAR => $fields[ 7 ],
105 SEQ_NAME => $fields[ 8 ],
108 Maasha::Biopieces::put_record( $record, $out );
113 Maasha::Biopieces::close_stream( $in );
114 Maasha::Biopieces::close_stream( $out );
117 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
122 # Martin A. Hansen, January 2010.
124 # Sorts a FASTA file according to sequence length
125 # - longest first - using uclust.
127 my ( $file, # file to sort
128 $tmp_dir, # temporary directory for sorting
129 $verbose, # verbose flag - OPTIONAL
134 my ( @args, $arg_str );
136 push @args, "--mergesort $file";
137 push @args, "--output $file.sort";
138 push @args, "--tmpdir $tmp_dir";
139 push @args, "--quiet" if not $verbose;
140 push @args, "> /dev/null 2>&1" if not $verbose;
142 $arg_str = join " ", @args;
144 Maasha::Common::run( "uclust", $arg_str );
146 rename "$file.sort", $file;
150 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
155 Maasha::Biopieces::status_set();
161 Maasha::Biopieces::status_log();
165 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<