From: martinahansen Date: Tue, 19 Jan 2010 08:45:44 +0000 (+0000) Subject: changed uclust engine from CD-hit to uclust X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=b4b4214381fd2b6c1ee6c7d04d320444bc9a6286;p=biopieces.git changed uclust engine from CD-hit to uclust git-svn-id: http://biopieces.googlecode.com/svn/trunk@837 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/bp_bin/cluster_seq b/bp_bin/cluster_seq index 26037e1..04fc8bb 100755 --- a/bp_bin/cluster_seq +++ b/bp_bin/cluster_seq @@ -39,7 +39,7 @@ use Maasha::Filesys; # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -my ( $options, $in, $out, $tmp_dir, $tmp_fh1, $tmp_fh2, $fh, $record, $entry, @args, $arg_str, $clusters ); +my ( $options, $in, $out, $tmp_dir, $tmp_fh1, $tmp_fh2, $fh, $record, $entry, @args1, @args2, $arg_str1, $arg_str2, $clusters ); $tmp_dir = Maasha::Biopieces::get_tmpdir(); @@ -72,16 +72,24 @@ while ( $record = Maasha::Biopieces::get_record( $in ) ) close $tmp_fh1; close $tmp_fh2; -push @args, "--ucluster $tmp_dir/cluster.fasta"; -push @args, "--id $options->{ 'identity' }"; -push @args, "--tmpdir $options->{ 'tmp_dir' }"; -push @args, "--output $tmp_dir/cluster.uc"; -push @args, "--quiet" if not $options->{ 'verbose' }; -push @args, "> /dev/null 2>&1" if not $options->{ 'verbose' }; +push @args1, "--sort $tmp_dir/cluster.fasta"; +push @args1, "--output $tmp_dir/cluster.fasta.sort"; +push @args1, "--tmpdir $options->{ 'tmp_dir' }"; +push @args1, "--quiet" if not $options->{ 'verbose' }; +push @args1, "> /dev/null 2>&1" if not $options->{ 'verbose' }; -$arg_str = join " ", @args; +push @args2, "--input $tmp_dir/cluster.fasta.sort"; +push @args2, "--id $options->{ 'identity' }"; +push @args2, "--tmpdir $options->{ 'tmp_dir' }"; +push @args2, "--uc $tmp_dir/cluster.uc"; +push @args2, "--quiet" if not $options->{ 'verbose' }; +push @args2, "> /dev/null 2>&1" if not $options->{ 'verbose' }; -Maasha::Common::run( "uclust", $arg_str ); +$arg_str1 = join " ", @args1; +$arg_str2 = join " ", @args2; + +Maasha::Common::run( "uclust", $arg_str1 ); +Maasha::Common::run( "uclust", $arg_str2 ); $clusters = parse_clusters( "$tmp_dir/cluster.uc" ); @@ -121,9 +129,11 @@ sub parse_clusters while ( $line = <$fh> ) { + next if $line =~ /^#/; + chomp $line; - ( $cluster, undef, undef, undef, $seq_name ) = split "\t", $line; + ( undef, $cluster, undef, undef, undef, undef, undef, undef, $seq_name ) = split "\t", $line; $clusters{ $seq_name } = $cluster; }