From c15039f907e2ae22407256b4ca9405663f958efe Mon Sep 17 00:00:00 2001 From: martinahansen Date: Thu, 21 Jan 2010 09:11:56 +0000 Subject: [PATCH] added plot_lines git-svn-id: http://biopieces.googlecode.com/svn/trunk@839 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/cluster_seq | 119 +++++++++++++++++++++++---------------------- bp_bin/plot_lines | 119 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 180 insertions(+), 58 deletions(-) create mode 100755 bp_bin/plot_lines diff --git a/bp_bin/cluster_seq b/bp_bin/cluster_seq index 04fc8bb..53d8f2c 100755 --- a/bp_bin/cluster_seq +++ b/bp_bin/cluster_seq @@ -39,71 +39,77 @@ use Maasha::Filesys; # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -my ( $options, $in, $out, $tmp_dir, $tmp_fh1, $tmp_fh2, $fh, $record, $entry, @args1, @args2, $arg_str1, $arg_str2, $clusters ); - -$tmp_dir = Maasha::Biopieces::get_tmpdir(); +my ( $options, $in, $out, $tmp_dir, $tmp_file, $tmp_fh, $record, $entry, @args, $arg_str, @fields, $line ); $options = Maasha::Biopieces::parse_options( [ - { long => 'identity', short => 'i', type => 'float', mandatory => 'no', default => "0.9", allowed => undef, disallowed => undef }, - { long => 'word_size', short => 'w', type => 'uint', mandatory => 'no', default => 7, allowed => undef, disallowed => 0 }, - { long => 'fast_clust', short => 'f', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - - { long => 'tmp_dir', short => 't', type => 'dir!', mandatory => 'no', default => $tmp_dir, allowed => undef, disallowed => undef }, + { long => 'identity', short => 'i', type => 'float', mandatory => 'no', default => "0.9", allowed => undef, disallowed => undef }, + { long => 'library', short => 'l', type => 'file!', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'no_sort', short => 'n', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, ] ); $in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); $out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); -$tmp_fh1 = Maasha::Filesys::file_write_open( "$tmp_dir/cluster.fasta" ); -$tmp_fh2 = Maasha::Filesys::file_write_open( "$tmp_dir/cluster.stream" ); +$tmp_dir = Maasha::Biopieces::get_tmpdir(); +$tmp_file = "$tmp_dir/uclust.fasta"; +$tmp_fh = Maasha::Filesys::file_write_open( $tmp_file ); while ( $record = Maasha::Biopieces::get_record( $in ) ) { - if ( $entry = Maasha::Fasta::biopiece2fasta( $record ) ) - { - Maasha::Fasta::put_entry( $entry, $tmp_fh1 ); + if ( $entry = Maasha::Fasta::biopiece2fasta( $record ) ) { + Maasha::Fasta::put_entry( $entry, $tmp_fh ); } - Maasha::Biopieces::put_record( $record, $tmp_fh2 ); + Maasha::Biopieces::put_record( $record, $out ); } -close $tmp_fh1; -close $tmp_fh2; - -push @args1, "--sort $tmp_dir/cluster.fasta"; -push @args1, "--output $tmp_dir/cluster.fasta.sort"; -push @args1, "--tmpdir $options->{ 'tmp_dir' }"; -push @args1, "--quiet" if not $options->{ 'verbose' }; -push @args1, "> /dev/null 2>&1" if not $options->{ 'verbose' }; +close $tmp_fh; -push @args2, "--input $tmp_dir/cluster.fasta.sort"; -push @args2, "--id $options->{ 'identity' }"; -push @args2, "--tmpdir $options->{ 'tmp_dir' }"; -push @args2, "--uc $tmp_dir/cluster.uc"; -push @args2, "--quiet" if not $options->{ 'verbose' }; -push @args2, "> /dev/null 2>&1" if not $options->{ 'verbose' }; +uclust_sort( $tmp_file, $tmp_dir, $options->{ 'verbose' } ) if not $options->{ 'no_sort' }; -$arg_str1 = join " ", @args1; -$arg_str2 = join " ", @args2; +push @args, "--input $tmp_file"; +push @args, "--id $options->{ 'identity' }"; +push @args, "--tmpdir $tmp_dir"; +push @args, "--uc $tmp_file.out"; +push @args, "--lib $options->{ 'library' }" if $options->{ 'library' }; +push @args, "--libonly" if $options->{ 'library' }; +push @args, "--quiet" if not $options->{ 'verbose' }; +push @args, "> /dev/null 2>&1" if not $options->{ 'verbose' }; -Maasha::Common::run( "uclust", $arg_str1 ); -Maasha::Common::run( "uclust", $arg_str2 ); +$arg_str = join " ", @args; -$clusters = parse_clusters( "$tmp_dir/cluster.uc" ); +Maasha::Common::run( "uclust", $arg_str ); -$tmp_fh2 = Maasha::Filesys::file_read_open( "$tmp_dir/cluster.stream" ); +$tmp_fh = Maasha::Filesys::file_read_open( "$tmp_file.out" ); -while ( $record = Maasha::Biopieces::get_record( $tmp_fh2 ) ) +while ( $line = <$tmp_fh> ) { - if ( exists $clusters->{ $record->{ 'SEQ_NAME' } } ) { - $record->{ 'CLUSTER' } = $clusters->{ $record->{ 'SEQ_NAME' } }; - } + next if $line =~ /^#/; + + chomp $line; + + @fields = split "\t", $line; + + $record = { + REC_TYPE => 'UCLUST', + TYPE => $fields[ 0 ], + CLUSTER => $fields[ 1 ], + SIZE => $fields[ 2 ], + IDENT => $fields[ 3 ], + STRAND => $fields[ 4 ], + Q_BEG => $fields[ 5 ], + S_BEG => $fields[ 6 ], + CIGAR => $fields[ 7 ], + SEQ_NAME => $fields[ 8 ], + }; Maasha::Biopieces::put_record( $record, $out ); } +close $tmp_fh; + Maasha::Biopieces::close_stream( $in ); Maasha::Biopieces::close_stream( $out ); @@ -111,36 +117,33 @@ Maasha::Biopieces::close_stream( $out ); # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -sub parse_clusters +sub uclust_sort { # Martin A. Hansen, January 2010. - # Parses a uclust uc cluster file and returns a hash with - # sequence name as key and cluster number as value. + # Sorts a FASTA file according to sequence length + # - longest first - using uclust. - my ( $file, # cluster file + my ( $file, # file to sort + $tmp_dir, # temporary directory for sorting + $verbose, # verbose flag - OPTIONAL ) = @_; - # Returns a hash. - - my ( $fh, $line, %clusters, $seq_name, $cluster ); - - $fh = Maasha::Filesys::file_read_open( $file ); - - while ( $line = <$fh> ) - { - next if $line =~ /^#/; - - chomp $line; + # Returns nothing. + + my ( @args, $arg_str ); - ( undef, $cluster, undef, undef, undef, undef, undef, undef, $seq_name ) = split "\t", $line; + push @args, "--mergesort $file"; + push @args, "--output $file.sort"; + push @args, "--tmpdir $tmp_dir"; + push @args, "--quiet" if not $verbose; + push @args, "> /dev/null 2>&1" if not $verbose; - $clusters{ $seq_name } = $cluster; - } + $arg_str = join " ", @args; - close $fh; + Maasha::Common::run( "uclust", $arg_str ); - return wantarray ? %clusters : \%clusters; + rename "$file.sort", $file; } diff --git a/bp_bin/plot_lines b/bp_bin/plot_lines new file mode 100755 index 0000000..4b9c041 --- /dev/null +++ b/bp_bin/plot_lines @@ -0,0 +1,119 @@ +#!/usr/bin/env perl + +# Copyright (C) 2007-2009 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Plot one or more lines. + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +use warnings; +use strict; +use Data::Dumper; +use Maasha::Common; +use Maasha::Biopieces; +use Maasha::Plot; +use Maasha::Matrix; +use Maasha::Filesys; + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +my ( $options, $in, $out, $default, $terminals, $record, $AoA, $fh, $plot, $tmp_dir, $key, @list ); + +$default = "Lines"; +$terminals = "dumb,x11,aqua,post,svg"; + +$options = Maasha::Biopieces::parse_options( + [ + { long => 'no_stream', short => 'x', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'data_out', short => 'o', type => 'file', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'keys', short => 'k', type => 'list', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'list', short => 'l', type => 'string', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'terminal', short => 't', type => 'string', mandatory => 'no', default => 'dumb', allowed => $terminals, disallowed => undef }, + { long => 'title', short => 'T', type => 'string', mandatory => 'no', default => $default, allowed => undef, disallowed => undef }, + { long => 'xlabel', short => 'X', type => 'string', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'ylabel', short => 'Y', type => 'string', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'logscale_y', short => 'L', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + ] +); + +Maasha::Common::error( qq(neither 'keys' or 'list' specified - use one or the other) ) if not defined $options->{ 'keys' } and not defined $options->{ 'list' }; +Maasha::Common::error( qq(both 'keys' and 'list' specified - use only one or the other) ) if defined $options->{ 'keys' } and defined $options->{ 'list' }; + +$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); +$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); + +while ( $record = Maasha::Biopieces::get_record( $in ) ) +{ + if ( defined $options->{ 'list' } and defined $record->{ $options->{ 'list' } } ) + { + push @{ $AoA }, [ split ";", $record->{ $options->{ 'list' } } ]; + } + elsif ( defined $options->{ 'keys' } ) + { + undef @list; + + map { push @list, $record->{ $_ } if defined $record->{ $_ } } @{ $options->{ 'keys' } }; + + push @{ $AoA }, [ @list ] if scalar @list > 0; + } + + Maasha::Biopieces::put_record( $record, $out ) if not $options->{ "no_stream" }; +} + +$AoA = Maasha::Matrix::matrix_flip( $AoA ) if $options->{ 'list' }; # convert row to column + +$tmp_dir = Maasha::Biopieces::get_tmpdir(); + +$plot = Maasha::Plot::lineplot_simple( $AoA, $options, $tmp_dir ); + +$fh = Maasha::Biopieces::write_stream( $options->{ "data_out" } ); + +print $fh "$_\n" foreach @{ $plot }; + +close $fh; + +Maasha::Biopieces::close_stream( $in ); +Maasha::Biopieces::close_stream( $out ); + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +BEGIN +{ + Maasha::Biopieces::status_set(); +} + + +END +{ + Maasha::Biopieces::status_log(); +} + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +__END__ -- 2.39.5