#!/usr/bin/env perl

# Copyright (C) 2007-2009 Martin A. Hansen.

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

# http://www.gnu.org/copyleft/gpl.html


# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

# Monitor the status of running Biopieces.
#
# Every --time seconds the screen is cleared and a table is printed with one
# row per *.status file found in $BP_TMP whose process is still listed by
# `ps aux`. Status files whose process is gone are removed.

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<


# NOTE: warnings are enabled with the pragma instead of "perl -w" on the
# shebang line, because env(1) on Linux treats "perl -w" as one program name.
use strict;
use warnings;
use File::Basename;
use Maasha::Biopieces;
use Maasha::Common;
use Maasha::Filesys;


# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<


my ( $options, $allowed, @files, $file, $user, $pid, $script, $info, $time0, $args, $du, @table, $user_filter, $sort_key );

$allowed = "USER,PID,%CPU,%MEM,TMP_DIR,START_TIME,RUN_TIME,COMMAND";

$options = Maasha::Biopieces::parse_options(
    [
        { long => 'time', short => 't', type => 'uint',   mandatory => 'no', default => 5,      allowed => undef,    disallowed => 0 },
        { long => 'sort', short => 's', type => 'string', mandatory => 'no', default => '%CPU', allowed => $allowed, disallowed => undef },
        { long => 'user', short => 'u', type => 'string', mandatory => 'no', default => undef,  allowed => undef,    disallowed => undef },
    ]
);

$user_filter = $options->{ 'user' };
$sort_key    = $options->{ 'sort' };

while ( 1 )
{
    system( "clear" );

    @table = ();

    @files = Maasha::Filesys::ls_files( $ENV{ 'BP_TMP' } );
    @files = grep { /\.status$/ } @files;

    if ( $user_filter )
    {
        # Match literally (\Q..\E) and only against the file name, so regex
        # metacharacters in the option and the directory part cannot cause
        # false matches.
        @files = grep { basename( $_ ) =~ /\Q$user_filter\E/ } @files;
    }

    foreach $file ( @files )
    {
        # Status files are named <user>.<script>.<pid>.status - strip any
        # directory part first so dots in the path cannot shift the fields.
        ( $user, $script, $pid ) = split /\./, basename( $file );

        # Never treat a file whose name does not parse as stale.
        next if not defined $pid or $pid !~ /^\d+$/;

        if ( $info = process_info_mac( $pid ) )
        {
            ( $time0, $args, $du ) = get_status_info( $file );

            $info->{ 'START_TIME' } = $time0;
            $info->{ 'RUN_TIME' }   = Maasha::Common::time_stamp_diff( $time0, Maasha::Common::time_stamp() );
            $info->{ 'TMP_DIR' }    = $du || "N/A";
            $info->{ 'COMMAND' }    = "$script $args";

            push @table, $info;
        }
        else
        {
            print STDERR "Removing stale file: $file\n";
            unlink $file;
        }
    }

    # Textual columns are sorted as strings, the rest (PID, %CPU, %MEM) numerically.
    if ( $sort_key =~ /^(USER|TMP_DIR|START_TIME|RUN_TIME|COMMAND)$/ ) {
        @table = sort { $a->{ $sort_key } cmp $b->{ $sort_key } } @table;
    } else {
        @table = sort { $a->{ $sort_key } <=> $b->{ $sort_key } } @table;
    }

    print join( "\t", 'USER', 'PID', '%CPU', '%MEM', 'TMP_DIR', 'START_TIME ', 'RUN_TIME', 'COMMAND' ), "\n";
    print join( "\t", $_->{ 'USER' }, $_->{ 'PID' }, $_->{ '%CPU' }, $_->{ '%MEM' }, $_->{ 'TMP_DIR' }, $_->{ 'START_TIME' }, $_->{ 'RUN_TIME' }, $_->{ 'COMMAND' } ), "\n" foreach @table;

    sleep $options->{ 'time' };
}


# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<


sub process_info_mac
{
    # Martin A. Hansen, June 2009.

    # Get process info for a given process given a PID by
    # calling ps aux on a Mac.

    # ps aux:
    # USER  PID  %CPU %MEM  VSZ  RSS  TT  STAT STARTED  TIME COMMAND

    my ( $pid,   # process id
       ) = @_;

    # Returns a hashref (or a hash in list context) keyed by the ps column
    # names above. Returns nothing if the PID is not numeric or not listed.

    my ( @processes, $process, @fields, %info_hash, $hour, $min, $ampm );

    # A non-numeric PID would compare numerically equal to PID 0.
    return if not defined $pid or $pid !~ /^\d+$/;

    @processes = `ps aux`;

    shift @processes;   # skip the header line

    foreach $process ( @processes )
    {
        chomp $process;

        # Limit to 11 fields so that COMMAND keeps the full command line.
        @fields = split " ", $process, 11;

        next if @fields < 2;

        if ( $fields[ 1 ] == $pid )
        {
            %info_hash = (
                "USER"    => $fields[ 0 ],
                "PID"     => $fields[ 1 ],
                "%CPU"    => $fields[ 2 ],
                "%MEM"    => $fields[ 3 ],
                "VSZ"     => $fields[ 4 ],
                "RSS"     => $fields[ 5 ],
                "TT"      => $fields[ 6 ],
                "STAT"    => $fields[ 7 ],
                "STARTED" => $fields[ 8 ],
                "TIME"    => $fields[ 9 ],
                "COMMAND" => $fields[ 10 ],
            );

            # Convert a 12-hour clock start time to 24-hour clock.
            if ( defined $info_hash{ 'STARTED' } and $info_hash{ 'STARTED' } =~ /^(\d+):(\d+)(AM|PM)$/ )
            {
                ( $hour, $min, $ampm ) = ( $1, $2, $3 );

                $hour %= 12;                      # 12AM -> 0 and 12PM -> 0 (+12 below)
                $hour += 12 if $ampm eq "PM";

                $info_hash{ 'STARTED' } = "$hour:$min";
            }

            return wantarray ? %info_hash : \%info_hash;
        }
    }

    return;
}


sub get_status_info
{
    # Read the first line of a status file, which holds the
    # fields time, arguments and temporary directory separated
    # by ';', and determine the disk usage of the temporary
    # directory if it exists.

    my ( $file,   # path to status file
       ) = @_;

    # Returns a list (or a listref in scalar context) with time, arguments
    # and disk usage. Disk usage is undef when there is no temporary
    # directory or du could not be run.

    my ( $fh, $line, $time, $args, $tmp_dir, $du, $fh_du );

    $fh   = Maasha::Filesys::file_read_open( $file );
    $line = <$fh>;
    close $fh;

    $line = "" if not defined $line;   # empty status file

    chomp $line;

    ( $time, $args, $tmp_dir ) = split ";", $line;

    if ( $tmp_dir and -d $tmp_dir )
    {
        # List form of pipe open: the path is passed to du as a single
        # argument without involving a shell.
        if ( open $fh_du, '-|', 'du', '-sh', '--', $tmp_dir )
        {
            $du = <$fh_du>;

            close $fh_du;
        }

        if ( defined $du )
        {
            chomp $du;

            $du =~ s/\s.*//;   # keep the size column only
        }
    }

    return wantarray ? ( $time, $args, $du ) : [ $time, $args, $du ];
}


# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<


BEGIN
{
    Maasha::Biopieces::status_set();
}


END
{
    Maasha::Biopieces::status_log();
}


# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<


__END__
=> 750000, allowed => undef, disallowed => undef }, + { long => 'tile_size', short => 't', type => 'uint', mandatory => 'no', default => 11, allowed => undef, disallowed => 0 }, + { long => 'step_size', short => 's', type => 'uint', mandatory => 'no', default => 11, allowed => undef, disallowed => 0 }, + { long => 'min_identity', short => 'm', type => 'uint', mandatory => 'no', default => 90, allowed => undef, disallowed => 0 }, + { long => 'min_score', short => 'M', type => 'uint', mandatory => 'no', default => 0, allowed => undef, disallowed => undef }, + { long => 'one_off', short => 'o', type => 'uint', mandatory => 'no', default => 0, allowed => undef, disallowed => undef }, ] ); @@ -62,6 +64,8 @@ $blat_args .= " -oneOff=$options->{ 'one_off' }"; $blat_args .= " -minIdentity=$options->{ 'min_identity' }"; $blat_args .= " -minScore=$options->{ 'min_score' }"; $blat_args .= " -stepSize=$options->{ 'step_size' }"; +$blat_args .= " -maxIntron=$options->{ 'intron_max' }"; +$blat_args .= " -fastMap" if $options->{ 'fast_map' }; # $blat_args .= " -ooc=" . 
#!/usr/bin/env ruby

# Prototype Biopiece: walks through the common command line switches, echoes
# each one, and finally prints whatever BioPieces#record_get returns for the
# file "test.stream".

require 'Maasha/biopieces'
require 'getoptlong'

# Builds the print_wiki command line for this script's usage page; +extra+ is
# appended verbatim to the command.
usage_command = lambda do | extra |
  script_name = $0.split( "/" ).last
  "print_wiki --data_in #{ ENV[ 'BP_DIR' ] }/bp_usage/#{ script_name }.wiki#{ extra }"
end

switches = GetoptLong.new(
  [ '--help',       '-?', GetoptLong::NO_ARGUMENT ],
  [ '--stream_in',  '-I', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--stream_out', '-O', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--verbose',    '-v', GetoptLong::NO_ARGUMENT ]
)

# Called without any arguments: replace this process with print_wiki.
exec usage_command.call( "" ) if ARGV.empty?

switches.each do | name, value |
  puts "option: #{ name } argument: #{ value }"

  if name == '--help'
    exec usage_command.call( " --help" )
  elsif name == '--stream_in'
    puts "Stream in"
    File.open( value )   # opened but not used any further
  elsif name == '--stream_out'
    puts "Stream out"
  elsif name == '--verbose'
    puts "blababnlbalbalbalbalab"
  end
end

bp_stream = BioPieces.new( "test.stream" )

puts bp_stream.record_get
+11,10 @@ export BP_BIN="$BP_DIR/bp_bin" # Directory with biopiece executable ### The following directories hold the biopiece libraries, modules, gems, etc - one per programming language. -export BP_PERL="$BP_DIR/code_perl" # Direcotry with Perl code. -export BP_C="$BP_DIR/code_c" # Direcotry with C code. -export BP_PYTHON="$BP_DIR/code_python" # Direcotry with Pyton code. -export BP_RUBY="$BP_DIR/code_ruby" # Direcotry with Ruby code. +export BP_PERL="$BP_DIR/code_perl" # Directory with Perl code. +export BP_C="$BP_DIR/code_c" # Directory with C code. +export BP_PYTHON="$BP_DIR/code_python" # Directory with Python code. +export BP_RUBY="$BP_DIR/code_ruby" # Directory with Ruby code. ### Here we add the biopiece variable to the existing PATH. diff --git a/code_perl/Maasha/Biopieces.pm b/code_perl/Maasha/Biopieces.pm index 09218a6..867f71b 100644 --- a/code_perl/Maasha/Biopieces.pm +++ b/code_perl/Maasha/Biopieces.pm @@ -289,7 +289,12 @@ sub parse_options { # Martin A. Hansen, May 2009 - # + # Parses and checks options for Biopieces. + + # First the argument list is checked for duplicates and then + # options are parsed from ARGV after which it is checked if + # the Biopieces usage information should be printed. Finally, + # all options from ARGV are checked according to the argument list. my ( $arg_list, # data structure with argument description ) = @_; @@ -409,7 +414,7 @@ sub check_print_usage if ( exists $options->{ 'help' } ) { `print_wiki --data_in=$wiki --help`; - } elsif ( $script =~ /^(list_biopieces|list_genomes)$/ ) { + } elsif ( $script =~ /^(list_biopieces|list_genomes|biostat)$/ ) { return; } else { `print_wiki --data_in=$wiki`; @@ -805,38 +810,6 @@ sub clean_tmp } -sub run_time -{ - # Martin A. Hansen, May 2009. - - # Returns a precision timestamp for calculating - # run time. - - return Maasha::Common::get_time_hires(); -} - - -sub run_time_print -{ - # Martin A. 
Hansen, May 2009 - - # Print the run time to STDERR for the current script if - # the verbose switch is set in the option hash. - - my ( $t0, # run time begin - $t1, # run time end - $options, # options hash - ) = @_; - - # Returns nothing - - my $script = Maasha::Common::get_scriptname(); - - print STDERR "Program: $script" . ( " " x ( 25 - length( $script ) ) ) . sprintf( "Run time: %.4f\n", ( $t1 - $t0 ) ) if $options->{ 'verbose' }; - -} - - sub get_tmpdir { # Martin A. Hansen, April 2008. diff --git a/code_perl/Maasha/Common.pm b/code_perl/Maasha/Common.pm index bd60729..9f4170c 100644 --- a/code_perl/Maasha/Common.pm +++ b/code_perl/Maasha/Common.pm @@ -594,6 +594,7 @@ sub time_stamp_diff $min0 = $5; $sec0 = $6; + $sec0 += $day0 * 24 * 60 * 60; $sec0 += $hour0 * 60 * 60;; $sec0 += $min0 * 60; @@ -605,6 +606,7 @@ sub time_stamp_diff $min1 = $5; $sec1 = $6; + $sec1 += $day1 * 24 * 60 * 60; $sec1 += $hour1 * 60 * 60;; $sec1 += $min1 * 60; diff --git a/code_ruby/Maasha/biopieces.rb b/code_ruby/Maasha/biopieces.rb index 0289d50..61d9cd7 100755 --- a/code_ruby/Maasha/biopieces.rb +++ b/code_ruby/Maasha/biopieces.rb @@ -1,6 +1,6 @@ class BioPieces RECORD_DELIMITER = "\n---\n" - + class Record attr_reader :__hash__ -- 2.39.2