X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=code_perl%2FMaasha%2FCommon.pm;h=079587ba4eb013f0b3d9f5179edb3efc4c204d24;hb=2f10201d21af91034ac31a45177de90d4ccef43b;hp=a6d63e10fbe6ee1edfc9060b398dfe4002fcdb4a;hpb=6a4043cd15c4d8f6c251e4a9c50b006f95cda2db;p=biopieces.git diff --git a/code_perl/Maasha/Common.pm b/code_perl/Maasha/Common.pm index a6d63e1..079587b 100644 --- a/code_perl/Maasha/Common.pm +++ b/code_perl/Maasha/Common.pm @@ -29,11 +29,13 @@ package Maasha::Common; # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +use warnings; use strict; use Carp; use Data::Dumper; use Storable; use IO::File; +use Time::HiRes qw( gettimeofday ); use Maasha::Config; use Exporter; @@ -42,6 +44,107 @@ use vars qw( @ISA @EXPORT @EXPORT_OK ); @ISA = qw( Exporter ) ; +use Inline ( C => <<'END_C', DIRECTORY => $ENV{ "BP_TMP" } ); + +int index_m( char *str, char *substr, size_t str_len, size_t substr_len, size_t offset, size_t max_mismatch ) +{ + /* Martin A. Hansen & Selene Fernandez, August 2008 */ + + /* Locates a substring within a string starting from offset and allowing for max_mismatch mismatches. */ + /* The begin position of the substring is returned if found otherwise -1 is returned. */ + + int i = 0; + int j = 0; + + size_t max_match = substr_len - max_mismatch; + + i = offset; + + while ( i < str_len - ( max_match + max_mismatch ) + 1 ) + { + j = 0; + + while ( j < substr_len - ( max_match + max_mismatch ) + 1 ) + { + if ( match_m( str, substr, str_len, substr_len, i, j, max_match, max_mismatch ) != 0 ) { + return i; + } + + j++; + } + + i++; + } + + return -1; +} + + +int match_m( char *str, char *substr, size_t str_len, size_t substr_len, size_t str_offset, size_t substr_offset, size_t max_match, size_t max_mismatch ) +{ + /* Martin A. Hansen & Selene Fernandez, August 2008 */ + + /* Compares a string and substring starting at speficied string and substring offset */ + /* positions allowing for a specified number of mismatches. Returns 1 if there is a */ + /* match otherwise returns 0. */ + + size_t match = 0; + size_t mismatch = 0; + + while ( str_offset <= str_len && substr_offset <= substr_len ) + { + if ( str[ str_offset ] == substr[ substr_offset ] ) + { + match++; + + if ( match >= max_match ) { + return 1; + }; + } + else + { + mismatch++; + + if ( mismatch > max_mismatch ) { + return 0; + } + } + + str_offset++; + substr_offset++; + } + + return 0; +} + + +void str_analyze_C( const char *string ) +{ + /* Martin A. Hansen, July 2009 */ + + /* Scans a string incrementing the char count in an array. */ + + int count[ 256 ] = { 0 }; /* Integer array spanning the ASCII alphabet */ + int i; + + for ( i = 0; i < strlen( string ); i++ ) { + count[ ( int ) string[ i ] ]++; + } + + Inline_Stack_Vars; + Inline_Stack_Reset; + + for ( i = 0; i < 256; i++ ) { + Inline_Stack_Push( sv_2mortal( newSViv( count[ i ] ) ) ); + } + + Inline_Stack_Done; +} + + +END_C + + # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @@ -62,9 +165,9 @@ sub error chomp $msg; - $script = &get_scriptname(); + $script = get_scriptname(); - $error = &Carp::longmess(); + $error = Carp::longmess(); @lines = split "\n", $error; @@ -77,7 +180,7 @@ sub error $file_max = length "File"; $line_max = length "Line"; - if ( $line =~ /^ at (.+) line (\d+)$/ ) + if ( $line =~ /^ at (.+) line (\d+)\.?$/ ) { $file = $1; $line_no = $2; @@ -100,6 +203,8 @@ sub error $file = $2; $line_no = $3; + $routine =~ s/\(.+\)$/ .../; + $routine_max = length $routine if length $routine > $routine_max; $file_max = length $file if length $file > $file_max; $line_max = length $line_no if length $line_no > $line_max; @@ -136,7 +241,7 @@ sub read_open { # Martin A. Hansen, January 2004. - # read opens a file and returns a filehandle. + # Read opens a file and returns a filehandle. my ( $path, # full path to file ) = @_; @@ -145,12 +250,12 @@ sub read_open my ( $fh, $type ); - $type = `file $path` if $path; + $type = `file $path` if -f $path; if ( $type =~ /gzip compressed/ ) { - $fh = new IO::File "zcat $path|" or &Maasha::Common::error( qq(Could not read-open file "$path": $!) ); + $fh = new IO::File "zcat $path|" or Maasha::Common::error( qq(Could not read-open file "$path": $!) ); } else { - $fh = new IO::File $path, "r" or &Maasha::Common::error( qq(Could not read-open file "$path": $!) ); + $fh = new IO::File $path, "r" or Maasha::Common::error( qq(Could not read-open file "$path": $!) ); } return $fh; @@ -172,9 +277,9 @@ sub write_open my ( $fh ); if ( $gzip ) { - $fh = new IO::File "|gzip -f>$path" or &Maasha::Common::error( qq(Could not write-open file "$path": $!) ); + $fh = new IO::File "|gzip -f>$path" or Maasha::Common::error( qq(Could not write-open file "$path": $!) ); } else { - $fh = new IO::File $path, "w" or &Maasha::Common::error( qq(Could not write-open file "$path": $!) ); + $fh = new IO::File $path, "w" or Maasha::Common::error( qq(Could not write-open file "$path": $!) ); } return $fh; @@ -194,7 +299,7 @@ sub append_open my ( $fh ); - $fh = new IO::File $path, "a" or &Maasha::Common::error( qq(Could not append-open file "$path": $!) ); + $fh = new IO::File $path, "a" or Maasha::Common::error( qq(Could not append-open file "$path": $!) ); return $fh; } @@ -208,215 +313,12 @@ sub pipe_open my ( $fh ); - $fh = new IO::File "-" or &Maasha::Common::error( qq(Could not open pipe: $!) ); - - return $fh; -} - - -sub read_stdin -{ - # Martin A. Hansen, July 2007. - - # Returns a filehandle to STDIN - - my ( $fh ); - - $fh = new IO::File "<&STDIN" or &Maasha::Common::error( qq(Could not read from STDIN: $!) ); + $fh = new IO::File "-" or Maasha::Common::error( qq(Could not open pipe: $!) ); return $fh; } -sub write_stdout -{ - # Martin A. Hansen, July 2007. - - # Returns a filehandle to STDOUT - - my ( $fh ); - - $fh = new IO::File ">&STDOUT" or &Maasha::Common::error( qq(Could not write to STDOUT: $!) ); - - return $fh; -} - - -sub file_store -{ - # Martin A. Hansen, December 2004. - - # writes a data structure to file. - - my ( $path, # full path to file - $data, # data structure - ) = @_; - - &Storable::store( $data, $path ) or &Maasha::Common::error( qq(Could not write-open file "$path": $!) ); -} - - -sub file_retrieve -{ - # Martin A. Hansen, December 2004. - - # retrieves hash data structure - # (this routines needs to test if its a hash, array or else) - - my ( $path, # full path to data file - ) = @_; - - my ( $data ); - - $data = &Storable::retrieve( $path ) or &Maasha::Common::error( qq(Could not read-open file "$path": $!) ); - - return wantarray ? %{ $data } : $data; -} - - -sub dir_create -{ - # Martin A. Hansen, July 2007. - - # Creates a directory. - - my ( $path, # full path to dir - ) = @_; - - # Returns nothing. - - if ( -d $path ) { - &Maasha::Common::error( qq(Directory already exists "$path": $!) ); - } else { - mkdir $path or &Maasha::Common::error( qq(Could not create directory "$path": $!) ); - } -} - - -sub dir_create_if_not_exists -{ - # Martin A. Hansen, May 2008. - - # Creates a directory if it does not already exists. - - my ( $path, # full path to dir - ) = @_; - - # Returns nothing. - - if ( not -d $path ) { - mkdir $path or &Maasha::Common::error( qq(Could not create directory "$path": $!) ); - } -} - - -sub dir_remove -{ - # Martin A. Hansen, April 2008. - - # Removes a directory recursively. - - my ( $path, # directory - ) = @_; - - &Maasha::Common::run( "rm", "-rf $path" ) if -d $path; -} - - -sub ls_dirs -{ - # Martin A. Hansen, June 2007. - - # returns all dirs in a given directory. - - my ( $path, # full path to directory - ) = @_; - - # returns a list of filenames. - - my ( $dh, @dirs ); - - $dh = &open_dir( $path ); - - @dirs = &read_dir( $dh ); - @dirs = grep { -d "$path/$_" } @dirs; - - map { $_ = "$path/$_" } @dirs; - - close $dh; - - return wantarray ? @dirs : \@dirs; -} - - -sub ls_files -{ - # Martin A. Hansen, June 2007. - - # returns all files in a given directory. - - my ( $path, # full path to directory - ) = @_; - - # returns a list of filenames. - - my ( $dh, @files ); - - $dh = &open_dir( $path ); - - @files = &read_dir( $dh ); - @files = grep { -f "$path/$_" } @files; - - map { $_ = "$path/$_" } @files; - - close $dh; - - return wantarray ? @files : \@files; -} - - -sub open_dir -{ - # Martin A. Hansen, June 2007. - - # open a directory and returns a directory handle - - use IO::Dir; - - my ( $path, # full path to directory - ) = @_; - - # returns object - - my $dh; - - $dh = IO::Dir->new( $path ) or &Maasha::Common::error( qq(Could not open dir "$path": $!) ); - - return $dh; -} - - -sub read_dir -{ - # Martin A. Hansen, June 2007. - - # read all files and directories from a directory. - - my ( $dh, # directory handle object - ) = @_; - - # returns list - - my ( $elem, @elems ); - - while ( defined( $elem = $dh->read ) ) { - push @elems, $elem; - } - - return wantarray ? @elems : \@elems; -} - - sub read_args { # Martin A. Hansen, December 2006 @@ -446,7 +348,7 @@ sub read_args } elsif ( -f $arg ) { push @files, $arg; } else { - &Maasha::Common::error( qq(Bad syntax in argument->"$arg") ); + Maasha::Common::error( qq(Bad syntax in argument->"$arg") ); } } @@ -460,13 +362,49 @@ sub read_args $ok_hash{ "DIRS" } = 1; $ok_hash{ "FILES" } = 1; - map { &Maasha::Common::error( qq(Unknown argument->"$_") ) if not exists $ok_hash{ $_ } } keys %hash; + map { Maasha::Common::error( qq(Unknown argument->"$_") ) if not exists $ok_hash{ $_ } } keys %hash; } return wantarray ? %hash : \%hash; } +sub get_time +{ + # Martin A. Hansen, July 2008. + + # Get current time as a number. + + # Returns a number. + + return time; +} + + +sub get_time_hires +{ + # Martin A. Hansen, May 2008. + + # Get current time in high resolution. + + # Returns a float. + + return gettimeofday(); +} + + +sub get_processid +{ + # Martin A. Hansen, July 2008. + + # Get process id for current process. + + # Returns a number. + + return $$; +} + + sub get_sessionid { # Martin A. Hansen, April 2008. @@ -475,7 +413,19 @@ sub get_sessionid # Returns a number - return time . $$; + return get_time . get_processid; +} + + +sub get_user +{ + # Martin A. Hansen, July 2008. + + # Return the user name of the current user. + + # Returns a string. + + return $ENV{ 'USER' }; } @@ -486,20 +436,21 @@ sub get_tmpdir # Create a temporary directory based on # $ENV{ 'BP_TMP' } and sessionid. - # Returns a path. + # this thing is a really bad solution and needs to be removed. - my ( $user, $sid, $path ); + # Returns a path. - &Maasha::Common::error( qq(no BP_TMP set in %ENV) ) if not -d $ENV{ 'BP_TMP' }; + my ( $user, $sid, $pid, $path ); - $user = $ENV{ 'USER' }; - $user =~ s/\.//g; + Maasha::Common::error( qq(no BP_TMP set in %ENV) ) if not -d $ENV{ 'BP_TMP' }; - $sid = &Maasha::Common::get_sessionid(); + $user = Maasha::Common::get_user(); + $sid = Maasha::Common::get_sessionid(); + $pid = Maasha::Common::get_processid(); - $path = "$ENV{ 'BP_TMP' }/$user\_$sid"; + $path = "$ENV{ 'BP_TMP' }/" . join( "_", $user, $sid, $pid, "bp_tmp" ); - &Maasha::Common::dir_create( $path ); + Maasha::Filesys::dir_create( $path ); return $path; } @@ -535,45 +486,33 @@ sub get_basename } -sub file_read +sub get_fields { - # Martin A. Hansen, December 2004. + # Martin A. Hansen, July 2008. - # given a file, a seek beg position and - # length, returns the corresponding string. - - my ( $fh, # file handle to file - $beg, # read start in file - $len, # read length of block - ) = @_; + # Given a filehandle to a file gets the + # next line which is split into a list of + # fields that is returned. - # returns string + my ( $fh, # filehandle + $delimiter, # field seperator - OPTIONAL + ) = @_; - my ( $string ); + # Returns a list. - &Maasha::Common::error( qq(Negative length: $len) ) if $len < 0; + my ( $line, @fields ); - sysseek $fh, $beg, 0; - sysread $fh, $string, $len; + $line = <$fh>; - return $string; -} + return if not defined $line; + chomp $line; -sub file_size -{ - # Martin A. Hansen, March 2007 + $delimiter ||= "\t"; - # returns the file size for a given file + @fields = split "$delimiter", $line; - my ( $path, # full path to file - ) = @_; - - # returns integer - - my $file_size = ( stat ( $path ) )[ 7 ]; - - return $file_size; + return wantarray ? @fields : \@fields; } @@ -592,11 +531,11 @@ sub run my ( $command_line, $result ); - $command_line = &Maasha::Config::get_exe( $exe ); + $command_line = Maasha::Config::get_exe( $exe ); $command_line .= " " . $args if $args; $command_line = "nice -n19 " . $command_line if $nice; - system( $command_line ) == 0 or &Maasha::Common::error( qq(Could not execute "$command_line": $?) ); + system( $command_line ) == 0 or Maasha::Common::error( qq(Could not execute "$command_line": $?) ); } @@ -616,7 +555,7 @@ sub run_and_return my ( $command_line, @result ); - $command_line = &Maasha::Config::get_exe( $exe ); + $command_line = Maasha::Config::get_exe( $exe ); $command_line .= " " . $args if $args; $command_line = "nice -n19 " . $command_line if $nice; @@ -655,6 +594,88 @@ sub time_stamp } +sub time_stamp_diff +{ + # Martin A. Hansen, June 2009. + + # Return the difference between two time stamps in + # the time stamp format. + + my ( $t0, # time stamp 0 + $t1, # time stamp 1 + ) = @_; + + # Returns a time stamp string. + + my ( $year0, $mon0, $day0, $hour0, $min0, $sec0, + $year1, $mon1, $day1, $hour1, $min1, $sec1, + $year, $mon, $day, $hour, $min, $sec ); + + $t0 =~ /(\d+)-(\d+)-(\d+) (\d+):(\d+):(\d+)/; + $year0 = $1; + $mon0 = $2; + $day0 = $3; + $hour0 = $4; + $min0 = $5; + $sec0 = $6; + + $sec0 += $day0 * 24 * 60 * 60; + $sec0 += $hour0 * 60 * 60;; + $sec0 += $min0 * 60; + + $t1 =~ /(\d+)-(\d+)-(\d+) (\d+):(\d+):(\d+)/; + $year1 = $1; + $mon1 = $2; + $day1 = $3; + $hour1 = $4; + $min1 = $5; + $sec1 = $6; + + $sec1 += $day1 * 24 * 60 * 60; + $sec1 += $hour1 * 60 * 60;; + $sec1 += $min1 * 60; + + $year = $year1 - $year0; + $mon = $mon1 - $mon0; + $day = $day1 - $day0; + + $sec = $sec1 - $sec0; + + $hour = int( $sec / ( 60 * 60 ) ); + $sec -= $hour * 60 * 60; + + $min = int( $sec / 60 ); + $sec -= $min * 60; + + return join( ":", sprintf( "%02d", $hour ), sprintf( "%02d", $min ), sprintf( "%02d", $sec ) ); +} + + +sub process_running +{ + # Martin A. Hansen, July 2008. + + # Given a process ID check if it is running + # on the system. Return 1 if the process is + # running else 0. + + my ( $pid, # process ID to check. + ) = @_; + + # Returns boolean + + my ( @ps_table ); + + @ps_table = run_and_return( "ps", " a" ); + + if ( grep /^\s*$pid\s+/, @ps_table ) { + return 1; + } else { + return 0; + } +} + + sub wrap_line { # Martin A. Hansen, May 2005 @@ -700,6 +721,29 @@ sub wrap_line } +sub str_analyze +{ + # Martin A. Hansen, July 2009. + + # Analyzes the string composition of a given string. + + my ( $str, # string to analyze + ) = @_; + + # Returns hash + + my ( @composition, %hash, $i ); + + @composition = Maasha::Common::str_analyze_C( $str ); + + for ( $i = 32; $i <= 126; $i++ ) { # Only include printable chars + $hash{ chr $i } = $composition[ $i ] + } + + return wantarray ? %hash : \%hash; +} + + # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 1;