1 package Maasha::Biopieces;
4 # Copyright (C) 2007-2009 Martin A. Hansen.
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 # http://www.gnu.org/copyleft/gpl.html
23 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
26 # Routines for manipulation, parsing and emitting of human/machine readable biopieces records.
29 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
32 use Getopt::Long qw( :config bundling );
37 use vars qw( @ISA @EXPORT_OK );
41 @ISA = qw( Exporter );
51 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SIGNAL HANDLER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
54 $SIG{ '__DIE__' } = \&sig_handler;
55 $SIG{ 'INT' } = \&sig_handler;
56 $SIG{ 'TERM' } = \&sig_handler;
59 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
64 my ( $time_stamp, $script, $user, $pid, $file, $fh );
66 $time_stamp = Maasha::Common::time_stamp();
67 $user = Maasha::Common::get_user();
68 $script = Maasha::Common::get_scriptname();
69 $pid = Maasha::Common::get_processid();
71 $file = bp_tmp() . "/" . join( ".", $user, $script, $pid ) . ".status";
72 $fh = Maasha::Filesys::file_write_open( $file );
75 print $fh join( ";", $time_stamp, join( " ", @ARGV ) ) . "\n";
83 # Martin A. Hansen, June 2009.
85 # Retrieves initial status information written with status_set and uses this
86 # to write a status entry to the log file.
88 my ( $status, # status - OPTIONAL
93 my ( $time0, $time1, $script, $user, $pid, $file, $fh, $elap, $fh_global, $fh_local, $line, $args, $tmp_dir );
97 $time1 = Maasha::Common::time_stamp();
98 $user = Maasha::Common::get_user();
99 $script = Maasha::Common::get_scriptname();
100 $pid = Maasha::Common::get_processid();
102 $file = bp_tmp() . "/" . join( ".", $user, $script, $pid ) . ".status";
104 return if not -f $file;
106 $fh = Maasha::Filesys::file_read_open( $file );
114 ( $time0, $args, $tmp_dir ) = split /;/, $line;
116 Maasha::Filesys::dir_remove( $tmp_dir ) if defined $tmp_dir;
118 $elap = Maasha::Common::time_stamp_diff( $time0, $time1 );
120 $fh_global = Maasha::Filesys::file_append_open( "$ENV{ 'BP_LOG' }/biopieces.log" );
121 flock($fh_global, 2);
122 $fh_local = Maasha::Filesys::file_append_open( "$ENV{ 'HOME' }/.biopieces.log" );
125 print $fh_global join( "\t", $time0, $time1, $elap, $user, $status, "$script $args" ) . "\n";
126 print $fh_local join( "\t", $time0, $time1, $elap, $user, $status, "$script $args" ) . "\n";
128 $fh_global->autoflush( 1 );
129 $fh_local->autoflush( 1 );
138 # Martin A. Hansen, January 2008.
140 # Log messages to logfile.
144 my ( $time_stamp, $user, $script, $fh );
146 $time_stamp = Maasha::Common::time_stamp();
147 $user = Maasha::Common::get_user();
148 $script = Maasha::Common::get_scriptname();
150 $fh = Maasha::Filesys::file_append_open( "$ENV{ 'BP_LOG' }/biopieces.log" );
153 print $fh "$time_stamp\t$user\t$script ", join( " ", @ARGV ), "\n";
163 # Martin A. Hansen, July 2007.
165 # Opens a stream to STDIN or a file,
167 my ( $file, # file - OPTIONAL
170 # Returns filehandle.
174 if ( not -t STDIN ) {
175 $fh = Maasha::Filesys::stdin_read();
176 } elsif ( not $file ) {
177 # Maasha::Common::error( qq(no data stream) );
179 $fh = Maasha::Filesys::file_read_open( $file );
188 # Martin A. Hansen, August 2007.
190 # Opens a stream to STDOUT or a file.
192 my ( $path, # path - OPTIONAL
193 $gzip, # compress data - OPTIONAL
196 # Returns filehandle.
201 $fh = Maasha::Filesys::file_write_open( $path, $gzip );
203 $fh = Maasha::Filesys::stdout_write();
212 # Martin A. Hansen, May 2009.
214 # Close stream if open.
216 my ( $fh, # filehandle
221 close $fh if defined $fh;
227 # Martin A. Hansen, July 2007.
229 # Reads one record at a time and converts that record
230 # to a Perl data structure (a hash) which is returned.
232 my ( $fh, # handle to stream
237 my ( $block, @lines, $line, $key, $value, %record );
239 return if not defined $fh;
241 local $/ = "\n---\n";
245 return if not defined $block;
249 @lines = split "\n", $block;
251 foreach $line ( @lines )
253 ( $key, $value ) = split ": ", $line, 2;
255 $record{ $key } = $value;
258 return wantarray ? %record : \%record;
264 # Martin A. Hansen, July 2007.
266 # Given a Perl datastructure (a hash ref) emits this to STDOUT or a filehandle.
268 my ( $data, # data structure
269 $fh, # file handle - OPTIONAL
274 if ( scalar keys %{ $data } )
278 map { print $fh "$_: $data->{ $_ }\n" } keys %{ $data };
283 map { print "$_: $data->{ $_ }\n" } keys %{ $data };
294 # Martin A. Hansen, May 2009
296 # Parses and checks options for Biopieces.
298 # First the argument list is checked for duplicates and then
299 # options are parsed from ARGV after which it is checked if
300 # the Biopieces usage information should be printed. Finally,
301 # all options from ARGV are checked according to the argument list.
303 my ( $arg_list, # data structure with argument description
308 my ( $arg, @list, $options );
310 # ---- Adding the mandatory arguments to the arg_list ----
312 push @{ $arg_list }, (
313 { long => 'help', short => '?', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
314 { long => 'stream_in', short => 'I', type => 'file!', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
315 { long => 'stream_out', short => 'O', type => 'file', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
316 { long => 'verbose', short => 'v', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
319 check_duplicates_args( $arg_list );
321 # ---- Compiling options list ----
323 foreach $arg ( @{ $arg_list } )
325 if ( $arg->{ 'type' } eq 'flag' ) {
326 push @list, "$arg->{ 'long' }|$arg->{ 'short' }";
328 push @list, "$arg->{ 'long' }|$arg->{ 'short' }=s";
332 # ---- Parsing options from @ARGV ----
336 Getopt::Long::GetOptions( $options, @list );
338 # print Dumper( $options );
340 check_print_usage( $options );
342 # ---- Expanding and checking options ----
344 foreach $arg ( @{ $arg_list } )
346 check_mandatory( $arg, $options );
347 set_default( $arg, $options );
348 check_uint( $arg, $options );
349 check_int( $arg, $options );
350 set_list( $arg, $options );
351 check_dir( $arg, $options );
352 check_file( $arg, $options );
353 set_files( $arg, $options );
354 check_files( $arg, $options );
355 check_allowed( $arg, $options );
356 check_disallowed( $arg, $options );
359 # print Dumper( $options );
361 # return wantarray ? $options : %{ $options }; # WTF! Someone changed the behaviour of wantarray???
367 sub check_duplicates_args
369 # Martin A. Hansen, May 2009
371 # Check if there are duplicate long or short arguments,
372 # and raise an error if so.
374 my ( $arg_list, # List of argument hashrefs,
379 my ( $arg, %check_hash );
381 foreach $arg ( @{ $arg_list } )
383 Maasha::Common::error( qq(Duplicate long argument: $arg->{ 'long' }) ) if exists $check_hash{ $arg->{ 'long' } };
384 Maasha::Common::error( qq(Duplicate short argument: $arg->{ 'short' }) ) if exists $check_hash{ $arg->{ 'short' } };
386 $check_hash{ $arg->{ 'long' } } = 1;
387 $check_hash{ $arg->{ 'short' } } = 1;
392 sub check_print_usage
394 # Martin A. Hansen, May 2009.
396 # Check if we need to print usage and print usage
397 # and exit if that is the case.
399 my ( $options, # option hash
404 my ( %options, $help, $script, $wiki );
406 %options = %{ $options };
407 $help = $options{ 'help' };
408 delete $options{ 'help' };
410 $script = Maasha::Common::get_scriptname();
412 if ( $script ne 'print_wiki' )
414 if ( $help or -t STDIN )
416 if ( not ( exists $options{ 'stream_in' } or $options{ 'data_in' } ) )
418 if ( scalar keys %options == 0 )
420 $wiki = $ENV{ 'BP_DIR' } . "/bp_usage/$script.wiki";
423 `print_wiki --data_in=$wiki --help`;
424 } elsif ( $script =~ /^(list_biopieces|list_genomes|list_mysql_databases|biostat)$/ ) {
427 `print_wiki --data_in=$wiki`;
440 # Martin A. Hansen, May 2009.
442 # Check if mandatory arguments are set and raises an error if not.
445 $options, # options hash
450 if ( $arg->{ 'mandatory' } eq 'yes' and not defined $options->{ $arg->{ 'long' } } ) {
451 Maasha::Common::error( qq(Argument --$arg->{ 'long' } is mandatory) );
458 # Martin A. Hansen, May 2009.
460 # Set default values in option hash.
463 $options, # options hash
468 if ( not defined $options->{ $arg->{ 'long' } } ) {
469 $options->{ $arg->{ 'long' } } = $arg->{ 'default' }
476 # Martin A. Hansen, May 2009.
478 # Check if value to argument is an unsigned integer and
479 # raises an error if not.
482 $options, # options hash
487 if ( $arg->{ 'type' } eq 'uint' and defined $options->{ $arg->{ 'long' } } )
489 if ( $options->{ $arg->{ 'long' } } !~ /^\d+$/ ) {
490 Maasha::Common::error( qq(Argument --$arg->{ 'long' } must be an unsigned integer - not $options->{ $arg->{ 'long' } }) );
498 # Martin A. Hansen, May 2009.
500 # Check if value to argument is an integer and
501 # raises an error if not.
504 $options, # options hash
509 if ( $arg->{ 'type' } eq 'int' and defined $options{ $arg->{ 'long' } } )
511 if ( $options->{ $arg->{ 'long' } } !~ /^-?\d+$/ ) {
512 Maasha::Common::error( qq(Argument --$arg->{ 'long' } must be an integer - not $options->{ $arg->{ 'long' } }) );
520 # Martin A. Hansen, May 2009.
522 # Splits an argument of type 'list' into a list that is put
523 # in the options hash.
526 $options, # options hash
531 if ( $arg->{ 'type' } eq 'list' and defined $options->{ $arg->{ 'long' } } ) {
532 $options->{ $arg->{ 'long' } } = [ split /,/, $options->{ $arg->{ 'long' } } ];
539 # Martin A. Hansen, May 2009.
541 # Check if an argument of type 'dir!' truly is a directory and
542 # raises an error if not.
545 $options, # options hash
550 if ( $arg->{ 'type' } eq 'dir!' and defined $options->{ $arg->{ 'long' } } )
552 if ( not -d $options->{ $arg->{ 'long' } } ) {
553 Maasha::Common::error( qq(No such directory: "$options->{ $arg->{ 'long' } }") );
561 # Martin A. Hansen, May 2009.
563 # Check if an argument of type 'file!' truly is a file and
564 # raises an error if not.
567 $options, # options hash
572 if ( $arg->{ 'type' } eq 'file!' and defined $options->{ $arg->{ 'long' } } )
574 if ( not -f $options->{ $arg->{ 'long' } } ) {
575 Maasha::Common::error( qq(No such file: "$options->{ $arg->{ 'long' } }") );
583 # Martin A. Hansen, May 2009.
585 # Split the argument to 'files' into a list that is put on the options hash.
588 $options, # options hash
593 if ( $arg->{ 'type' } eq 'files' and defined $options->{ $arg->{ 'long' } } ) {
594 $options->{ $arg->{ 'long' } } = [ split /,/, $options->{ $arg->{ 'long' } } ];
601 # Martin A. Hansen, May 2009.
603 # Split the argument to 'files!' and check if each file do exists before adding
604 # the file list to the options hash.
607 $options, # options hash
612 my ( $elem, @files );
614 if ( $arg->{ 'type' } eq 'files!' and defined $options->{ $arg->{ 'long' } } )
616 foreach $elem ( split /,/, $options->{ $arg->{ 'long' } } )
620 } elsif ( $elem =~ /\*/ ) {
621 push @files, glob( $elem );
625 if ( scalar @files == 0 ) {
626 Maasha::Common::error( qq(Argument to --$arg->{ 'long' } must be a valid file or fileglob expression - not $options->{ $arg->{ 'long' } }) );
629 $options->{ $arg->{ 'long' } } = [ @files ];
636 # Martin A. Hansen, May 2009.
638 # Check if all values to all arguement are allowed and raise an
642 $options, # options hash
649 if ( defined $arg->{ 'allowed' } and defined $options->{ $arg->{ 'long' } } )
651 map { $val_hash{ $_ } = 1 } split /,/, $arg->{ 'allowed' };
653 if ( $arg->{ 'type' } =~ /^(list|files|files!)$/ )
655 foreach $elem ( @{ $options->{ $arg->{ 'long' } } } )
657 if ( not exists $val_hash{ $elem } ) {
658 Maasha::Common::error( qq(Argument to --$arg->{ 'long' } $elem is not allowed) );
664 if ( not exists $val_hash{ $options->{ $arg->{ 'long' } } } ) {
665 Maasha::Common::error( qq(Argument to --$arg->{ 'long' } $options->{ $arg->{ 'long' } } is not allowed) );
674 # Martin A. Hansen, May 2009.
676 # Check if any values to all arguemnts are disallowed and raise an error if so.
679 $options, # options hash
684 my ( $val, %val_hash );
686 if ( defined $arg->{ 'disallowed' } and defined $options->{ $arg->{ 'long' } } )
688 foreach $val ( split /,/, $arg->{ 'disallowed' } )
690 if ( $options->{ $arg->{ 'long' } } eq $val ) {
691 Maasha::Common::error( qq(Argument to --$arg->{ 'long' } $val is disallowed) );
698 # marked for deletion - obsolete?
701 # # Martin A. Hansen, November 2007.
703 # # Extracts files from an explicit GetOpt::Long argument
704 # # allowing for the use of glob. E.g.
705 # # --data_in=test.fna
706 # # --data_in=test.fna,test2.fna
708 # # --data_in=test.fna,/dir/*.fna
710 # my ( $option, # option from GetOpt::Long
715 # my ( $elem, @files );
717 # foreach $elem ( split ",", $option )
720 # push @files, $elem;
721 # } elsif ( $elem =~ /\*/ ) {
722 # push @files, glob( $elem );
726 # return wantarray ? @files : \@files;
732 # Martin A. Hansen, April 2008.
734 # Removes temporary directory and exits gracefully.
735 # This subroutine is meant to be run always as the last
736 # thing even if a script is dies or is interrupted
739 my ( $sig, # signal from the %SIG
742 # print STDERR "signal->$sig<-\n";
744 my $script = Maasha::Common::get_scriptname();
750 if ( $sig =~ /MAASHA_ERROR/ )
752 print STDERR "\nProgram '$script' had an error" . " - Please wait for temporary data to be removed\n";
753 status_log( "ERROR" );
755 elsif ( $sig eq "INT" )
757 print STDERR "\nProgram '$script' interrupted (ctrl-c was pressed)" . " - Please wait for temporary data to be removed\n";
758 status_log( "INTERRUPTED" );
760 elsif ( $sig eq "TERM" )
762 print STDERR "\nProgram '$script' terminated (someone used kill?)" . " - Please wait for temporary data to be removed\n";
763 status_log( "TERMINATED" );
767 print STDERR "\nProgram '$script' died->$sig" . " - Please wait for temporary data to be removed\n";
768 status_log( "DIED" );
779 # Martin A. Hansen, July 2008.
781 # Cleans out any unused temporary files and directories in BP_TMP.
785 my ( $tmpdir, @dirs, $curr_pid, $dir, $user, $sid, $pid );
789 $curr_pid = Maasha::Common::get_processid();
791 @dirs = Maasha::Filesys::ls_dirs( $tmpdir );
793 foreach $dir ( @dirs )
795 if ( $dir =~ /^$tmpdir\/(.+)_(\d+)_(\d+)_bp_tmp$/ )
801 # next if $user eq "maasha"; # DEBUG
803 if ( $user eq Maasha::Common::get_user() )
805 if ( not Maasha::Common::process_running( $pid ) )
807 # print STDERR "Removing stale dir: $dir\n";
808 Maasha::Filesys::dir_remove( $dir );
810 elsif ( $pid == $curr_pid )
812 # print STDERR "Removing current dir: $dir\n";
813 Maasha::Filesys::dir_remove( $dir );
823 # Martin A. Hansen, April 2008.
825 # Create a temporary directory based on
826 # $ENV{ 'BP_TMP' } and sessionid. The directory
827 # name is written to the status file.
831 my ( $user, $sid, $pid, $script, $path, $file, $fh, $line );
833 $user = Maasha::Common::get_user();
834 $sid = Maasha::Common::get_sessionid();
835 $pid = Maasha::Common::get_processid();
836 $script = Maasha::Common::get_scriptname();
838 $path = bp_tmp() . "/" . join( "_", $user, $sid, $pid, "bp_tmp" );
839 $file = bp_tmp() . "/" . join( ".", $user, $script, $pid ) . ".status";
841 $fh = Maasha::Filesys::file_read_open( $file );
847 $fh = Maasha::Filesys::file_write_open( $file );
849 print $fh "$line;$path\n";
852 Maasha::Filesys::dir_create( $path );
860 # Martin A. Hansen, July 2009.
862 # Read Biopiece configuration info from .biopiecesrc.
863 # and returns the value of a given key.
865 my ( $key, # configuration key
870 my ( $file, $fh, $record );
872 $file = "$ENV{ 'HOME' }/.biopiecesrc";
874 return undef if not -f $file;
876 $fh = Maasha::Filesys::file_read_open( $file );
878 $record = get_record( $fh );
881 if ( exists $record->{ $key } ) {
882 return $record->{ $key };
890 # Martin A. Hansen, March 2013.
892 # Returns the BP_TMP path.
893 # Errs if no BP_TMP in ENV and
894 # creates BP_TMP if it doesn't exists.
898 Maasha::Common::error( qq(no BP_TMP set in %ENV) ) if not -d $ENV{ 'BP_TMP' };
900 $path = $ENV{ 'BP_TMP' };
902 unless ( -d $path ) { # No BP_TMP so we create it
903 mkdir $path or die qq(failed to create dir "$path": $!);
915 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<