1 package Maasha::Biopieces;
4 # Copyright (C) 2007-2009 Martin A. Hansen.
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 # http://www.gnu.org/copyleft/gpl.html
23 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
26 # Routines for manipulation, parsing and emitting of human/machine readable biopieces records.
29 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
32 use Getopt::Long qw( :config bundling );
37 use vars qw( @ISA @EXPORT_OK );
41 @ISA = qw( Exporter );
51 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SIGNAL HANDLER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
54 $SIG{ '__DIE__' } = \&sig_handler;
55 $SIG{ 'INT' } = \&sig_handler;
56 $SIG{ 'TERM' } = \&sig_handler;
61 # Martin A. Hansen, January 2008.
63 # Log messages to logfile.
67 my ( $time_stamp, $user, $script, $fh_global, $fh_local );
69 $time_stamp = Maasha::Common::time_stamp();
70 $user = Maasha::Common::get_user();
71 $script = Maasha::Common::get_scriptname();
73 $fh_global = Maasha::Filesys::file_append_open( "$ENV{ 'BP_LOG' }/biopieces.log" );
74 $fh_local = Maasha::Filesys::file_append_open( "$ENV{ 'HOME' }/.biopieces.log" );
76 print $fh_global "$time_stamp\t$user\t$script ", join( " ", @ARGV ), "\n";
77 print $fh_local "$time_stamp\t$user\t$script ", join( " ", @ARGV ), "\n";
79 $fh_global->autoflush( 1 );
80 $fh_local->autoflush( 1 );
89 # Martin A. Hansen, July 2007.
91 # Opens a stream to STDIN or a file,
93 my ( $file, # file - OPTIONAL
100 if ( not -t STDIN ) {
101 $fh = Maasha::Filesys::stdin_read();
102 } elsif ( not $file ) {
103 # Maasha::Common::error( qq(no data stream) );
105 $fh = Maasha::Filesys::file_read_open( $file );
114 # Martin A. Hansen, August 2007.
116 # Opens a stream to STDOUT or a file.
118 my ( $path, # path - OPTIONAL
119 $gzip, # compress data - OPTIONAL
122 # Returns filehandle.
127 $fh = Maasha::Filesys::file_write_open( $path, $gzip );
129 $fh = Maasha::Filesys::stdout_write();
138 # Martin A. Hansen, May 2009.
140 # Close stream if open.
142 my ( $fh, # filehandle
147 close $fh if defined $fh;
153 # Martin A. Hansen, July 2007.
155 # Reads one record at a time and converts that record
156 # to a Perl data structure (a hash) which is returned.
158 my ( $fh, # handle to stream
163 my ( $block, @lines, $line, $key, $value, %record );
165 return if not defined $fh;
167 local $/ = "\n---\n";
171 return if not defined $block;
175 @lines = split "\n", $block;
177 foreach $line ( @lines )
179 ( $key, $value ) = split ": ", $line, 2;
181 $record{ $key } = $value;
184 return wantarray ? %record : \%record;
190 # Martin A. Hansen, July 2007.
192 # Given a Perl datastructure (a hash ref) emits this to STDOUT or a filehandle.
194 my ( $data, # data structure
195 $fh, # file handle - OPTIONAL
200 if ( scalar keys %{ $data } )
204 map { print $fh "$_: $data->{ $_ }\n" } keys %{ $data };
209 map { print "$_: $data->{ $_ }\n" } keys %{ $data };
220 # Martin A. Hansen, May 2009
224 my ( $arg_list, # data structure with argument description
229 my ( $arg, @list, $options );
231 # ---- Adding the mandatory arguments to the arg_list ----
233 push @{ $arg_list }, (
235 { long => 'help', short => '?', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
236 { long => 'stream_in', short => 'I', type => 'file!', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
237 { long => 'stream_out', short => 'O', type => 'file', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
238 { long => 'verbose', short => 'v', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
241 check_duplicates_args( $arg_list );
243 # ---- Compiling options list ----
245 foreach $arg ( @{ $arg_list } )
247 if ( $arg->{ 'type' } eq 'flag' ) {
248 push @list, "$arg->{ 'long' }|$arg->{ 'short' }";
250 push @list, "$arg->{ 'long' }|$arg->{ 'short' }=s";
254 # ---- Parsing options from @ARGV ----
258 Getopt::Long::GetOptions( $options, @list );
260 # print Dumper( $options );
262 check_print_usage( $options );
264 # ---- Expanding and checking options ----
266 foreach $arg ( @{ $arg_list } )
268 check_mandatory( $arg, $options );
269 set_default( $arg, $options );
270 check_uint( $arg, $options );
271 check_int( $arg, $options );
272 set_list( $arg, $options );
273 check_dir( $arg, $options );
274 check_file( $arg, $options );
275 set_files( $arg, $options );
276 check_files( $arg, $options );
277 check_allowed( $arg, $options );
278 check_disallowed( $arg, $options );
281 # print Dumper( $options );
283 # return wantarray ? $options : %{ $options }; # WTF! Someone changed the behaviour of wantarray???
289 sub check_duplicates_args
291 # Martin A. Hansen, May 2009
293 # Check if there are duplicate long or short arguments,
294 # and raise an error if so.
296 my ( $arg_list, # List of argument hashrefs,
301 my ( $arg, %check_hash );
303 foreach $arg ( @{ $arg_list } )
305 Maasha::Common::error( qq(Duplicate long argument: $arg->{ 'long' }) ) if exists $check_hash{ $arg->{ 'long' } };
306 Maasha::Common::error( qq(Duplicate short argument: $arg->{ 'short' }) ) if exists $check_hash{ $arg->{ 'short' } };
308 $check_hash{ $arg->{ 'long' } } = 1;
309 $check_hash{ $arg->{ 'short' } } = 1;
314 sub check_print_usage
316 # Martin A. Hansen, May 2009.
318 # Check if we need to print usage and print usage
319 # and exit if that is the case.
321 my ( $options, # option hash
326 my ( $script, $wiki );
328 $script = Maasha::Common::get_scriptname();
330 if ( $script ne 'print_wiki' )
332 # if ( exists $options{ 'help' } or not ( exists $options{ 'stream_in' } or exists $options{ 'data_in' } or not -t STDIN ) )
333 if ( exists $options->{ 'help' } or ( scalar keys %{ $options } == 0 or exists $options->{ 'data_in' } or not -t STDIN ) )
335 $wiki = $ENV{ 'BP_DIR' } . "/bp_usage/$script.wiki";
337 if ( exists $options->{ 'help' } ) {
338 `print_wiki --data_in=$wiki --help`;
339 } elsif ( $script =~ /^(list_biopieces|list_genomes)$/ ) {
342 `print_wiki --data_in=$wiki`;
353 # Martin A. Hansen, May 2009.
355 # Check if mandatory arguments are set and raises an error if not.
358 $options, # options hash
363 if ( $arg->{ 'mandatory' } eq 'yes' and not defined $options->{ $arg->{ 'long' } } ) {
364 Maasha::Common::error( qq(Argument --$arg->{ 'long' } is mandatory) );
371 # Martin A. Hansen, May 2009.
373 # Set default values in option hash.
376 $options, # options hash
381 if ( not defined $options->{ $arg->{ 'long' } } ) {
382 $options->{ $arg->{ 'long' } } = $arg->{ 'default' }
389 # Martin A. Hansen, May 2009.
391 # Check if value to argument is an unsigned integer and
392 # raises an error if not.
395 $options, # options hash
400 if ( $arg->{ 'type' } eq 'uint' and defined $options->{ $arg->{ 'long' } } )
402 if ( $options->{ $arg->{ 'long' } } !~ /^\d+$/ ) {
403 Maasha::Common::error( qq(Argument --$arg->{ 'long' } must be an unsigned integer - not $options->{ $arg->{ 'long' } }) );
411 # Martin A. Hansen, May 2009.
413 # Check if value to argument is an integer and
414 # raises an error if not.
417 $options, # options hash
422 if ( $arg->{ 'type' } eq 'int' and defined $options{ $arg->{ 'long' } } )
424 if ( $options->{ $arg->{ 'long' } } !~ /^-?\d+$/ ) {
425 Maasha::Common::error( qq(Argument --$arg->{ 'long' } must be an integer - not $options->{ $arg->{ 'long' } }) );
433 # Martin A. Hansen, May 2009.
435 # Splits an argument of type 'list' into a list that is put
436 # in the options hash.
439 $options, # options hash
444 if ( $arg->{ 'type' } eq 'list' and defined $options->{ $arg->{ 'long' } } ) {
445 $options->{ $arg->{ 'long' } } = [ split /,/, $options->{ $arg->{ 'long' } } ];
452 # Martin A. Hansen, May 2009.
454 # Check if an argument of type 'dir!' truly is a directory and
455 # raises an error if not.
458 $options, # options hash
463 if ( $arg->{ 'type' } eq 'dir!' and defined $options->{ $arg->{ 'long' } } )
465 if ( not -d $options->{ $arg->{ 'long' } } ) {
466 Maasha::Common::error( qq(No such directory: "$options->{ $arg->{ 'long' } }") );
474 # Martin A. Hansen, May 2009.
476 # Check if an argument of type 'file!' truly is a file and
477 # raises an error if not.
480 $options, # options hash
485 if ( $arg->{ 'type' } eq 'file!' and defined $options->{ $arg->{ 'long' } } )
487 if ( not -f $options->{ $arg->{ 'long' } } ) {
488 Maasha::Common::error( qq(No such file: "$options->{ $arg->{ 'long' } }") );
496 # Martin A. Hansen, May 2009.
498 # Split the argument to 'files' into a list that is put on the options hash.
501 $options, # options hash
506 if ( $arg->{ 'type' } eq 'files' and defined $options->{ $arg->{ 'long' } } ) {
507 $options->{ $arg->{ 'long' } } = [ split /,/, $options->{ $arg->{ 'long' } } ];
514 # Martin A. Hansen, May 2009.
516 # Split the argument to 'files!' and check if each file do exists before adding
517 # the file list to the options hash.
520 $options, # options hash
525 my ( $elem, @files );
527 if ( $arg->{ 'type' } eq 'files!' and defined $options->{ $arg->{ 'long' } } )
529 foreach $elem ( split /,/, $options->{ $arg->{ 'long' } } )
533 } elsif ( $elem =~ /\*/ ) {
534 push @files, glob( $elem );
538 if ( scalar @files == 0 ) {
539 Maasha::Common::error( qq(Argument to --$arg->{ 'long' } must be a valid file or fileglob expression - not $options->{ $arg->{ 'long' } }) );
542 $options->{ $arg->{ 'long' } } = [ @files ];
549 # Martin A. Hansen, May 2009.
551 # Check if all values to all arguement are allowed and raise an
555 $options, # options hash
562 if ( defined $arg->{ 'allowed' } and defined $options->{ $arg->{ 'long' } } )
564 map { $val_hash{ $_ } = 1 } split /,/, $arg->{ 'allowed' };
566 if ( $arg->{ 'type' } =~ /^(list|files|files!)$/ )
568 foreach $elem ( @{ $options->{ $arg->{ 'long' } } } )
570 if ( not exists $val_hash{ $elem } ) {
571 Maasha::Common::error( qq(Argument to --$arg->{ 'long' } $elem is not allowed) );
577 if ( not exists $val_hash{ $options->{ $arg->{ 'long' } } } ) {
578 Maasha::Common::error( qq(Argument to --$arg->{ 'long' } $options->{ $arg->{ 'long' } } is not allowed) );
587 # Martin A. Hansen, May 2009.
589 # Check if any values to all arguemnts are disallowed and raise an error if so.
592 $options, # options hash
597 my ( $val, %val_hash );
599 if ( defined $arg->{ 'disallowed' } and defined $options->{ $arg->{ 'long' } } )
601 foreach $val ( split /,/, $arg->{ 'disallowed' } )
603 if ( $options->{ $arg->{ 'long' } } eq $val ) {
604 Maasha::Common::error( qq(Argument to --$arg->{ 'long' } $val is disallowed) );
613 # Martin A. Hansen, November 2007.
615 # Extracts files from an explicit GetOpt::Long argument
616 # allowing for the use of glob. E.g.
618 # --data_in=test.fna,test2.fna
620 # --data_in=test.fna,/dir/*.fna
622 my ( $option, # option from GetOpt::Long
627 my ( $elem, @files );
629 foreach $elem ( split ",", $option )
633 } elsif ( $elem =~ /\*/ ) {
634 push @files, glob( $elem );
638 return wantarray ? @files : \@files;
644 # Martin A. Hansen, April 2008.
646 # Removes temporary directory and exits gracefully.
647 # This subroutine is meant to be run always as the last
648 # thing even if a script is dies or is interrupted
651 my ( $sig, # signal from the %SIG
654 # print STDERR "signal->$sig<-\n";
656 my $script = Maasha::Common::get_scriptname();
664 if ( $sig =~ /MAASHA_ERROR/ ) {
665 print STDERR "\nProgram '$script' had an error" . " - Please wait for temporary data to be removed\n";
666 } elsif ( $sig eq "INT" ) {
667 print STDERR "\nProgram '$script' interrupted (ctrl-c was pressed)" . " - Please wait for temporary data to be removed\n";
668 } elsif ( $sig eq "TERM" ) {
669 print STDERR "\nProgram '$script' terminated (someone used kill?)" . " - Please wait for temporary data to be removed\n";
671 print STDERR "\nProgram '$script' died->$sig" . " - Please wait for temporary data to be removed\n";
683 # Martin A. Hansen, July 2008.
685 # Cleans out any unused temporary files and directories in BP_TMP.
689 my ( $tmpdir, @dirs, $curr_pid, $dir, $user, $sid, $pid );
691 $tmpdir = $ENV{ 'BP_TMP' } || Maasha::Common::error( 'No BP_TMP variable in environment.' );
693 $curr_pid = Maasha::Common::get_processid();
695 @dirs = Maasha::Filesys::ls_dirs( $tmpdir );
697 foreach $dir ( @dirs )
699 if ( $dir =~ /^$tmpdir\/(.+)_(\d+)_(\d+)_bp_tmp$/ )
705 # next if $user eq "maasha"; # DEBUG
707 if ( $user eq Maasha::Common::get_user() )
709 if ( not Maasha::Common::process_running( $pid ) )
711 # print STDERR "Removing stale dir: $dir\n";
712 Maasha::Filesys::dir_remove( $dir );
714 elsif ( $pid == $curr_pid )
716 # print STDERR "Removing current dir: $dir\n";
717 Maasha::Filesys::dir_remove( $dir );
727 # Martin A. Hansen, May 2009.
729 # Returns a precision timestamp for calculating
732 return Maasha::Common::get_time_hires();
738 # Martin A. Hansen, May 2009
740 # Print the run time to STDERR for the current script if
741 # the verbose switch is set in the option hash.
743 my ( $t0, # run time begin
745 $options, # options hash
750 my $script = Maasha::Common::get_scriptname();
752 print STDERR "Program: $script" . ( " " x ( 25 - length( $script ) ) ) . sprintf( "Run time: %.4f\n", ( $t1 - $t0 ) ) if $options->{ 'verbose' };
759 # Martin A. Hansen, April 2008.
761 # Create a temporary directory based on
762 # $ENV{ 'BP_TMP' } and sessionid.
764 # this thing is a really bad solution and needs to be removed.
768 my ( $user, $sid, $pid, $path );
770 Maasha::Common::error( qq(no BP_TMP set in %ENV) ) if not -d $ENV{ 'BP_TMP' };
772 $user = Maasha::Common::get_user();
773 $sid = Maasha::Common::get_sessionid();
774 $pid = Maasha::Common::get_processid();
776 $path = "$ENV{ 'BP_TMP' }/" . join( "_", $user, $sid, $pid, "bp_tmp" );
778 Maasha::Filesys::dir_create( $path );
790 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<