#!/usr/bin/perl
# dqsub submits jobs using qsub with better options
# and is released under the terms of the GNU GPL version 3, or any
# later version, at your option. See the file README and COPYING for
# more information.
# Copyright 2014 by Don Armstrong.

use warnings;
use strict;

use Getopt::Long;
# use Pod::Usage;

=head1 NAME

dqsub - submits jobs using qsub with better options

=head1 SYNOPSIS

 dqsub [options] [command [arguments]]

 Options:
   --queue, -q             Queue to use
   --interactive, -I       call qsub interactively
   --nodes                 nodes to use
   --array                 array mode (one of 'chdir' or 'xargs' or '')
   --array-from            file to read arrays from (default STDIN)
   --array-per-job         number of array items to handle in each job (default 1)
   --array-all-in-one-job  Run all of the array items in one job
   --ppn                   processors per node to use
   --mem                   memory to request
   --dir                   Directory to run the script in (default current directory)
   --account, -A           Account name to use
   --join, -J              join error and output streams (default)
   --name, -N              Name of the job
   --precommand            Optional command to run before each command
   --debug, -d             debugging level (Default 0)
   --help, -h              display this help
   --man, -m               display manual

=head1 OPTIONS

=over

=item B<--array>

This describes how dqsub will generate array jobs.

If no B<--array> is given, then the command and any additional
arguments given will be run using qsub.

If B<--array> is C<chdir>, then each line of the input given in
B<--array-from> will be used as a directory and the command and any
additional arguments given will run in each directory.

If B<--array> is C<xargs>, then each line of the input given will be
considered to be an additional argument which will be given to the
command run in the current directory.

=item B<--array-from>

File to read array arguments from. If not provided, and B<--array> is
given, arguments will be read from STDIN.

=item B<--account, -A>

Account name to use

=item B<--join, -J>

Whether to join STDOUT and STDERR. On by default; disable with
C<--nojoin>.

=item B<--batch>

Which batch system to use.
If sbatch exists, assume it's slurm, otherwise, PBS.

=item B<--debug, -d>

Debug verbosity. (Default 0)

=item B<--help, -h>

Display brief usage information.

=item B<--man, -m>

Display this manual.

=back

=head1 EXAMPLES

dqsub

=cut


use IO::File;
use Cwd qw(getcwd abs_path);
use POSIX qw(ceil);
use List::Util qw(min);

use vars qw($DEBUG);

# Defaults for every option which has one; GetOptions overwrites these in
# place with whatever was given on the command line.
my %options = (nodes           => 1,
               ppn             => 2,
               mem             => '2G',
               debug           => 0,
               help            => 0,
               man             => 0,
               interactive     => 0,
               array_per_job   => 1,
               join            => 1,
              );

# Every problem found with the command line is collected here so that all
# of them can be reported together before exiting.
my @USAGE_ERRORS;

# GetOptions reports each parse problem on STDERR itself and returns false;
# treat that as a usage error instead of submitting a job built from
# half-parsed options.
GetOptions(\%options,
           'queue|q=s',
           'batch=s',
           'interactive|I!',
           'nodes=i',
           'array=s',
           'array_from|array-from=s',
           'array_per_job|array-per-job=i',
           'array_slot_limit|array-slot-limit=i',
           'array_all_in_one_job|array-all-in-one-job!',
           'ppn|cpus|processors-per-node=i',
           'account|A=s',
           'join|J!',
           'mem|memory=s',
           'time|walltime=s','cputime|cput=s','host=s',
           'pmem|process_mem|process-mem=s',
           # the misspelt 'process-virtiual-mem' is kept so that existing
           # callers which relied on it continue to work
           'pvmem|process_virtual_mem|process-virtual-mem|process-virtiual-mem=s',
           'max_file|max-file|file=s',
           'precommand|pre-command|pre_command=s',
           'dir=s',
           # -N is documented in the SYNOPSIS, so declare it explicitly
           'name|N=s',
           'debug|d+','help|h|?','man|m')
    or push @USAGE_ERRORS,"Unable to parse the command line options";

# pod2usage() if $options{help};
# pod2usage({verbose=>2}) if $options{man};

$DEBUG = $options{debug};

if (not @ARGV and not $options{interactive}) {
    push @USAGE_ERRORS,"You must provide a command to run";
}
if (defined $options{array}) {
    # normalize first, so that every later comparison sees the lower case
    # form (or undef when array mode was explicitly disabled with '')
    $options{array} = lc($options{array});
    if ($options{array} eq '') {
        $options{array} = undef;
    }
    elsif ($options{array} !~ /\A(?:chdir|xargs)\z/) {
        push @USAGE_ERRORS,"--array must be one of chdir, xargs or '' if provided";
    }
}
# array_per_job is used as a divisor when the array range is calculated
if ($options{array_per_job} < 1) {
    push @USAGE_ERRORS,"--array-per-job must be at least 1";
}
if ($options{interactive} and @ARGV) {
    push @USAGE_ERRORS,"Don't provide commands when you're asking for an interactive shell";
}
if (not defined $options{batch}) {
    # no batch system requested; use slurm if sbatch can be found and
    # fall back to PBS otherwise
    if (system('which sbatch >/dev/null 2>&1') == 0) {
        $options{batch} = 'slurm';
    }
    else {
        $options{batch} = 'pbs';
    }
}
# the alternation has to be grouped; /^pbs|slurm$/ anchors each alternative
# on one side only and so accepts things like 'pbsfoo'
if ($options{batch} !~ /\A(?:pbs|slurm)\z/) {
    push @USAGE_ERRORS,"Unsupported batch system '$options{batch}'; ".
        "supported systems are pbs or slurm";
}

# pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
if (@USAGE_ERRORS) {
    print STDERR map {"$_\n"} @USAGE_ERRORS;
    exit 1;
}

# Program which jobs are piped to; also read by call_qsub
my $JOB_SUBMITTER = 'qsub';

# OK. Generate the options to qsub which we'll be using
my @qsub_options;
if ($options{batch} eq 'pbs') {
    @qsub_options = generate_qsub_options(\%options,\@ARGV);
    $JOB_SUBMITTER = 'qsub';
}
elsif ($options{batch} eq 'slurm') {
    @qsub_options = generate_slurm_options(\%options,\@ARGV);
    $JOB_SUBMITTER = 'sbatch';
}
else {
    die "Unsupported batch system '$options{batch}'";
}

if ($options{interactive}) {
    # PBS runs interactive jobs through qsub itself; for slurm a shell is
    # started using srun
    my @interactive_command =
        $options{batch} eq 'pbs'
        ? ($JOB_SUBMITTER,@qsub_options)
        : ('srun',@qsub_options,$ENV{SHELL}//'bash');
    print STDERR 'running: '.join(' ',@interactive_command)."\n" if $DEBUG;
    # exec only returns when it was unable to start the program
    exec {$interactive_command[0]} @interactive_command
        or die "Unable to exec $interactive_command[0]: $!";
}
else {
    my @array = ();
    if ($options{array}) {
        @array = read_array_options(\%options);
        # without any items the array range below would be the invalid 1-0
        die "No array items were read; refusing to submit an empty array job\n"
            unless @array;
        # the -t option gives the range of elements for an array job
        if ($options{array_all_in_one_job}) {
            $options{array_per_job} = scalar @array;
        }
        else {
            if ($options{batch} eq 'pbs') {
                push @qsub_options,'-t';
            }
            else {
                push @qsub_options,'-a';
            }
            push @qsub_options,'1-'.
ceil(scalar @array / $options{array_per_job});
            if ($options{array_slot_limit}) {
                # limit how many array elements run at the same time
                $qsub_options[$#qsub_options] .= '%'.$options{array_slot_limit};
            }
        }
    }
    if ($options{batch} eq 'pbs') {
        push @qsub_options,'-';
    }
    call_qsub(\@qsub_options,write_qsub_script(\%options,\@ARGV,\@array));
}

# generate_qsub_options(\%options,\@args)
#
# Builds the command line arguments for PBS qsub.
#
# $options is a reference to the parsed option hash; the keys read are
# queue, nodes, ppn, account, mem, time, cputime, host, pmem, pvmem,
# max_file, interactive, name and join.
#
# $args is a reference to the command and its arguments; when no name was
# given, the job name is made from up to the first three of them with
# everything except ASCII letters and digits removed, joined with '_'.
#
# Returns the list of arguments for qsub.
sub generate_qsub_options{
    my ($options,$args) = @_;
    my @qo;
    if (defined $options->{queue} and length $options->{queue}) {
        push @qo,'-q',$options->{queue};
    }
    ## handle the -l options
    my @l;
    push @l,'nodes='.$options->{nodes};
    if (defined $options->{ppn}) {
        # ppn is a property of the nodes resource, not a separate resource
        $l[$#l] .= ':ppn='.$options->{ppn};
    }
    if (defined $options->{account}) {
        push @qo,'-A',$options->{account};
    }
    # dqsub option => name of the corresponding -l resource
    my %l_options = (mem      => 'vmem',
                     time     => 'walltime',
                     cputime  => 'cput',
                     host     => 'host',
                     pmem     => 'pmem',
                     pvmem    => 'pvmem',
                     max_file => 'file',
                    );
    # sorted, so that the same options always give the same command line
    for my $k (sort keys %l_options) {
        if ($options->{$k}) {
            # use the hashref we were handed, not the file-level %options
            push @l,$l_options{$k}.'='.$options->{$k};
        }
    }
    push @qo,'-l',join(',',@l) if @l;
    if ($options->{interactive}) {
        push @qo,'-I';
    }
    if ($options->{name}) {
        push @qo,'-N',$options->{name};
    }
    else {
        push @qo,'-N',join('_',
                           map {my $word = $_; $word =~ s/[^a-zA-Z0-9]*//g; $word;}
                           @{$args}[0..min($#{$args},2)]);
    }
    # join error and output streams
    if ($options->{join}) {
        push @qo,'-j','oe';
    }
    return @qo;
}

# generate_slurm_options(\%options,\@args)
#
# Builds the command line arguments for slurm (sbatch, or srun when
# running interactively) from the same option hash which
# generate_qsub_options uses; $args is used to make a job name when none
# was given.
#
# Returns the list of arguments.
#
# NOTE(review): $options->{nodes} and $options->{join} are not used here;
# confirm that this is intended.
sub generate_slurm_options{
    my ($options,$args) = @_;
    my @qo;
    if (defined $options->{queue} and length $options->{queue}) {
        push @qo,'-p',$options->{queue};
    }
    if (defined $options->{account}) {
        push @qo,'-A',$options->{account};
    }
    # dqsub option => name of the long option handed to slurm
    # NOTE(review): cput, host, pmem, pvmem and file are the PBS resource
    # names; confirm that slurm accepts each of them as a long option.
    my %options_map = (mem      => 'mem',
                       ppn      => 'cpus-per-task',
                       time     => 'time',
                       cputime  => 'cput',
                       host     => 'host',
                       pmem     => 'pmem',
                       pvmem    => 'pvmem',
                       max_file => 'file',
                      );
    # sorted, so that the same options always give the same command line;
    # mem is part of the map, so --mem is emitted exactly once here
    for my $k (sort keys %options_map) {
        if ($options->{$k}) {
            # use the hashref we were handed, not the file-level %options
            push @qo,'--'.$options_map{$k}.'='.$options->{$k};
        }
    }
    if ($options->{interactive}) {
        push @qo,'--pty';
    }
    if ($options->{name}) {
        push @qo,'-J',$options->{name};
    }
    else {
        push @qo,'-J',join('_',
                           map {my $a = $_; $a =~
s/[^a-zA-Z0-9]*//g; $a;}
                           @{$args}[0..min($#{$args},2)]);
    }
    return @qo;
}

# read_array_options(\%options)
#
# Reads the items of an array job, one per line, from the file named by
# $options->{array_from}, or from STDIN when no file was given.
#
# Returns the list of lines with the trailing newline removed; dies if the
# file cannot be opened.
sub read_array_options{
    my ($options) = @_;
    my $fh = \*STDIN;
    if (defined $options->{array_from}) {
        # open the file itself; passing the result of defined() here would
        # try to open a file called '1'
        $fh = IO::File->new($options->{array_from},'r') or
            die "Unable to open $options->{array_from} for reading: $!";
    }
    my @array;
    # read line by line instead of slurping the input before iterating
    while (my $line = <$fh>) {
        chomp $line;
        push @array,$line;
    }
    return @array;
}

# call_qsub(\@qsub_options,$script)
#
# Starts $JOB_SUBMITTER with the given options and writes $script to its
# standard input.
#
# Dies if the submitter cannot be started, cannot be written to, or does
# not exit successfully.
sub call_qsub {
    my ($qsub_options,$script) = @_;
    open(my $qsub_fh,'|-',$JOB_SUBMITTER,@{$qsub_options}) or
        die "Unable to start $JOB_SUBMITTER: $!";
    print {$qsub_fh} $script or
        die "Unable to print to $JOB_SUBMITTER: $!";
    if (not close($qsub_fh)) {
        # closing a pipe also fails when the program exited non-zero; in
        # that case $! is 0 and the reason is in $? instead
        die "Unable to close $JOB_SUBMITTER filehandle: $!" if $!;
        die sprintf("%s exited abnormally (exit code %d, signal %d)\n",
                    $JOB_SUBMITTER,$? >> 8,$? & 127);
    }
}

sub write_qsub_script { my ($opt,$arg,$array) = @_; my $script = "#!/bin/bash\n"; my $command = join(' ',map {$_ =~ /\s/?qq('$_'):$_} @{$arg}); $script .= <{precommand}) { $script .=<{precommand} # precommand _END_ EOF } my $directory = getcwd; if (defined $opt->{dir}) { $directory = abs_path($opt->{dir}); } # we really should be quoting this instead $script .=<{array}) { my @subshell = ('',''); my $array_opt = join("\n",@{$array}); my $max_array = scalar @{$array}; my $apjm1 = $opt->{array_per_job} - 1; $script .= <{array_per_job} > 1) { # we will use subshells if there are more than one array # items per job @subshell = ('(',')'); $script .= <{array_per_job}); do # in some cases, the jobs aren't going to come out evenly. Handle that. JOBNUM=\$(( \${MYARRAYID:=1} * $opt->{array_per_job} + \$i - $opt->{array_per_job} )) if [ \$JOBNUM -le $max_array ]; then OPT=\$(sed -n -e "\$JOBNUM p"<<'_HERE_DOC_END_' EOF } else { $script .= <{array} eq 'chdir') { $script .= <{array_per_job} > 1) { $script .= <