1 package Parallel::MPI::Simple;
# NOTE(review): `use vars` is obsolete; `our (@ISA, $VERSION)` is the
# modern replacement — confirm minimum supported perl before changing.
5 use vars qw(@ISA $VERSION);
6 use Storable qw(nfreeze thaw);
11 bootstrap Parallel::MPI::Simple;
13 # evil, but makes everything MPI_*, which is sort of expected
# Poor man's exporter: alias each listed sub into the *calling*
# package under an MPI_ prefix via typeglob assignment, so callers
# see MPI_Init(), MPI_Send(), etc.
15 my $call = (caller())[0];
17 # subs (MPI_ function calls)
18 foreach (qw(Init Finalize COMM_WORLD ANY_SOURCE Comm_rank Comm_size
19 Recv Send Barrier Bcast Gather
20 Scatter Allgather Alltoall Reduce
21 Comm_compare Comm_dup Comm_free Comm_split
23 *{$call.'::MPI_'.$_} = \&$_;
# Same aliasing for the comparison/undefined constants below.
26 *{$call.'::MPI_'.$_} = \&$_
27 foreach (qw(IDENT CONGRUENT SIMILAR UNEQUAL UNDEFINED));
36 mpirun -np 2 perl script.pl
39 use Parallel::MPI::Simple;
41 my $rank = MPI_Comm_rank(MPI_COMM_WORLD);
43 my $msg = "Hello, I'm $rank";
44 MPI_Send($msg, 0, 123, MPI_COMM_WORLD);
47 my $msg = MPI_Recv(1, 123, MPI_COMM_WORLD);
48 print "$rank received: '$msg'\n";
52 =head1 COMPILING AND RUNNING
54 Please view the README file in the module tarball if you are having
55 trouble compiling or running this module.
59 Perl is not a strongly typed language, Perl does not enforce data
60 structures of a fixed size or dimensionality, Perl makes things easy.
61 Parallel processing solves problems faster and is commonly programmed
62 using a message passing paradigm. Traditional message passing systems
63 are designed for strongly typed languages like C or Fortran, there
64 exist implementations of these for Perl but they concentrate on
65 perfectly mimicking the standards, forcing the poor Perl programmer to
66 use strongly typed data despite all his best instincts.
68 This module provides a non-compliant wrapper around the widely
69 implemented MPI libraries, allowing messages to consist of arbitrarily
70 nested Perl data structures whose size is limited by available memory.
71 This hybrid approach should allow you to quickly write programs which
72 run anywhere which supports MPI (both Beowulf and traditional MPP
75 =head1 Message Passing and Multiprocessing
77 The message passing paradigm is simple and easy to use. Multiple
78 versions of the same program are run on multiple processors (or
79 nodes). Each running copy should call C<MPI_Init> to announce that it
80 is running. It can then find out who it is by calling
81 C<MPI_Comm_rank> and who else it can talk to by calling
82 C<MPI_Comm_size>. Using this information to decide what part it is to
83 play in the ensuing computation, it then exchanges messages, or
84 parcels of data, with other nodes allowing all to cooperate.
86 Once the computation is finished, the node calls C<MPI_Finalize> and
87 exits cleanly, ready to run another day.
89 These processes are all copies of the I<same> perl script and are invoked
90 using: C<mpirun -np [number of nodes] perl script.pl> .
92 Remember you may need to start a daemon before mpirun will work, for
93 C<mpich> this is often as easy as running: C<mpd &>.
95 =head1 Starting and Stopping a process
97 A process must formally enter and leave the MPI pool by calling these
104 Initialises the message passing layer. This should be the first C<MPI_*>
105 call made by the program and ideally one of the first things the
106 program does. After completing this call, all processes will be
107 synchronised and will become members of the C<MPI_COMM_WORLD>
108 communicator. It is an error for programs invoked with C<mpirun> to
109 fail to call C<MPI_Init> (not to mention being a little silly).
115 Shuts down the message passing layer. This should be called by every
116 participating process before exiting. No more C<MPI_*> calls may be made
117 after this function has been called. It is an error for a program to
118 exit I<without> calling this function.
122 All processes are members of one or more I<communicators>. These are
123 like channels over which messages are broadcast. Any operation
124 involving more than one process will take place in a communicator,
125 operations involving one communicator will not interfere with those in
128 On calling C<MPI_Init> all nodes automatically join the
129 C<MPI_COMM_WORLD> communicator. A communicator can be split into
130 smaller subgroups using the C<MPI_Comm_split> function.
132 =head2 MPI_COMM_WORLD
134 $global_comm = MPI_COMM_WORLD;
136 Returns the global communicator shared by all processes launched at
137 the same time. Can be used as a "constant" where a communicator is
138 required. Most MPI applications can get by using only this
143 $rank = MPI_Comm_rank($comm);
145 Returns the rank of the process within the communicator given by
146 $comm. Processes have ranks from 0..(size-1).
157 $size = MPI_Comm_size($comm);
159 Returns the number of processes in communicator $comm.
167 =head2 MPI_Comm_compare
169 $result = MPI_Comm_compare($comm1, $comm2);
171 Compares the two communicators $comm1 and $comm2. $result will be equal
174 MPI_IDENT : communicators are identical
175 MPI_CONGRUENT: membership is same, ranks are equal
176 MPI_SIMILAR : membership is same, ranks not equal
177 MPI_UNEQUAL : at least one member of one is not in the other
182 sub CONGRUENT () { 2 }
# Unpack the two communicator SVs and delegate to the C helper, which
# translates MPI's comparison constants into this module's
# IDENT/CONGRUENT/SIMILAR/UNEQUAL values.
187 my ($c1, $c2) = (@_);
188 _Comm_compare($c1, $c2);
193 $newcomm = MPI_Comm_dup($comm);
195 Duplicates $comm but creates a new context for messages.
204 =head2 MPI_Comm_split
206 $newcomm = MPI_Comm_split($comm, $colour, $key);
208 Every process in $comm calls C<MPI_Comm_split> at the same time. A
209 new set of communicators is produced, one for each distinct value of
210 $colour. All those processes which specified the same value of
211 $colour end up in the same communicator and are ranked on the values
212 of $key, with their original ranks in $comm being used to settle ties.
214 If $colour is negative (or C<MPI_UNDEFINED>), the process will not be
215 allocated to any of the new communicators and C<undef> will be
220 sub UNDEFINED () { -1 }
222 my ($comm, $colour, $key) = @_;
# A negative $colour (MPI_UNDEFINED) means this rank joins none of the
# new communicators; presumably the remainder of this sub turns the C
# result into undef in that case — not visible in this excerpt.
223 my $rt = _Comm_split($comm, $colour, $key);
234 MPI_Comm_free($comm, [$comm2, ...] );
236 Frees the underlying object in communicator $comm, do not attempt to
237 do this to MPI_COMM_WORLD, wise to do this for any other communicators
238 that you have created. If given a list of communicators, will free
239 all of them, make sure there are no duplicates...
244 _Comm_free($_) foreach @_;
247 =head1 Communications operations
253 Waits for every process in $comm to call MPI_Barrier, once done, all
254 continue to execute. This causes synchronisation of processes. Be
255 sure that every process does call this, else your computation will
260 MPI_Send($scalar, $dest, $msg_tag, $comm);
262 This takes a scalar (which can be an anonymous reference to a more
263 complicated data structure) and sends it to process with rank $dest in
264 communicator $comm. The message also carries $msg_tag as an
265 identifier, allowing nodes to receive and send out of order.
266 Completion of this call does not imply anything about the progress of
272 # my ($ref,$dest,$tag,$comm) = @_;
# Freeze the scalar with Storable::nfreeze so nested Perl structures
# travel as a flat byte string; $_[0] is used directly to avoid
# copying the caller's (possibly large) scalar.
273 my $stor = nfreeze(\$_[0]);
274 _Send($stor, $_[1], $_[2], $_[3]);
279 $scalar = MPI_Recv($source, $msg_tag, $comm);
281 Receives a scalar from another process. $source and $msg_tag must both
282 match a message sent via MPI_Send (or one which will be sent in future)
283 to the same communicator given by $comm.
286 MPI_Send([qw(This is a message)], 1, 0, MPI_COMM_WORLD);
289 my $msg = MPI_Recv(1,0,MPI_COMM_WORLD);
290 print join(' ', @{ $msg } );
293 Will output "This is a message". Messages with the same source,
294 destination, tag and communicator will be delivered in the order in
295 which they were sent. No other guarantees of timeliness or ordering
296 can be given. If needed, use C<MPI_Barrier>.
298 C<$source> can be C<MPI_ANY_SOURCE> which will do what it says.
304 my ($source, $tag, $comm, $status) = @_;
# _Recv returns the frozen byte string; thaw() rebuilds the structure
# and we dereference the top-level scalar ref created by Send's
# nfreeze(\$_[0]).
305 $out = _Recv($source, $tag, $comm, $status);
306 return ${thaw($out)};
311 $data = MPI_Bcast($scalar, $root, $comm);
313 This sends $scalar in process $root from the root process to every
314 other process in $comm, returning this scalar in every process. All
315 non-root processes should provide a dummy message (such as C<undef>),
316 this is a bit ugly, but maintains a consistent interface between the
317 other communication operations. The scalar can be a complicated data
320 if ($rank == 0) { # send from 0
321 my $msg = [1,2,3, {3=>1, 5=>6} ];
322 MPI_Bcast( $msg, 0, MPI_COMM_WORLD);
324 else { # everything else receives, note dummy message
325 my $msg = MPI_Bcast(undef, 0, MPI_COMM_WORLD);
332 # my ($data, $from, $comm) = @_;
# Every rank freezes its argument (non-root payloads are dummies and
# ignored), then all thaw whatever the C-level broadcast delivers.
333 my $data = nfreeze(\$_[0]);
334 $out = _Bcast($data, $_[1], $_[2]);
335 return ${thaw($out)};
341 @list = MPI_Gather($scalar, $root, $comm);
343 (nothing) = MPI_Gather($scalar, $root, $comm);
345 Sends $scalar from every process in $comm (each $scalar can be
346 different, root's data is also sent) to the root process which
347 collects them as a list of scalars, sorted by process rank order in
353 # my ($ref, $root, $comm) = @_;
# Each rank contributes one frozen scalar; _Gather returns an array
# ref of frozen strings which are thawed back into Perl structures in
# rank order (meaningful data at root only).
355 my $data = nfreeze(\$_[0]);
356 foreach (@{ _Gather($data, $_[1], $_[2]) }) {
357 push @rt, ${thaw($_)};
364 $data = MPI_Scatter([N items of data], $root, $comm);
366 Sends list of scalars (anon array as 1st arg) from $root to all
367 processes in $comm, with process of rank N-1 receiving the Nth item in
368 the array. Very bad things might happen if number of elements in
369 array != N. This does not call the C function at any time, so do not
370 expect any implicit synchronisation.
# Implemented in Perl with point-to-point Send/Recv on tag 11002
# rather than C-level MPI_Scatter, so (per the POD) there is no
# implicit synchronisation.
375 my ($aref, $root, $comm) = @_;
376 if (Comm_rank($comm) == $root) {
377 for my $i (0..@$aref-1) {
379 Send($aref->[$i], $i, 11002, $comm);
384 Recv($root, 11002, $comm);
390 @list = MPI_Allgather($scalar, $comm);
392 Every process receives an ordered list containing an element from every
393 other process. Again, this is implemented without a call to the C function.
398 # my ($data, $comm) = @_;
# Implemented as Comm_size successive broadcasts, one rooted at each
# rank in turn, so every process accumulates everyone's data in rank
# order.
400 my $frozen = nfreeze(\$_[0]);
401 for my $i (0..Comm_size($_[1])-1) {
402 push @rt, ${ thaw(_Bcast($frozen, $i, $_[1])) };
409 @list = MPI_Alltoall([ list of scalars ], $comm);
411 Similar to Allgather, each process (with rank I<rank>) ends up with a
412 list such that element I<i> contains the data which started in element
413 I<rank> of process I<i>s data.
418 my ($data, $comm) = @_;
419 my ($rank, $size) = (Comm_rank($comm), Comm_size($comm));
# First loop: send element $_ of my list to rank $_ (tag 1); second
# loop: collect one element from each rank in order, keeping my own
# element locally instead of sending to self.  NOTE(review): the
# self-rank guard lines are not visible in this excerpt — confirm
# against the full source before editing.
422 foreach (0..$size-1) {
424 Send($data->[$_], $_, 1, $comm);
426 foreach (0..$size-1) {
428 push @rt, $data->[$_]; next;
430 push @rt, Recv($_, 1, $comm);
437 $value = MPI_Reduce($input, \&operation, $comm);
439 Every process receives in $value the result of performing &operation
440 between every processes $input. If there are three processes in
441 $comm, then C<$value = $input_0 op $input_1 op $input_2>.
443 Operation should be a sub which takes two scalar values (the $input
444 above) and returns a single value. The operation it performs should
445 be commutative and associative, otherwise the result will be undefined.
447 For instance, to return the sum of some number held by each process, perform:
449 $sum = MPI_Reduce($number, sub {$_[0] + $_[1]}, $comm);
451 To find which process holds the greatest value of some number:
453 ($max, $mrank) = @{ MPI_Reduce([$number, $rank],
454 sub { $_[0]->[0] > $_[1]->[0] ? $_[0] : $_[1]}
459 # This version is deprecated, but may be faster
# O(P) linear reduction: every non-root rank sends its value to rank
# 0, which folds them together with &$code and then sends the final
# result back out to each rank (tag 1 throughout).
461 my ($ref, $code, $comm) = @_;
462 my ($rank, $size) = (Comm_rank($comm), Comm_size($comm));
464 Barrier($comm); # safety first
466 Send($ref, 0, 1, $comm);
467 $rt = Recv(0,1,$comm);
472 $rt = &$code($rt, Recv($_,1,$comm));
475 Send($rt, $_,1,$comm);
481 # This should be O(log(P)) in calc and comm
482 # This version first causes odds to send to evens which reduce, then etc.
484 my ($val, $code, $comm) = @_;
485 my ($rank, $size) = (Comm_rank($comm), Comm_size($comm));
487 my @nodes = (0..$size-1);
# Pad the node list to an even length so sender/receiver pairing is
# well defined each round.
489 $#nodes += @nodes % 2;
491 my %to = reverse %from;
492 if ($from{$rank}) { # I'm receiving something
493 $rt = &$code($rt, Recv($from{$rank}, 1, $comm));
495 elsif (defined($to{$rank})) {# I'm sending something
496 Send($rt, $to{$rank}, 1, $comm);
# The receivers survive to form the node set for the next round.
498 @nodes = sort {$a <=> $b} keys %from;
500 # node 0 only to distribute via Broadcast
501 Bcast($rt, 0, $comm);
504 1; # I am the ANTI-POD!
508 I have decided to loosely follow the MPI calling and naming
509 conventions but do not want to stick strictly to them in all cases.
510 In the interests of being simple, I have decided that all errors
511 should result in the death of the MPI process rather than huge amounts
512 of error checking being foisted onto the module's user.
514 Many of the MPI functions have not been implemented, some of this is
515 because I feel they would complicate the module (I chose to have a
516 single version of the Send command, for instance) but some of this is
517 due to my not having finished yet. I certainly do not expect to
518 provide process topologies or inter-communicators, I also do not
519 expect to provide anything in MPI-2 for some time.
523 This module has been tested on a variety of platforms. I have not
524 been able to get it running with the mpich MPI implementation in
525 a clustered environment.
527 In general, I expect that most programs using this module will make
528 use of little other than C<MPI_Init>, C<MPI_Send>, C<MPI_Recv>,
529 C<MPI_COMM_WORLD>, C<MPI_Barrier>, C<MPI_Comm_size>, C<MPI_Comm_rank>
532 Please send bugs to github: L<https://github.com/quidity/p5-parallel-mpi-simple/issues>
536 Alex Gough (alex@earth.li)
540 This module is copyright (c) Alex Gough, 2001,2011.
542 You may use and redistribute this software under the Artistic License as
550 #define GATHER_TAG 11001 /* used to be unlikely to upset other sends */
553 root process first broadcasts length of stored data then broadcasts
554 the data. Non-root processes receive length (via bcast), allocate
555 space to take incoming data from root
557 Both root and non-root processes then create and return a new scalar
558 with contents identical to those root started with.
561 SV* _Bcast (SV* data, int root, SV* comm) {
/* Two-phase broadcast: first the byte length of the frozen scalar
   (as one MPI_INT), then the bytes themselves.  Root reuses its own
   PV buffer; every other rank mallocs just enough to receive. */
565 MPI_Comm_rank((MPI_Comm)SvIVX(comm), &rank);
567 buf_len[0] = sv_len(data);
568 MPI_Bcast(buf_len, 1, MPI_INT, root, (MPI_Comm)SvIVX(comm));
569 MPI_Bcast(SvPVX(data), buf_len[0], MPI_CHAR, root, (MPI_Comm)SvIVX(comm));
570 rval = newSVpvn(SvPVX(data), buf_len[0]);
574 MPI_Bcast(buf_len, 1, MPI_INT, root, (MPI_Comm)SvIVX(comm));
575 recv_buf = (char*)malloc((buf_len[0]+1)*sizeof(char));
576 if (recv_buf == NULL) croak("Allocation error in _Bcast");
577 MPI_Bcast(recv_buf, buf_len[0], MPI_CHAR, root, (MPI_Comm)SvIVX(comm));
578 rval = newSVpvn(recv_buf, buf_len[0]);
585 Finds length of data in stor_ref, sends this to receiver, then
586 sends actual data, uses same tag for each message.
589 int _Send(SV* stor_ref, int dest, int tag, SV*comm) {
/* Length-prefixed send: one MPI_INT carrying the frozen string's
   byte length, then the raw bytes, both under the caller's tag. */
591 str_len[0] = sv_len(stor_ref);
592 MPI_Send(str_len, 1, MPI_INT, dest, tag, (MPI_Comm)SvIVX(comm));
593 MPI_Send(SvPVX(stor_ref), sv_len(stor_ref),MPI_CHAR,
594 dest, tag, (MPI_Comm)SvIVX(comm));
599 Receives int for length of data it should then expect, allocates space
600 then receives data into that space. Creates a new SV and returns it.
603 SV* _Recv (int source, int tag, SV*comm, SV*status) {
/* Mirror of _Send: receive the length, allocate, receive the bytes,
   wrap them in a fresh SV, and report the actual sender's rank back
   through the status SV (useful with MPI_ANY_SOURCE). */
609 MPI_Recv(len_buf, 1, MPI_INT, source, tag, (MPI_Comm)SvIVX(comm), &tstatus);
610 recv_buf = (char*)malloc((len_buf[0]+1)*sizeof(char));
611 if (recv_buf == NULL) croak("Allocation error in _Recv");
612 MPI_Recv(recv_buf, len_buf[0], MPI_CHAR, source, tag,
613 (MPI_Comm)SvIVX(comm), &tstatus);
614 rval = newSVpvn(recv_buf, len_buf[0]);
615 sv_setiv(status, tstatus.MPI_SOURCE);
620 /* Calls MPI_Init with dummy arguments, a bit dodgy but sort of ok */
/* NOTE(review): MPI_Init's signature is (int *argc, char ***argv);
   the first cast should be (int *)NULL, not (int)NULL — on LP64
   platforms (int)NULL truncates the intended null pointer.  Confirm
   and fix against the MPI standard. */
622 MPI_Init((int) NULL, (char ***)NULL);
625 /* Returns rank of process within comm */
626 int _Comm_rank (SV* comm) {
/* The MPI_Comm handle is stored cast into the SV's IV slot, as
   throughout this C section. */
628 MPI_Comm_rank((MPI_Comm)SvIVX(comm),&trank);
632 /* returns total number of processes within comm */
633 int _Comm_size (SV* comm) {
/* Handle recovered from the SV's IV slot, as elsewhere. */
635 MPI_Comm_size((MPI_Comm)SvIVX(comm), &tsize);
639 /* returns SV whose IV slot is a cast pointer to the MPI_COMM_WORLD object */
641 return newSViv((IV)MPI_COMM_WORLD);
644 /* calls MPI_Barrier for comm */
645 int Barrier (SV*comm) {
/* Blocks until every rank in comm has entered the barrier. */
646 MPI_Barrier((MPI_Comm)SvIVX(comm));
649 /* ends MPI participation */
655 If non-root: participates in Gather so that root finds length of data
656 to expect from this process. Then send (using MPI_Send)
659 If root: receives array of ints detailing length of scalars held by
660 other processes, then receives from each in turn (using MPI_Recv)
661 returns an array ref to root process only.
/* Root gathers every rank's frozen-string length via MPI_Gather,
   allocates one buffer sized to the largest, then receives each
   rank's bytes in turn on GATHER_TAG.  Non-root ranks participate in
   the length gather and then plain-send their bytes.  Returns a ref
   to an AV of the gathered strings (empty AV on non-root). */
664 SV* _Gather (SV* data, int root, SV* comm) {
665 int rank, size, *buf_lens, i, max;
671 /* find out how long data is */
672 ret_arr = av_make(0,(SV**)NULL);
673 my_buf[0] = sv_len(data);
674 if (_Comm_rank(comm) == root) {
675 MPI_Comm_size((MPI_Comm)SvIVX(comm), &size);
676 buf_lens = malloc(size*sizeof(int));
677 if (buf_lens == NULL) croak("Allocation error (lens) in _Gather");
678 /* gather all scalar length data */
679 MPI_Gather(my_buf, 1, MPI_INT, buf_lens, 1,
680 MPI_INT, root, (MPI_Comm)SvIVX(comm));
681 max = 0; // keep buffer allocation calls to minimum
682 for (i=0;i<size;i++) {
683 max = max < buf_lens[i] ? buf_lens[i] : max;
685 recv_buf = malloc(max * sizeof(char));
686 if (recv_buf == NULL) croak("Allocation error (recv) in _Gather");
687 for (i=0;i<size;i++) {
/* NOTE(review): pushing `data` without SvREFCNT_inc while sibling
   elements are sv_2mortal'd looks asymmetric — confirm refcount
   handling against perlapi before touching. */
689 av_push(ret_arr, data);
690 continue; /* me, no point sending */
692 MPI_Recv(recv_buf, buf_lens[i], MPI_CHAR, i, GATHER_TAG,
693 (MPI_Comm)SvIVX(comm), &tstatus );
694 av_push(ret_arr, sv_2mortal( newSVpvn(recv_buf, buf_lens[i]) ) );
700 /* send out how long my scalar data is */
/* buf_lens is unallocated on non-root ranks; MPI ignores the recv
   buffer argument except at root, so this is legal if ugly. */
701 MPI_Gather(my_buf, 1, MPI_INT, buf_lens, 1,
702 MPI_INT, root, (MPI_Comm)SvIVX(comm) );
703 /* send out my scalar data as normal send with tag of ???? */
704 MPI_Send(SvPVX(data), my_buf[0], MPI_CHAR,
705 root, GATHER_TAG,(MPI_Comm)SvIVX(comm));
708 return newRV_inc((SV*)ret_arr);
711 /* compares two communicators, translates MPI constants into something I
712 can use as constants in the module interface */
713 int _Comm_compare(SV* comm1, SV* comm2) {
/* Result is translated (below this excerpt) into the module's
   IDENT/CONGRUENT/SIMILAR/UNEQUAL values. */
715 MPI_Comm_compare((MPI_Comm)SvIVX(comm1), (MPI_Comm)SvIVX(comm2), &result);
730 /* frees a communicator, once all pending communication has taken place */
731 void _Comm_free (SV* comm) {
/* NOTE(review): (MPI_Comm*)&SvIVX(comm) assumes the IV slot is exactly
   the size of an MPI_Comm — confirm on all supported platforms.
   MPI_Comm_free sets the handle to MPI_COMM_NULL, which the check
   below relies on. */
732 MPI_Comm_free((MPI_Comm*)&SvIVX(comm));
733 if ((MPI_Comm)SvIVX(comm) != MPI_COMM_NULL)
734 croak("Communicator not freed properly\n");
737 SV* _Comm_dup (SV*comm) {
/* Duplicate gets its own message context; new handle returned cast
   into a fresh SV's IV slot. */
739 MPI_Comm_dup((MPI_Comm)SvIVX(comm), &newcomm);
740 return newSViv((IV)newcomm);
743 SV* _Comm_split (SV* comm, int colour, int key) {
/* Negative colour maps to MPI_UNDEFINED so a rank can opt out of the
   split; the resulting handle is returned cast into an IV. */
746 MPI_Comm_split((MPI_Comm)SvIVX(comm),
747 (colour < 0 ? MPI_UNDEFINED : colour),
749 return newSViv((IV)newcomm);