From 82ddd5da038e091cfe0bf3df8092e845c16213e7 Mon Sep 17 00:00:00 2001 From: Don Armstrong Date: Tue, 13 Mar 2012 15:13:04 -0700 Subject: [PATCH] Import original source of Parallel-MPI-Simple 0.10 --- Changes | 11 + INSTALL | 30 +++ MANIFEST | 9 + META.yml | 21 ++ Makefile.PL | 83 ++++++ Simple.pm | 754 ++++++++++++++++++++++++++++++++++++++++++++++++++++ Simple.xs | 303 +++++++++++++++++++++ ic.pl | 183 +++++++++++++ test.pl | 17 ++ 9 files changed, 1411 insertions(+) create mode 100644 Changes create mode 100644 INSTALL create mode 100644 MANIFEST create mode 100644 META.yml create mode 100644 Makefile.PL create mode 100644 Simple.pm create mode 100644 Simple.xs create mode 100755 ic.pl create mode 100644 test.pl diff --git a/Changes b/Changes new file mode 100644 index 0000000..3d98713 --- /dev/null +++ b/Changes @@ -0,0 +1,11 @@ +Revision history for Perl extension Parallel::MPI::Simple. + +0.01 Thu Nov 1 22:46:27 2001 + - original version, played with by self, not for release. +0.04 2011-06-06 + - Fixes to find things when mpicc available, might quieten bugs +0.05 2011-06-06 + - Heroic efforts get this working on windows. Because you can. + +0.10 2011-08-19 + - Added MPI_ANY_SOURCE diff --git a/INSTALL b/INSTALL new file mode 100644 index 0000000..d914422 --- /dev/null +++ b/INSTALL @@ -0,0 +1,30 @@ +This module tries to compile itself with "-lmpi", some systems require +additional compiler flags when building and linking MPI applications. + +If mpicc is present, this module tries to use `mpicc -compile-info` and +`mpicc -link-info` to work out how to compile and link itself. You may +need to make sure that mpicc is in your path for this to work during +installation. + +You may need to ensure that an appropriate daemon is running before +using mpirun/mpiexec. mpich2 requires that mpd is running. + +If `make && make test` doesn't work, try specifying CCFLAGS and +LDFLAGS and LIBS correctly and re-run Makefile.PL. + +Some systems might be a lot happier with the module if you build a static +perl, instead of one which dynamically loads the C portion of this module. +You will also need to statically link the Storable module. See the +ExtUtils::MakeMaker manpage for more details. + +Windows: This works, but the Makefile is generated incorrectly by Makemaker, +so you might need to hand edit it to get it working, by searching through for +repeated {{ characters where there should be none. I do not know why this is. 
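+
+For example, if the automatic detection fails, the configuration step can be
+re-run with explicit settings (the include/library paths and the library name
+below are only placeholders for wherever your MPI implementation lives):
+
+  perl Makefile.PL CCFLAGS="-I/path/to/mpi/include" LIBS="-L/path/to/mpi/lib -lmpi"
+  make && make test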
+ +### Working systems +SGI - IRIX64 6.5 IP30 - 2 nodes +SGI - IRIX64 6.5 IP27 - 16 nodes +CRAY - 2.0.5.55 unicosmk CRAY T3E +FreeBSD - FreeBSD4.4 w/lam +Linux - Debian with mpich2 +Windows - Windows7 with mpich2 v1.1.1 (some later mpich versions fail to include libmpi.a properly, once they fix this, this should work) \ No newline at end of file diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..bbb59b1 --- /dev/null +++ b/MANIFEST @@ -0,0 +1,9 @@ +INSTALL +Changes +Makefile.PL +MANIFEST +Simple.pm +Simple.xs +ic.pl +test.pl +META.yml Module meta-data (added by MakeMaker) diff --git a/META.yml b/META.yml new file mode 100644 index 0000000..2b83e58 --- /dev/null +++ b/META.yml @@ -0,0 +1,21 @@ +--- #YAML:1.0 +name: Parallel-MPI-Simple +version: 0.10 +abstract: ~ +author: [] +license: unknown +distribution_type: module +configure_requires: + ExtUtils::MakeMaker: 0 +build_requires: + ExtUtils::MakeMaker: 0 +requires: + Storable: 0 +no_index: + directory: + - t + - inc +generated_by: ExtUtils::MakeMaker version 6.55_02 +meta-spec: + url: http://module-build.sourceforge.net/META-spec-v1.4.html + version: 1.4 diff --git a/Makefile.PL b/Makefile.PL new file mode 100644 index 0000000..a50377a --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,83 @@ +use ExtUtils::MakeMaker; +# See lib/ExtUtils/MakeMaker.pm for details of how to influence +# the contents of the Makefile that is written. + +# Make sure people aren't being stupid... it's been known to happen. + +print < 'Parallel::MPI::Simple', + 'VERSION_FROM' => 'Simple.pm', # finds $VERSION + 'PREREQ_PM' => {Storable => 0}, + 'LIBS' => $libs || "-lmpi", + 'CCFLAGS' => $ccflags || "", +); diff --git a/Simple.pm b/Simple.pm new file mode 100644 index 0000000..3733eb7 --- /dev/null +++ b/Simple.pm @@ -0,0 +1,754 @@ +package Parallel::MPI::Simple; + +use strict; +require DynaLoader; +use vars qw(@ISA $VERSION); +use Storable qw(nfreeze thaw); + +@ISA = qw(DynaLoader); +$VERSION = '0.10'; + +bootstrap Parallel::MPI::Simple; + +# evil, but makes everything MPI_*, which is sort of expected +sub import { + my $call = (caller())[0]; + no strict 'refs'; + # subs (MPI_ function calls) + foreach (qw(Init Finalize COMM_WORLD ANY_SOURCE Comm_rank Comm_size + Recv Send Barrier Bcast Gather + Scatter Allgather Alltoall Reduce + Comm_compare Comm_dup Comm_free Comm_split + )) { + *{$call.'::MPI_'.$_} = \&$_; + } + # flags (variables) + *{$call.'::MPI_'.$_} = \&$_ + foreach (qw(IDENT CONGRUENT SIMILAR UNEQUAL UNDEFINED)); +} + +=head1 NAME + + Parallel::MPI::Simple + +=head1 SYNOPSIS + + mpirun -np 2 perl script.pl + + #!perl + use Parallel::MPI::Simple; + MPI_Init(); + my $rank = MPI_Comm_rank(MPI_COMM_WORLD); + if ($rank == 1) { + my $msg = "Hello, I'm $rank"; + MPI_Send($msg, 0, 123, MPI_COMM_WORLD); + } + else { + my $msg = MPI_Recv(1, 123, MPI_COMM_WORLD); + print "$rank received: '$msg'\n"; + } + MPI_Finalise(); + +=head1 COMPILING AND RUNNING + +Please view the README file in the module tarball if you are having +trouble compiling or running this module. + +=head1 INTRODUCTION + +Perl is not a strongly typed language, Perl does not enforce data +structures of a fixed size or dimensionality, Perl makes things easy. +Parallel processing solves problems faster and is commonly programmed +using a message passing paradigm. 
Traditional message passing systems +are designed for strongly typed languages like C or Fortran, there +exist implementations of these for Perl but they concentrate on +perfectly mimicing the standards forcing the poor Perl programmer to +use strongly typed data despite all his best instincts. + +This module provides a non-compliant wrapper around the widely +implemented MPI libraries, allowing messages to consist of arbitarily +nested Perl data structures whose size is limited by available memory. +This hybrid approach should allow you to quickly write programs which +run anywhere which supports MPI (both Beowulf and traditional MPP +machines). + +=head1 Message Passing and Multiprocessing + +The message passing paradigm is simple and easy to use. Multiple +versions of the same program are run on multiple processors (or +nodes). Each running copy should call C to announce that it +is running. It can then find out who it is by calling +C and who else it can talk to by calling +C. Using this information to decide what part it is to +play in the ensuing computation, it the exchanges messages, or +parcels of data, with other nodes allowing all to cooperate. + +Once the computation is finished, the node calls C and +exits cleanly, ready to run another day. + +These processes are all copies of the I perl script and are invoked +using: C . + +Remember you may need to start a daemon before mpirun will work, for +C this is often as easy as running: C. + +=head1 Starting and Stopping a process + +A process must formally enter and leave the MPI pool by calling these +functions. + +=head2 MPI_Init + + MPI_Init() + +Initialises the message passing layer. This should be the first C +call made by the program and ideally one of the first things the +program does. After completing this call, all processes will be +synchronised and will become members of the C +communicator. It is an error for programs invoked with C to +fail to call C (not to mention being a little silly). + +=head2 MPI_Finalize + + MPI_Finalize() + +Shuts down the message passing layer. This should be called by every +participating process before exiting. No more C calls may be made +after this function has been called. It is an error for a program to +exit I calling this function. + +=head1 Communicators + +All processes are members of one or more I. These are +like channels over which messages are broadcast. Any operation +involving more than one process will take place in a communicator, +operations involving one communicator will not interfere with those in +another. + +On calling C all nodes automatically join the +C communicator. A communicator can be split into +smaller subgroups using the C function. + +=head2 MPI_COMM_WORLD + + $global_comm = MPI_COMM_WORLD; + +Returns the global communicator shared by all processes launched at +the same time. Can be used as a "constant" where a communicator is +required. Most MPI applications can get by using only this +communicator. + +=head2 MPI_Comm_rank + + $rank = MPI_Comm_rank($comm); + +Returns the rank of the process within the communicator given by +$comm. Processes have ranks from 0..(size-1). + +=cut + +sub Comm_rank { + _Comm_rank($_[0]); +} + + +=head2 MPI_Comm_size + + $size = MPI_Comm_size($comm); + +Returns the number of processes in communicator $comm. + +=cut + +sub Comm_size { + _Comm_size($_[0]); +} + +=head2 MPI_Comm_compare + + $result = MPI_Comm_compare($comm1, $comm2); + +Compares the two communicators $comm1 and $comm2. 
$result will be equal +to: + + MPI_IDENT : communicators are identical + MPI_CONGRUENT: membership is same, ranks are equal + MPI_SIMILAR : membership is same, ranks not equal + MPI_UNEQUAL : at least one member of one is not in the other + +=cut + +sub IDENT () { 1 } +sub CONGRUENT () { 2 } +sub SIMILAR () { 3 } +sub UNEQUAL () { 0 } + +sub Comm_compare { + my ($c1, $c2) = (@_); + _Comm_compare($c1, $c2); +} + +=head2 MPI_Comm_dup + + $newcomm = MPI_Comm_dup($comm); + +Duplicates $comm but creates a new context for messages. + +=cut + +sub Comm_dup { + my ($comm) = @_; + _Comm_dup($comm); +} + +=head2 MPI_Comm_split + + $newcomm = MPI_Comm_split($comm, $colour, $key); + +Every process in $comm calls C at the same time. A +new set of communicators is produced, one for each distinct value of +$colour. All those processes which specified the same value of +$colour end up in the same comminicator and are ranked on the values +of $key, with their original ranks in $comm being used to settle ties. + +If $colour is negative (or C), the process will not be +allocated to any of the new communicators and C will be +returned. + +=cut + +sub UNDEFINED () { -1 } +sub Comm_split { + my ($comm, $colour, $key) = @_; + my $rt = _Comm_split($comm, $colour, $key); + if ($colour < 0) { + return undef; + } + else { + return $rt; + } +} + +=head2 MPI_Comm_free + + MPI_Comm_free($comm, [$comm2, ...] ); + +Frees the underlying object in communicator $comm, do not attempt to +do this to MPI_COMM_WORLD, wise to do this for any other comminicators +that you have created. If given a list of comminicators, will free +all of them, make sure there are no duplicates... + +=cut + +sub Comm_free { + _Comm_free($_) foreach @_; +} + +=head1 Communications operations + +=head2 MPI_Barrier + + MPI_Barrier($comm); + +Waits for every process in $comm to call MPI_Barrier, once done, all +continue to execute. This causes synchronisation of processes. Be +sure that every process does call this, else your computation will +hang. + +=head2 MPI_Send + + MPI_Send($scalar, $dest, $msg_tag, $comm); + +This takes a scalar (which can be an anonymous reference to a more +complicated data structure) and sends it to process with rank $dest in +communicator $comm. The message also carries $msg_tag as an +identfier, allowing nodes to receive and send out of order. +Completion of this call does not imply anything about the progress of +the receiving node. + +=cut + +sub Send { + # my ($ref,$dest,$tag,$comm) = @_; + my $stor = nfreeze(\$_[0]); + _Send($stor, $_[1], $_[2], $_[3]); +} + +=head2 MPI_Recv + + $scalar = MPI_Recv($source, $msg_tag, $comm); + +Receives a scalar from another process. $source and $msg_tag must both +match a message sent via MPI_Send (or one which will be sent in future) +to the same communicator given by $comm. + + if ($rank == 0) { + MPI_Send([qw(This is a message)], 1, 0, MPI_COMM_WORLD); + } + elsif ($rank == 1) { + my $msg = MPI_Recv(1,0,MPI_COMM_WORLD); + print join(' ', @{ $msg } ); + } + +Will output "This is a message". Messages with the same source, +destination, tag and comminicator will be delivered in the order in +which they were sent. No other guarantees of timeliness or ordering +can be given. If needed, use C. + +C<$source> can be C which will do what it says. 
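+
+For example, a minimal sketch (mirroring the bundled ic.pl test) of receiving
+a message without naming the sender:
+
+  if ($rank == 0) {
+    MPI_Send("Sending from ANY", 1, 0, MPI_COMM_WORLD);
+  }
+  else {
+    my $msg = MPI_Recv(MPI_ANY_SOURCE, 0, MPI_COMM_WORLD);
+    print "rank $rank got: '$msg'\n";
+  }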
+ +=cut + +sub Recv { + my $out; + my ($source, $tag, $comm, $status) = @_; + $out = _Recv($source, $tag, $comm, $status); + return ${thaw($out)}; +} + +=head2 MPI_Bcast + + $data = MPI_Bcast($scalar, $root, $comm); + +This sends $scalar in process $root from the root process to every +other process in $comm, returning this scalar in every process. All +non-root processes should provide a dummy message (such as C), +this is a bit ugly, but maintains a consistant interface between the +other communication operations. The scalar can be a complicated data +structure. + + if ($rank == 0) { # send from 0 + my $msg = [1,2,3, {3=>1, 5=>6} ]; + MPI_Bcast( $msg, 0, MPI_COMM_WORLD); + } + else { # everything else receives, note dummy message + my $msg = MPI_Bcast(undef, 0, MPI_COMM_WORLD); + } + +=cut + +sub Bcast { + my $out; + # my ($data, $from, $comm) = @_; + my $data = nfreeze(\$_[0]); + $out = _Bcast($data, $_[1], $_[2]); + return ${thaw($out)}; +} + +=head2 MPI_Gather + + # if root: + @list = MPI_Gather($scalar, $root, $comm); + #otherwise + (nothing) = MPI_Gather($scalar, $root, $comm); + +Sends $scalar from every process in $comm (each $scalar can be +different, root's data is also sent) to the root process which +collects them as a list of scalars, sorted by process rank order in +$comm. + +=cut +#' +sub Gather { + # my ($ref, $root, $comm) = @_; + my @rt; + my $data = nfreeze(\$_[0]); + foreach (@{ _Gather($data, $_[1], $_[2]) }) { + push @rt, ${thaw($_)}; + } + return @rt; +} + +=head2 MPI_Scatter + + $data = MPI_Scatter([N items of data], $root, $comm); + +Sends list of scalars (anon array as 1st arg) from $root to all +processes in $comm, with process of rank N-1 receiving the Nth item in +the array. Very bad things might happen if number of elements in +array != N. This does not call the C function at any time, so do not +expect any implicit synchronisation. + +=cut + +sub Scatter { + my ($aref, $root, $comm) = @_; + if (Comm_rank($comm) == $root) { + for my $i (0..@$aref-1) { + next if $i == $root; + Send($aref->[$i], $i, 11002, $comm); + } + $aref->[$root]; + } + else { + Recv($root, 11002, $comm); + } +} + +=head2 MPI_Allgather + + @list = MPI_Allgather($scalar, $comm); + +Every process receives an ordered list containing an element from every +other process. Again, this is implemented without a call to the C function. + +=cut + +sub Allgather { + # my ($data, $comm) = @_; + my @rt; + my $frozen = nfreeze(\$_[0]); + for my $i (0..Comm_size($_[1])-1) { + push @rt, ${ thaw(_Bcast($frozen, $i, $_[1])) }; + } + return @rt; +} + +=head2 MPI_Alltoall + + @list = MPI_Alltoall([ list of scalars ], $comm); + +Simillar to Allgather, each process (with rank I) ends up with a +list such that element I contains the data which started in element +I of process Is data. + +=cut + +sub Alltoall { + my ($data, $comm) = @_; + my ($rank, $size) = (Comm_rank($comm), Comm_size($comm)); + + my @rt; + foreach (0..$size-1) { + next if $_ eq $rank; + Send($data->[$_], $_, 1, $comm); + } + foreach (0..$size-1) { + if ($_ eq $rank) { + push @rt, $data->[$_]; next; + } + push @rt, Recv($_, 1, $comm); + } + return @rt; +} + +=head2 MPI_Reduce + + $value = MPI_Reduce($input, \&operation, $comm); + +Every process receives in $value the result of performing &operation +between every processes $input. If there are three processes in +$comm, then C<$value = $input_0 op $input_1 op $input_2>. + +Operation should be a sub which takes two scalar values (the $input +above) and returns a single value. 
The operation it performs should +be commutative and associative, otherwise the result will be undefined. + +For instance, to return the sum of some number held by each process, perform: + + $sum = MPI_Reduce($number, sub {$_[0] + $_[1]}, $comm); + +To find which process holds the greatest value of some number: + + ($max, $mrank) = @{ MPI_Reduce([$number, $rank], + sub { $_[0]->[0] > $_[1]->[0] ? $_[0] : $_[1]} + , $comm) }; + +=cut + +# This version is deprecated, but may be faster +sub Reduce2 { + my ($ref, $code, $comm) = @_; + my ($rank, $size) = (Comm_rank($comm), Comm_size($comm)); + my $rt; + Barrier($comm); # safety first + if ($rank != 0) { + Send($ref, 0, 1, $comm); + $rt = Recv(0,1,$comm); + } + else { + $rt = $ref; + for (1..$size-1) { + $rt = &$code($rt, Recv($_,1,$comm)); + } + for (1..$size-1) { + Send($rt, $_,1,$comm); + } + } + return $rt; +} + +# This should be O(log(P)) in calc and comm +# This version first causes odds to send to evens which reduce, then etc. +sub Reduce { + my ($val, $code, $comm) = @_; + my ($rank, $size) = (Comm_rank($comm), Comm_size($comm)); + my $rt = $val; + my @nodes = (0..$size-1); + while (@nodes>1) { + $#nodes += @nodes % 2; + my %from = @nodes; + my %to = reverse %from; + if ($from{$rank}) { # I'm receiving something + $rt = &$code($rt, Recv($from{$rank}, 1, $comm)); + } + elsif (defined($to{$rank})) {# I'm sending something + Send($rt, $to{$rank}, 1, $comm); + } + @nodes = sort {$a <=> $b} keys %from; + } + # node 0 only to distribute via Broadcast + Bcast($rt, 0, $comm); +} + +1; # I am the ANTI-POD! + +=head1 PHILOSOPHY + +I have decided to loosely follow the MPI calling and naming +conventions but do not want to stick strictly to them in all cases. +In the interests of being simple, I have decided that all errors +should result in the death of the MPI process rather than huge amounts +of error checking being foisted onto the module's user. + +Many of the MPI functions have not been implemented, some of this is +because I feel they would complicate the module (I chose to have a +single version of the Send command, for instance) but some of this is +due to my not having finished yet. I certainly do not expect to +provide process topologies or inter-communicators, I also do not +expect to provide anything in MPI-2 for some time. + +=head1 ISSUES + +This module has been tested on a variety of platforms. I have not +been able to get it running with the mpich MPI implementation in +a clustered environment. + +In general, I expect that most programs using this module will make +use of little other than C, C, C, +C, C, C, C +and C. + +Please send bugs to github: L + +=head1 AUTHOR + + Alex Gough (alex@earth.li) + +=head1 COPYRIGHT + + This module is copyright (c) Alex Gough, 2001,2011. + + You may use and redistribute this software under the Artistic License as + supplied with Perl. + +=cut + +__DATA__ +__C__ +#include +#define GATHER_TAG 11001 /* used to be unlikely to upset other sends */ + +/* + root process first broadcasts length of stored data then broadcasts + the data. Non-root processes receive length (via bcast), allocate + space to take incomming data from root + + Both root and non-root processes then create and return a new scalar + with contents identical to those root started with. 
+*/ + +SV* _Bcast (SV* data, int root, SV* comm) { + int buf_len[1]; + int rank; + SV* rval; + MPI_Comm_rank((MPI_Comm)SvIVX(comm), &rank); + if (rank == root) { + buf_len[0] = sv_len(data); + MPI_Bcast(buf_len, 1, MPI_INT, root, (MPI_Comm)SvIVX(comm)); + MPI_Bcast(SvPVX(data), buf_len[0], MPI_CHAR, root, (MPI_Comm)SvIVX(comm)); + rval = newSVpvn(SvPVX(data), buf_len[0]); + } + else { + char *recv_buf; + MPI_Bcast(buf_len, 1, MPI_INT, root, (MPI_Comm)SvIVX(comm)); + recv_buf = (char*)malloc((buf_len[0]+1)*sizeof(char)); + if (recv_buf == NULL) croak("Allocation error in _Bcast"); + MPI_Bcast(recv_buf, buf_len[0], MPI_CHAR, root, (MPI_Comm)SvIVX(comm)); + rval = newSVpvn(recv_buf, buf_len[0]); + free(recv_buf); + } + return rval; +} + +/* + Finds length of data in stor_ref, sends this to receiver, then + sends actual data, uses same tag for each message. +*/ + +int _Send(SV* stor_ref, int dest, int tag, SV*comm) { + int str_len[1]; + str_len[0] = sv_len(stor_ref); + MPI_Send(str_len, 1, MPI_INT, dest, tag, (MPI_Comm)SvIVX(comm)); + MPI_Send(SvPVX(stor_ref), sv_len(stor_ref),MPI_CHAR, + dest, tag, (MPI_Comm)SvIVX(comm)); + return 0; +} + +/* + Receives int for length of data it should then expect, allocates space + then receives data into that space. Creates a new SV and returns it. +*/ + +SV* _Recv (int source, int tag, SV*comm, SV*status) { + MPI_Status tstatus; + SV* rval; + int len_buf[1]; + char *recv_buf; + + MPI_Recv(len_buf, 1, MPI_INT, source, tag, (MPI_Comm)SvIVX(comm), &tstatus); + recv_buf = (char*)malloc((len_buf[0]+1)*sizeof(char)); + if (recv_buf == NULL) croak("Allocation error in _Recv"); + MPI_Recv(recv_buf, len_buf[0], MPI_CHAR, source, tag, + (MPI_Comm)SvIVX(comm), &tstatus); + rval = newSVpvn(recv_buf, len_buf[0]); + sv_setiv(status, tstatus.MPI_SOURCE); + free(recv_buf); + return rval; +} + +/* Calls MPI_Init with dummy arguments, a bit dodgy but sort of ok */ +int Init () { + MPI_Init((int) NULL, (char ***)NULL); +} + +/* Returns rank of process within comm */ +int _Comm_rank (SV* comm) { + int trank; + MPI_Comm_rank((MPI_Comm)SvIVX(comm),&trank); + return trank; +} + +/* returns total number of processes within comm */ +int _Comm_size (SV* comm) { + int tsize; + MPI_Comm_size((MPI_Comm)SvIVX(comm), &tsize); + return tsize; +} + +/* returns SV whose IV slot is a cast pointer to the MPI_COMM_WORLD object */ +SV* COMM_WORLD () { + return newSViv((IV)MPI_COMM_WORLD); +} + +/* calls MPI_Barrier for comm */ +int Barrier (SV*comm) { + MPI_Barrier((MPI_Comm)SvIVX(comm)); +} + +/* ends MPI participation */ +int Finalize () { + MPI_Finalize(); +} + +/* + If non-root: participates in Gather so that root finds length of data + to expect from this process. Then send (using MPI_Send) + data to root. + + If root: receives array of ints detailing length of scalars held by + other processes, then receives from each in turn (using MPI_Recv) + returns an array ref to root process only. 
+ + */ +SV* _Gather (SV* data, int root, SV* comm) { + int rank, size, *buf_lens, i, max; + char* recv_buf; + int my_buf[1]; + AV* ret_arr; + MPI_Status tstatus; + + /* find out how long data is */ + ret_arr = av_make(0,(SV**)NULL); + my_buf[0] = sv_len(data); + if (_Comm_rank(comm) == root) { + MPI_Comm_size((MPI_Comm)SvIVX(comm), &size); + buf_lens = malloc(size*sizeof(int)); + if (buf_lens == NULL) croak("Allocation error (lens) in _Gather"); + /* gather all scalar length data */ + MPI_Gather(my_buf, 1, MPI_INT, buf_lens, 1, + MPI_INT, root, (MPI_Comm)SvIVX(comm)); + max = 0; // keep buffer allocation calls to minimum + for (i=0;i +#define GATHER_TAG 11001 /* used to be unlikely to upset other sends */ + +/* + root process first broadcasts length of stored data then broadcasts + the data. Non-root processes receive length (via bcast), allocate + space to take incomming data from root + + Both root and non-root processes then create and return a new scalar + with contents identical to those root started with. +*/ + +SV* _Bcast (SV* data, int root, SV* comm) { + int buf_len[1]; + int rank; + SV* rval; + MPI_Comm_rank((MPI_Comm)SvIVX(comm), &rank); + if (rank == root) { + buf_len[0] = sv_len(data); + MPI_Bcast(buf_len, 1, MPI_INT, root, (MPI_Comm)SvIVX(comm)); + MPI_Bcast(SvPVX(data), buf_len[0], MPI_CHAR, root, (MPI_Comm)SvIVX(comm)); + rval = newSVpvn(SvPVX(data), buf_len[0]); + } + else { + char *recv_buf; + MPI_Bcast(buf_len, 1, MPI_INT, root, (MPI_Comm)SvIVX(comm)); + recv_buf = (char*)malloc((buf_len[0]+1)*sizeof(char)); + if (recv_buf == NULL) croak("Allocation error in _Bcast"); + MPI_Bcast(recv_buf, buf_len[0], MPI_CHAR, root, (MPI_Comm)SvIVX(comm)); + rval = newSVpvn(recv_buf, buf_len[0]); + free(recv_buf); + } + return rval; +} + +/* + Finds length of data in stor_ref, sends this to receiver, then + sends actual data, uses same tag for each message. +*/ + +int _Send(SV* stor_ref, int dest, int tag, SV*comm) { + int str_len[1]; + str_len[0] = sv_len(stor_ref); + MPI_Send(str_len, 1, MPI_INT, dest, tag, (MPI_Comm)SvIVX(comm)); + MPI_Send(SvPVX(stor_ref), sv_len(stor_ref),MPI_CHAR, + dest, tag, (MPI_Comm)SvIVX(comm)); + return 0; +} + +/* + Receives int for length of data it should then expect, allocates space + then receives data into that space. Creates a new SV and returns it. 
+*/ + +SV* _Recv (int source, int tag, SV*comm, SV*status) { + MPI_Status tstatus; + SV* rval; + int len_buf[1]; + char *recv_buf; + + MPI_Recv(len_buf, 1, MPI_INT, source, tag, (MPI_Comm)SvIVX(comm), &tstatus); + recv_buf = (char*)malloc((len_buf[0]+1)*sizeof(char)); + if (recv_buf == NULL) croak("Allocation error in _Recv"); + MPI_Recv(recv_buf, len_buf[0], MPI_CHAR, source, tag, + (MPI_Comm)SvIVX(comm), &tstatus); + rval = newSVpvn(recv_buf, len_buf[0]); + sv_setiv(status, tstatus.MPI_SOURCE); + free(recv_buf); + return rval; +} + +/* Calls MPI_Init with dummy arguments, a bit dodgy but sort of ok */ +int Init () { + MPI_Init(&PL_origargc, &PL_origargv); +} + +/* Returns rank of process within comm */ +int _Comm_rank (SV* comm) { + int trank; + MPI_Comm_rank((MPI_Comm)SvIVX(comm),&trank); + return trank; +} + +/* returns total number of processes within comm */ +int _Comm_size (SV* comm) { + int tsize; + MPI_Comm_size((MPI_Comm)SvIVX(comm), &tsize); + return tsize; +} + +/* returns SV whose IV slot is a cast pointer to the MPI_COMM_WORLD object */ +SV* COMM_WORLD () { + return newSViv((IV)MPI_COMM_WORLD); +} + +/* returns SV whose IV slot is a cast pointer to the MPI_ANY_SOURCE value */ +SV* ANY_SOURCE () { + return newSViv((IV)MPI_ANY_SOURCE); +} + +/* calls MPI_Barrier for comm */ +int Barrier (SV*comm) { + MPI_Barrier((MPI_Comm)SvIVX(comm)); +} + +/* ends MPI participation */ +int Finalize () { + MPI_Finalize(); +} + +/* + If non-root: participates in Gather so that root finds length of data + to expect from this process. Then send (using MPI_Send) + data to root. + + If root: receives array of ints detailing length of scalars held by + other processes, then receives from each in turn (using MPI_Recv) + returns an array ref to root process only. + + */ +SV* _Gather (SV* data, int root, SV* comm) { + int rank, size, *buf_lens, i, max; + char* recv_buf; + int my_buf[1]; + AV* ret_arr; + MPI_Status tstatus; + + /* find out how long data is */ + ret_arr = av_make(0,(SV**)NULL); + my_buf[0] = sv_len(data); + if (_Comm_rank(comm) == root) { + MPI_Comm_size((MPI_Comm)SvIVX(comm), &size); + buf_lens = malloc(size*sizeof(int)); + if (buf_lens == NULL) croak("Allocation error (lens) in _Gather"); + /* gather all scalar length data */ + MPI_Gather(my_buf, 1, MPI_INT, buf_lens, 1, + MPI_INT, root, (MPI_Comm)SvIVX(comm)); + max = 0; // keep buffer allocation calls to minimum + for (i=0;i"ok 9 # blessed ref\n"}, 'ZZZZZ::Testing'; + my $sobj; + if ($rank == 0) { + MPI_Send($obj, 1, 0, MPI_COMM_WORLD); + } + else { + $sobj = MPI_Recv($obj, 0, 0, MPI_COMM_WORLD); + print $sobj->method; + } +} + +# Test Allgather +MPI_Barrier(MPI_COMM_WORLD); +{ + my @rt = MPI_Allgather($rank, MPI_COMM_WORLD); + if ($rank ==0) { + print "ok ". ($rt[0]+10) . " # MPI_Allgather\n"; + print "ok ". ($rt[1]+10) . "\n"; + MPI_Barrier(MPI_COMM_WORLD); + } + else { + MPI_Barrier(MPI_COMM_WORLD); + print "ok ". ($rt[0]+12)."\n"; + print "ok ". 
($rt[1]+12)."\n"; + } +} + +# Test Alltoall +{ + MPI_Barrier(MPI_COMM_WORLD); + my @data = (14+2*$rank, 15+2*$rank); + my @return = MPI_Alltoall(\@data, MPI_COMM_WORLD); + if ($rank == 0) { + print "ok $return[0] # MPI_Alltoall\n"; # 14 + MPI_Barrier(MPI_COMM_WORLD); + print "ok $return[1]\n"; + MPI_Barrier(MPI_COMM_WORLD); + } + else { + MPI_Barrier(MPI_COMM_WORLD); + print "ok $return[0]\n"; + MPI_Barrier(MPI_COMM_WORLD); + print "ok $return[1]\n"; + } +} + +# reduce +MPI_Barrier(MPI_COMM_WORLD); +{ + my $rt = MPI_Reduce($rank, sub {$_[0] + $_[1]}, MPI_COMM_WORLD); + if ($rank == 0) { + print "not " unless $rt == 1; + print "ok 18 # reduce\n"; + } + else { + print "not " unless $rt == 1; + print "ok 19 # reduce\n"; + } +} + +MPI_Barrier(MPI_COMM_WORLD); +{ # MPI_Comm_compare + if (MPI_Comm_compare(MPI_COMM_WORLD,MPI_COMM_WORLD) != MPI_IDENT) { + print "not "; + } + print "ok 2$rank # Comm_compare (ident)\n"; +} + +{ + MPI_Barrier(MPI_COMM_WORLD); + my $dup = MPI_Comm_dup(MPI_COMM_WORLD); + if ($rank==0&&MPI_Comm_compare($dup, MPI_COMM_WORLD) != MPI_CONGRUENT) { + print "not "; + } + print "ok 22 # comm_dup\n" if $rank ==0; + MPI_Comm_free($dup); +} + +{ + MPI_Barrier(MPI_COMM_WORLD); + if ($rank ==0 ) { + my $newcomm = MPI_Comm_split(MPI_COMM_WORLD, $rank, 0); + if (MPI_Comm_compare($newcomm, MPI_COMM_WORLD) != + MPI_UNEQUAL) { + print "not "; + } + print "ok 23 # MPI_Comm_split\n"; + MPI_Comm_free($newcomm); + } + else { + my $rt=MPI_Comm_split(MPI_COMM_WORLD, MPI_UNDEFINED, 0); + if (defined($rt)) {print "not "} + print "ok 24 # MPI_Comm_split, not in new\n"; + } +} + +MPI_Barrier(MPI_COMM_WORLD); +if ($rank == 0) { + my $msg = "Sending from ANY"; + MPI_Send($msg,1,0,MPI_COMM_WORLD); + print "ok 25 # sent from ANY\n"; +} +else { + my $msg = MPI_Recv(MPI_ANY_SOURCE,0,MPI_COMM_WORLD); + if ($msg =~ /Sending from ANY/) { + print "ok 26 # receive from ANY_SOURCE"; + } + else { + print "not ok 26 # receive from ANY_SOURCE"; + } +} + +MPI_Finalize(); +exit(0); + +package ZZZZZ::Testing; +sub method { + return $_[0]->{cows}; +} + diff --git a/test.pl b/test.pl new file mode 100644 index 0000000..4eb8eb8 --- /dev/null +++ b/test.pl @@ -0,0 +1,17 @@ +my $mpirun = ""; +foreach my $mpi_try (qw(mpiexec mpirun)) { + my $test = join("",`$mpi_try -n 1 perl -e "print qq{honk},qq{honk\n}"`); + $mpirun = $mpi_try if $test =~ /honkhonk/; + last if $mpirun; +} +$mpirun = $mpirun || "mpirun"; # fallback +my $incs; +$incs .= " -I$_" foreach @INC; +my @newout = sort { + (($a =~ /(\d+)/g)[0] <=> ($b =~ /(\d+)/g)[0]) +} `$mpirun -np 2 $^X $incs ic.pl`; +print "1..26\n"; +if (@newout < 25) { + print "not ok 1 # mpirun failed. Do you need to start mpd?\n"; +} +print @newout; -- 2.39.2
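
For reference, the collective operations documented in Simple.pm can be
exercised with a short driver along these lines (a sketch, not part of the
imported 0.10 sources; launch with something like `mpirun -np 4 perl
scatter_reduce.pl`, where the script name is only illustrative):

  use Parallel::MPI::Simple;
  MPI_Init();
  my $comm = MPI_COMM_WORLD;
  my ($rank, $size) = (MPI_Comm_rank($comm), MPI_Comm_size($comm));
  # Rank 0 hands one number to each process, including itself...
  my $mine = MPI_Scatter([ map { $_ * 10 } 1 .. $size ], 0, $comm);
  # ...and every process then receives the sum of all scattered numbers.
  my $sum = MPI_Reduce($mine, sub { $_[0] + $_[1] }, $comm);
  print "rank $rank: got $mine, total $sum\n";
  MPI_Finalize();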