8 use File::Spec::Functions;
9 use Statistics::R::Legacy;
10 use IPC::Run qw( harness start pump finish );
11 use Text::Balanced qw ( extract_delimited extract_multiple );
13 if ( $^O =~ m/^(?:.*?win32|dos)$/i ) {
14 require Statistics::R::Win32;
17 our $VERSION = '0.24';
19 our ($SHARED_BRIDGE, $SHARED_STDIN, $SHARED_STDOUT, $SHARED_STDERR);
21 my $prog = 'R'; # executable we are after... R
22 my $eos = 'Statistics::R::EOS'; # string to signal the R output stream end
23 my $eos_re = qr/$eos\n$/; # regexp to match end of R stream
27 Statistics::R - Perl interface with the R statistical program
31 I<Statistics::R> is a module to controls the R interpreter (R project for statistical
32 computing: L<http://www.r-project.org/>). It lets you start R, pass commands to
33 it and retrieve the output. A shared mode allow to have several instances of
34 I<Statistics::R> talk to the same R process.
36 The current I<Statistics::R> implementation uses pipes (for stdin, stdout and
37 and stderr) to communicate with R. This implementation should be more efficient
38 and reliable than that in previous version, which relied on reading and writing
39 files. As before, this module works on GNU/Linux, MS Windows and probably many
46 # Create a communication bridge with R and start R
47 my $R = Statistics::R->new();
49 # Run simple R commands
50 my $output_file = "file.ps";
51 $R->run(qq`postscript("$output_file" , horizontal=FALSE , width=500 , height=500 , pointsize=1)`);
52 $R->run(q`plot(c(1, 5, 10), type = "l")`);
53 $R->run(q`dev.off()`);
55 # Pass and retrieve data (scalars or arrays)
57 $R->set('x', $input_value);
59 my $output_value = $R->get('y');
60 print "y = $output_value\n";
70 Build a I<Statistics::R> bridge object between Perl and R. Available options are:
77 Specify the full path to R if it is not automatically found. See L<INSTALLATION>.
81 Start a shared bridge. When using a shared bridge, several instances of
82 Statistics::R can communicate with the same unique R instance. Example:
86 my $R1 = Statistics::R->new( shared => 1);
87 my $R2 = Statistics::R->new( shared => 1);
89 $R1->set( 'x', 'pear' );
90 my $x = $R2->get( 'x' );
93 Do not call the I<stop()> method is you still have processes that need to interact
101 First, start() R if it is not yet running. Then, execute R commands passed as a
102 string and return the output as a string. If your command fails to run in R, an
103 error message will be displayed.
107 my $out = $R->run( q`print( 1 + 2 )` );
109 If you intend on runnning many R commands, it may be convenient to pass an array
110 of commands or put multiple commands in an here-doc:
112 # Array of R commands:
120 # Here-doc with multiple R commands:
127 my $out2 = $R->run($cmds);
129 To run commands from a file, see the run_from_file() method.
131 The output you get from run() is the combination of what R would display on the
132 standard output and the standard error, but the order may differ. When loading
133 modules, some may write numerous messages on standard error. You can disable
134 this behavior using the following R command:
136 suppressPackageStartupMessages(library(library_to_load))
139 =item run_from_file()
141 Similar to run() but reads the R commands from the specified file. Internally,
142 this method uses the R source() command to read the file.
146 Set the value of an R variable (scalar or arrayref). Example:
148 $R->set( 'x', 'pear' );
152 $R->set( 'y', [1, 2, 3] );
157 Get the value of an R variable (scalar or arrayref). Example:
159 my $x = $R->get( 'x' ); # $y is an scalar
163 my $y = $R->get( 'y' ); # $x is an arrayref
167 Explicitly start R. Most times, you do not need to do that because the first
168 execution of run() or set() will automatically call start().
172 Stop a running instance of R.
176 stop() and start() R.
180 Get or set the path to the R executable.
184 Was R started in shared mode?
192 Return the pid of the running R process
198 Since I<Statistics::R> relies on R to work, you need to install R first. See this
199 page for downloads, L<http://www.r-project.org/>. If R is in your PATH environment
200 variable, then it should be available from a terminal and be detected
201 automatically by I<Statistics::R>. This means that you don't have to do anything
202 on Linux systems to get I<Statistics::R> working. On Windows systems, in addition
203 to the folders described in PATH, the usual suspects will be checked for the
204 presence of the R binary, e.g. C:\Program Files\R. If I<Statistics::R> does not
205 find R installation, your last recourse is to specify its full path when calling
208 my $R = Statistics::R->new( r_bin => $fullpath );
210 You also need to have the following CPAN Perl modules installed:
214 =item Text::Balanced (>= 1.97)
226 =item * L<Statistics::R::Win32>
228 =item * L<Statistics::R::Legacy>
230 =item * The R-project web site: L<http://www.r-project.org/>
232 =item * Statistics:: modules for Perl: L<http://search.cpan.org/search?query=Statistics&mode=module>
238 Florent Angly E<lt>florent.angly@gmail.comE<gt> (2011 rewrite)
240 Graciliano M. P. E<lt>gm@virtuasites.com.brE<gt> (original code)
244 Brian Cassidy E<lt>bricas@cpan.orgE<gt>
246 =head1 COPYRIGHT & LICENSE
248 This program is free software; you can redistribute it and/or
249 modify it under the same terms as Perl itself.
253 All complex software has bugs lurking in it, and this program is no exception.
254 If you find a bug, please report it on the CPAN Tracker of Statistics::R:
255 L<http://rt.cpan.org/Dist/Display.html?Name=Statistics-R>
257 Bug reports, suggestions and patches are welcome. The Statistics::R code is
258 developed on Github (L<http://github.com/bricas/statistics-r>) and is under Git
259 revision control. To get the latest revision, run:
261 git clone git@github.com:bricas/statistics-r.git
267 # Create a new R communication object
268 my ($class, %args) = @_;
270 bless $self, ref($class) || $class;
271 $self->initialize( %args );
277 # Get (/ set) the whether or not Statistics::R is setup to run in shared mode
278 my ($self, $val) = @_;
280 $self->{is_shared} = $val;
282 return $self->{is_shared};
287 no warnings 'redefine';
289 my ($self, %args) = @_;
291 if (not $self->is_started) {
293 # If shared mode option of start() requested, rebuild the bridge in shared
294 # mode. Don't use this option though. It is only here to cater for the legacy
295 # method start_shared()
296 if ( exists($args{shared}) && ($args{shared} == 1) ) {
297 $self->is_shared( 1 );
302 my $bridge = $self->bridge;
303 $status = $bridge->start or die "Error starting $prog: $?\n";
304 $self->bin( $bridge->{KIDS}->[0]->{PATH} );
315 if ($self->is_started) {
316 $status = $self->bridge->finish or die "Error stopping $prog: $?\n";
324 return $self->stop && $self->start;
329 # Query whether or not R is currently running
330 return shift->bridge->{STATE} eq IPC::Run::_started ? 1 : 0;
335 # Get (/ set) the PID of the running R process. It is accessible only after
336 # the bridge has start()ed
337 return shift->bridge->{KIDS}->[0]->{PID};
342 # Get / set the full path to the R binary program to use. Unless you have set
343 # the path yourself, it is accessible only after the bridge has start()ed
344 my ($self, $val) = @_;
353 # Pass the input and get the output
354 my ($self, @cmds) = @_;
356 # Need to start R now if it is not already running
357 $self->start if not $self->is_started;
360 # Process each command
362 for my $cmd (@cmds) {
364 # Wrap command for execution in R
365 $self->stdin( $self->wrap_cmd($cmd) );
367 # Pass input to R and get its output
368 my $bridge = $self->bridge;
369 while ( $self->stdout !~ m/$eos_re/gc && $bridge->pumpable ) {
373 # Parse outputs, detect errors
374 my $out = $self->stdout;
375 $out =~ s/$eos_re//g;
377 my $err = $self->stderr;
379 if ($out =~ m/<simpleError.*?:(.*)>/sg) {
380 # Parse (multi-line) error message
381 my $err_msg = $1."\n".$err;
382 die "Problem running the R command:\n$cmd\n\nGot the error:\n$err_msg\n";
385 # Save results and reinitialize
386 $results .= "\n" if $results;
387 $results .= $err.$out;
393 $self->result($results);
400 my ($self, $file) = @_;
401 my $results = $self->run( qq`source('$file')` );
407 # Assign a variable or array of variables in R. Use undef if you want to
408 # assign NULL to an R variable
409 my ($self, $varname, $arr) = @_;
411 # Check variable type, convert everything into an arrayref
416 } elsif ($ref eq 'ARRAY') {
417 # This is an array reference, nothing to do
419 die "Error: Import variable of type $ref is not supported\n";
422 # Quote strings and nullify undef variables
423 for (my $i = 0; $i < scalar @$arr; $i++) {
424 if (defined $$arr[$i]) {
425 if ( $$arr[$i] !~ /^$RE{num}{real}$/ ) {
426 $$arr[$i] = '"'.$$arr[$i].'"';
433 # Build a string and run it to import data
434 my $cmd = $varname.' <- c('.join(', ',@$arr).')';
441 # Get the value of an R variable
442 my ($self, $varname) = @_;
443 my $string = $self->run(qq`print($varname)`);
447 if ($string eq 'NULL') {
449 } elsif ($string =~ m/^\s*\[\d+\]/) {
450 # Vector: its string look like:
451 # ' [1] 6.4 13.3 4.1 1.3 14.1 10.6 9.9 9.6 15.3
452 # [16] 5.2 10.9 14.4'
453 my @lines = split /\n/, $string;
454 for (my $i = 0; $i < scalar @lines; $i++) {
455 $lines[$i] =~ s/^\s*\[\d+\] //;
457 $value = join ' ', @lines;
459 my @lines = split /\n/, $string;
460 if (scalar @lines == 2) {
461 # String looks like: ' mean
463 # Extract value from second line
465 $value =~ s/^\s*(\S+)\s*$/$1/;
467 #die "Error: Don't know how to handle this R output\n$string\n";
474 if (not defined $value) {
477 # Split string into an array, paying attention to strings containing spaces
478 @arr = extract_multiple( $value, [sub { extract_delimited($_[0],q{ '"}) },] );
479 for (my $i = 0; $i < scalar @arr; $i++) {
481 if ($elem =~ m/^\s*$/) {
482 # Remove elements that are simply whitespaces
487 $arr[$i] =~ s/^\s*(.*?)\s*$/$1/;
488 # Remove double-quotes
489 $arr[$i] =~ s/^"(.*)"$/$1/;
494 # Return either a scalar of an arrayref
496 if (scalar @arr == 1) {
506 #---------- INTERNAL METHODS --------------------------------------------------#
510 my ($self, %args) = @_;
514 if ( $args{ r_bin } || $args{ R_bin } ) {
515 $bin = $args{ r_bin } || $args{ R_bin };
517 $bin = $prog; # IPC::Run will find the full path for the program later
522 if ( exists($args{shared}) && ($args{shared} == 1) ) {
523 $self->is_shared( 1 );
525 $self->is_shared( 0 );
536 # Get or build the communication bridge and IOs with R
537 my ($self, $build) = @_;
539 my $cmd = [ $self->bin, '--vanilla', '--slave' ];
540 if (not $self->is_shared) {
541 my ($stdin, $stdout, $stderr);
542 $self->{stdin} = \$stdin;
543 $self->{stdout} = \$stdout;
544 $self->{stderr} = \$stderr;
545 $self->{bridge} = harness $cmd, $self->{stdin}, $self->{stdout}, $self->{stderr};
547 $self->{stdin} = \$SHARED_STDIN ;
548 $self->{stdout} = \$SHARED_STDOUT;
549 $self->{stderr} = \$SHARED_STDERR;
550 if (not defined $SHARED_BRIDGE) {
551 # The first Statics::R instance builds the bridge
552 $SHARED_BRIDGE = harness $cmd, $self->{stdin}, $self->{stdout}, $self->{stderr};
554 $self->{bridge} = $SHARED_BRIDGE;
557 return $self->{bridge};
562 # Get / set standard input string for R
563 my ($self, $val) = @_;
565 ${$self->{stdin}} = $val;
567 return ${$self->{stdin}};
572 # Get / set standard output string for R
573 my ($self, $val) = @_;
575 ${$self->{stdout}} = $val;
577 return ${$self->{stdout}};
582 # Get / set standard error string for R
583 my ($self, $val) = @_;
585 ${$self->{stderr}} = $val;
587 return ${$self->{stderr}};
592 # Get / set result of last R command
593 my ($self, $val) = @_;
595 $self->{result} = $val;
597 return $self->{result};
602 # Wrap a command to pass to R. Whether the command is successful or not, the
603 # end of stream string will appear on stdout and indicate that R has finished
604 # processing the data. Note that $cmd can be multiple R commands.
605 my ($self, $cmd) = @_;
607 # Escape double-quotes
610 # Evaluate command (and catch syntax and runtime errors)
611 $cmd = qq`tryCatch( eval(parse(text="$cmd")) , error = function(e){print(e)} ); write("$eos",stdout())\n`;