X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;ds=sidebyside;f=lib%2FStatistics%2FR.pm;fp=lib%2FStatistics%2FR.pm;h=7ef37f4632e3c4a0b7221d0bcd6e2b383a3bd25e;hb=5e71068076c23e49ee5655b94ea2c4b403790cbf;hp=8b3371dd8d7455cd7c2b313f72e52efdac2783f4;hpb=9d6b5f35bbae817f76a5099702e13836e5957d80;p=deb_pkgs%2Flibstatistics-r-perl.git diff --git a/lib/Statistics/R.pm b/lib/Statistics/R.pm index 8b3371d..7ef37f4 100644 --- a/lib/Statistics/R.pm +++ b/lib/Statistics/R.pm @@ -1,43 +1,22 @@ package Statistics::R; -use 5.006; -use strict; -use warnings; -use Regexp::Common; -use File::Spec::Functions; -use Statistics::R::Legacy; -use IPC::Run qw( harness start pump finish ); -use Text::Balanced qw ( extract_delimited extract_multiple ); - -if ( $^O =~ m/^(?:.*?win32|dos)$/i ) { - require Statistics::R::Win32; -} - -our $VERSION = '0.24'; - -our ($SHARED_BRIDGE, $SHARED_STDIN, $SHARED_STDOUT, $SHARED_STDERR); - -my $prog = 'R'; # executable we are after... R -my $eos = 'Statistics::R::EOS'; # string to signal the R output stream end -my $eos_re = qr/$eos\n$/; # regexp to match end of R stream - =head1 NAME Statistics::R - Perl interface with the R statistical program =head1 DESCRIPTION -I is a module to controls the R interpreter (R project for statistical -computing: L). It lets you start R, pass commands to -it and retrieve the output. A shared mode allow to have several instances of -I talk to the same R process. +I is a module to controls the R interpreter (R project for +statistical computing: L). It lets you start R, pass +commands to it and retrieve their output. A shared mode allows several instances +of I to talk to the same R process. -The current I implementation uses pipes (for stdin, stdout and -and stderr) to communicate with R. This implementation should be more efficient -and reliable than that in previous version, which relied on reading and writing -files. As before, this module works on GNU/Linux, MS Windows and probably many -more systems. +The current I implementation uses pipes (stdin, stdout and stderr) +to communicate with R. This implementation is more efficient and reliable than +that in versions < 0.20, which relied on reading and writing intermediary files. +As before, this module works on GNU/Linux, MS Windows and probably many more +systems. I has been tested with R version 2 and 3. =head1 SYNOPSIS @@ -48,7 +27,7 @@ more systems. # Run simple R commands my $output_file = "file.ps"; - $R->run(qq`postscript("$output_file" , horizontal=FALSE , width=500 , height=500 , pointsize=1)`); + $R->run(qq`postscript("$output_file", horizontal=FALSE, width=500, height=500)`); $R->run(q`plot(c(1, 5, 10), type = "l")`); $R->run(q`dev.off()`); @@ -67,14 +46,15 @@ more systems. =item new() -Build a I bridge object between Perl and R. Available options are: - +Build a I bridge object connecting Perl and R. Available options +are: =over 4 -=item r_bin +=item bin -Specify the full path to R if it is not automatically found. See L. +Specify the full path to the R executable, if it is not automatically found. See +L. =item shared @@ -90,26 +70,30 @@ Statistics::R can communicate with the same unique R instance. Example: my $x = $R2->get( 'x' ); print "x = $x\n"; -Do not call the I method is you still have processes that need to interact -with R. + $R1->stop; # or $R2->stop + +Note that in shared mode, you are responsible for calling the I method +from one of your Statistics::R instances when you are finished. But be careful +not to call the I method if you still have processes that need to +interact with R! =back =item run() -First, start() R if it is not yet running. Then, execute R commands passed as a -string and return the output as a string. If your command fails to run in R, an -error message will be displayed. +First, I R if it is not yet running. Then, execute R commands passed +as a string and return the output as a string. If your commands failed to run +in R, an error message will be displayed. Example: my $out = $R->run( q`print( 1 + 2 )` ); -If you intend on runnning many R commands, it may be convenient to pass an array +If you intend on runnning many R commands, it may be convenient to pass a list of commands or put multiple commands in an here-doc: - # Array of R commands: + # List of R commands: my $out1 = $R->run( q`a <- 2`, q`b <- 5`, @@ -126,58 +110,85 @@ of commands or put multiple commands in an here-doc: EOF my $out2 = $R->run($cmds); -To run commands from a file, see the run_from_file() method. +Alternatively, to run commands from a file, use the I method. + +The return value you get from I is a combination of what R would display +on the standard output and the standard error, but the exact order may differ. -The output you get from run() is the combination of what R would display on the -standard output and the standard error, but the order may differ. When loading -modules, some may write numerous messages on standard error. You can disable -this behavior using the following R command: +When loading modules, some may write numerous messages on standard error. You +can disable this behavior using the following R command: suppressPackageStartupMessages(library(library_to_load)) +Note that older versions of R impose a limit on how many characters can be +contained on a line: about 4076 bytes maximum. You will be warned if this +occurs, with an error message stating: + + '\0' is an unrecognized escape in character string starting "... + +In this case, try to break down your R code into several smaller, more +manageable statements. Alternatively, adding newline characters "\n" at +strategic places in the R statements will work around the issue. =item run_from_file() -Similar to run() but reads the R commands from the specified file. Internally, -this method uses the R source() command to read the file. +Similar to I but reads the R commands from the specified file. +Internally, this method converts the filename to a format compatible with R and +then passes it to the R I command to read the file and execute the +commands. + +=item result() + +Get the results from the last R command. =item set() -Set the value of an R variable (scalar or arrayref). Example: +Set the value of an R variable (scalar or vector). Example: + # Create an R scalar $R->set( 'x', 'pear' ); -or +or + # Create an R list $R->set( 'y', [1, 2, 3] ); - =item get() -Get the value of an R variable (scalar or arrayref). Example: +Get the value of an R variable (scalar or vector). Example: - my $x = $R->get( 'x' ); # $y is an scalar + # Retrieve an R scalar. $x is a Perl scalar. + my $x = $R->get( 'x' ); or - my $y = $R->get( 'y' ); # $x is an arrayref + # Retrieve an R list. $x is a Perl arrayref. + my $y = $R->get( 'y' ); =item start() Explicitly start R. Most times, you do not need to do that because the first -execution of run() or set() will automatically call start(). +execution of I or I will automatically call I. =item stop() -Stop a running instance of R. +Stop a running instance of R. You need to call this method after running a +shared bridge. For a simple bridge, you do not need to do this because +I is automatically called when the Statistics::R object goes out of +scope. =item restart() -stop() and start() R. +I and I R. =item bin() -Get or set the path to the R executable. +Get or set the path to the R executable. Note that the path will be available +only after start() has been called. + +=item version() + +Get the version number of R. =item is_shared() @@ -189,33 +200,37 @@ Is R running? =item pid() -Return the pid of the running R process +Return the PID of the running R process =back =head1 INSTALLATION -Since I relies on R to work, you need to install R first. See this -page for downloads, L. If R is in your PATH environment -variable, then it should be available from a terminal and be detected -automatically by I. This means that you don't have to do anything -on Linux systems to get I working. On Windows systems, in addition -to the folders described in PATH, the usual suspects will be checked for the -presence of the R binary, e.g. C:\Program Files\R. If I does not -find R installation, your last recourse is to specify its full path when calling -new(): +Since I relies on R to work, you need to install R first. See +this page for downloads, L. If R is in your PATH +environment variable, then it should be available from a terminal and be +detected automatically by I. This means that you don't have to do +anything on Linux systems to get I working. On Windows systems, +in addition to the folders described in PATH, the usual suspects will be checked +for the presence of the R binary, e.g. C:\Program Files\R. If I +does not find where R is installed, your last recourse is to specify its full +path when calling new(): - my $R = Statistics::R->new( r_bin => $fullpath ); + my $R = Statistics::R->new( bin => $fullpath ); You also need to have the following CPAN Perl modules installed: =over 4 -=item Text::Balanced (>= 1.97) +=item IPC::Run =item Regexp::Common -=item IPC::Run +=item Text::Balanced (>= 1.97) + +=item Text::Wrap + +=item version (>= 0.77) =back @@ -229,7 +244,7 @@ You also need to have the following CPAN Perl modules installed: =item * The R-project web site: L -=item * Statistics:: modules for Perl: L +=item * Statistics::* modules for Perl: L =back @@ -239,7 +254,9 @@ Florent Angly Eflorent.angly@gmail.comE (2011 rewrite) Graciliano M. P. Egm@virtuasites.com.brE (original code) -=head1 MAINTAINER +=head1 MAINTAINERS + +Florent Angly Eflorent.angly@gmail.comE Brian Cassidy Ebricas@cpan.orgE @@ -258,23 +275,59 @@ Bug reports, suggestions and patches are welcome. The Statistics::R code is developed on Github (L) and is under Git revision control. To get the latest revision, run: - git clone git@github.com:bricas/statistics-r.git + git clone git://github.com/bricas/statistics-r.git =cut +use 5.006; +use strict; +use warnings; +use version; +use Regexp::Common; +use Statistics::R::Legacy; +use IPC::Run qw( harness start pump finish ); +use File::Spec::Functions qw(catfile splitpath splitdir); +use Text::Balanced qw ( extract_delimited extract_multiple ); + +if ( $^O =~ m/^(?:.*?win32|dos)$/i ) { + require Statistics::R::Win32; +} + +our $VERSION = '0.34'; + +our ($SHARED_BRIDGE, $SHARED_STDIN, $SHARED_STDOUT, $SHARED_STDERR); + +use constant DEBUG => 0; # debugging messages +use constant PROG => 'R'; # executable name... R +use constant MAXLINELEN => 1023; # maximum line length for R < 2.5 + +use constant EOS => '\\1'; # indicate the end of R output with \1 +use constant EOS_RE => qr/[${\(EOS)}]\n$/; # regexp to match end of R stream + +use constant NUMBER_RE => qr/^$RE{num}{real}$/; # regexp matching numbers +use constant BLANK_RE => qr/^\s*$/; # regexp matching whitespaces +use constant ILINE_RE => qr/^\s*\[\d+\] /; # regexp matching indexed line + +my $ERROR_STR_1 = 'Error: '; +my $ERROR_STR_2 = 'Error in '; +my $ERROR_RE; # regexp matching R errors + +my $WRAP_LINES = sub { return shift }; # function to wrap R commands + + sub new { # Create a new R communication object my ($class, %args) = @_; my $self = {}; bless $self, ref($class) || $class; - $self->initialize( %args ); + $self->_initialize( %args ); return $self; } sub is_shared { - # Get (/ set) the whether or not Statistics::R is setup to run in shared mode + # Get (or set) the whether or not Statistics::R is setup to run in shared mode my ($self, $val) = @_; if (defined $val) { $self->{is_shared} = $val; @@ -295,13 +348,34 @@ sub start { # method start_shared() if ( exists($args{shared}) && ($args{shared} == 1) ) { $self->is_shared( 1 ); - $self->bridge( 1 ); + $self->_bridge( 1 ); } # Now, start R - my $bridge = $self->bridge; - $status = $bridge->start or die "Error starting $prog: $?\n"; + my $bridge = $self->_bridge; + $status = $bridge->start or die "Error starting ".PROG.": $?\n"; $self->bin( $bridge->{KIDS}->[0]->{PATH} ); + delete $self->{died}; + print "DBG: Started R, ".$self->bin." (pid ".$self->pid.")\n" if DEBUG; + + # Generate regexp to catch R errors + if (not defined $ERROR_RE) { + $self->_generate_error_re; + $self->_localize_error_str; + $self->_generate_error_re; + } + + # Set up a function to wrap lines for R < 2.5 + if ( version->parse($self->version) < version->parse('2.5.0') ) { + print "DBG: Need to wrap to ".MAXLINELEN."\n" if DEBUG; + require Text::Wrap; + $Text::Wrap::columns = MAXLINELEN; + $Text::Wrap::break = ','; + $Text::Wrap::huge = 'overflow'; + $Text::Wrap::separator = ",\n"; + $WRAP_LINES = sub { return Text::Wrap::wrap('', '', shift) }; + } + } return $status; @@ -312,8 +386,9 @@ sub start { sub stop { my ($self) = @_; my $status = 1; - if ($self->is_started) { - $status = $self->bridge->finish or die "Error stopping $prog: $?\n"; + if ( $self->is_started ) { + $status = $self->_bridge->finish or die "Error stopping ".PROG.": $?\n"; + print "DBG: Stopped R\n" if DEBUG; } return $status; } @@ -326,20 +401,41 @@ sub restart { sub is_started { - # Query whether or not R is currently running - return shift->bridge->{STATE} eq IPC::Run::_started ? 1 : 0; + # Query whether or not R has been started and is still running - hackish. + # See https://rt.cpan.org/Ticket/Display.html?id=70595 + my ($self) = @_; + my $is_started = 0; + my $bridge = $self->_bridge; + if (defined $bridge && not $self->{died}) { + if (not exists $bridge->{STATE}) { + die "Internal error: could not get STATE from IPC::Run\n"; + } + if ($bridge->{STATE} eq IPC::Run::_started && $bridge->pumpable) { + $is_started = 1; + } + } + return $is_started; } sub pid { - # Get (/ set) the PID of the running R process. It is accessible only after - # the bridge has start()ed - return shift->bridge->{KIDS}->[0]->{PID}; + # Get (or set) the PID of the running R process - hackish. + # See https://rt.cpan.org/Ticket/Display.html?id=70595It + # The PID is accessible only after the bridge has start()ed. + my ($self) = @_; + my $bridge = $self->_bridge; + if ( not exists $bridge->{KIDS} ) { + die "Internal error: could not get KIDS from IPC::Run\n"; + } + if ( not exists $bridge->{KIDS}->[0]->{PID} ) { + die "Internal error: could not get PID from IPC::Run\n"; + } + return $bridge->{KIDS}->[0]->{PID}; } sub bin { - # Get / set the full path to the R binary program to use. Unless you have set + # Get or set the full path to the R binary program to use. Unless you have set # the path yourself, it is accessible only after the bridge has start()ed my ($self, $val) = @_; if (defined $val) { @@ -349,6 +445,13 @@ sub bin { } +sub version { + # Get the version of R, e.g. '3.1.1' + my ($self) = @_; + return $self->run(q`write(paste(sep=".",R.Version()$major,R.Version()$minor), stdout())`); +} + + sub run { # Pass the input and get the output my ($self, @cmds) = @_; @@ -356,38 +459,51 @@ sub run { # Need to start R now if it is not already running $self->start if not $self->is_started; - # Process each command my $results = ''; for my $cmd (@cmds) { # Wrap command for execution in R - $self->stdin( $self->wrap_cmd($cmd) ); + print "DBG: Command is '$cmd'\n" if DEBUG; + $self->_stdin( $self->wrap_cmd($cmd) ); + print "DBG: stdin is '".$self->_stdin."'\n" if DEBUG; # Pass input to R and get its output - my $bridge = $self->bridge; - while ( $self->stdout !~ m/$eos_re/gc && $bridge->pumpable ) { + my $bridge = $self->_bridge; + while ( $self->_stdout !~ EOS_RE && $bridge->pumpable ) { $bridge->pump; } - # Parse outputs, detect errors - my $out = $self->stdout; - $out =~ s/$eos_re//g; + # Parse output, detect errors + my $out = $self->_stdout; + $out =~ s/${\(EOS_RE)}//; chomp $out; - my $err = $self->stderr; + my $err = $self->_stderr; chomp $err; - if ($out =~ m//sg) { - # Parse (multi-line) error message - my $err_msg = $1."\n".$err; - die "Problem running the R command:\n$cmd\n\nGot the error:\n$err_msg\n"; + + print "DBG: stdout is '$out'\n" if DEBUG; + print "DBG: stderr is '$err'\n" if DEBUG; + + if ($err =~ $ERROR_RE) { + # Catch errors on stderr. Leave warnings alone. + print "DBG: Error\n" if DEBUG; + $self->{died} = 1; # for proper cleanup after failed eval + my $err_msg = "Error:\n".$1; + if ( $err_msg =~ /unrecognized escape in character string/ && + version->parse($self->version) < version->parse('2.5.0') ) { + $err_msg .= "\nMost likely, the given R command contained lines ". + "exceeding ".MAXLINELEN." characters."; + } + $self->_stdout(''); + $self->_stderr(''); + die "Problem while running this R command:\n$cmd\n\n$err_msg\n"; } - + # Save results and reinitialize $results .= "\n" if $results; $results .= $err.$out; - $self->stdout(''); - $self->stderr(''); - + $self->_stdout(''); + $self->_stderr(''); } $self->result($results); @@ -397,17 +513,49 @@ sub run { sub run_from_file { - my ($self, $file) = @_; - my $results = $self->run( qq`source('$file')` ); + # Execute commands in given file: first, convert filepath to an R-compatible + # format and then pass it to source(). + my ($self, $filepath) = @_; + if (not -f $filepath) { + die "Error: '$filepath' does not seem to exist or is not a file.\n"; + } + + # Split filepath + my ($volume, $directories, $filename) = splitpath($filepath); + my @elems; + push @elems, $volume if $volume; # $volume is '' if unused + push @elems, splitdir($directories); + push @elems, $filename; + + # Use file.path to create an R-compatible filename (bug #77761), e.g.: + # file <- file.path("E:", "DATA", "example.csv") + # Then use source() to read file and execute the commands it contains + # source(file) + my $cmd = 'source(file.path('.join(',',map {'"'.$_.'"'}@elems).'))'; + my $results = $self->run($cmd); + return $results; } +sub result { + # Get / set result of last R command + my ($self, $val) = @_; + if (defined $val) { + $self->{result} = $val; + } + return $self->{result}; +} + + sub set { # Assign a variable or array of variables in R. Use undef if you want to # assign NULL to an R variable my ($self, $varname, $arr) = @_; + # Start R now if it is not already running + $self->start if not $self->is_started; + # Check variable type, convert everything into an arrayref my $ref = ref $arr; if ($ref eq '') { @@ -420,19 +568,21 @@ sub set { } # Quote strings and nullify undef variables - for (my $i = 0; $i < scalar @$arr; $i++) { + for my $i (0 .. scalar @$arr - 1) { if (defined $$arr[$i]) { - if ( $$arr[$i] !~ /^$RE{num}{real}$/ ) { - $$arr[$i] = '"'.$$arr[$i].'"'; + if ( $$arr[$i] !~ NUMBER_RE ) { + $$arr[$i] = _quote( $$arr[$i] ); } } else { $$arr[$i] = 'NULL'; } } - # Build a string and run it to import data - my $cmd = $varname.' <- c('.join(', ',@$arr).')'; - $self->run($cmd); + # Build a variable assignment command and run it! + my $cmd = $varname.'<-c('.join(',',@$arr).')'; + $cmd = &$WRAP_LINES( $cmd ); + $self->run( $cmd ); + return 1; } @@ -446,13 +596,13 @@ sub get { my $value; if ($string eq 'NULL') { $value = undef; - } elsif ($string =~ m/^\s*\[\d+\]/) { + } elsif ($string =~ ILINE_RE) { # Vector: its string look like: # ' [1] 6.4 13.3 4.1 1.3 14.1 10.6 9.9 9.6 15.3 # [16] 5.2 10.9 14.4' my @lines = split /\n/, $string; - for (my $i = 0; $i < scalar @lines; $i++) { - $lines[$i] =~ s/^\s*\[\d+\] //; + for my $i (0 .. scalar @lines - 1) { + $lines[$i] =~ s/${\(ILINE_RE)}//; } $value = join ' ', @lines; } else { @@ -461,10 +611,8 @@ sub get { # String looks like: ' mean # 10.41111 ' # Extract value from second line - $value = $lines[1]; - $value =~ s/^\s*(\S+)\s*$/$1/; + $value = _trim( $lines[1] ); } else { - #die "Error: Don't know how to handle this R output\n$string\n"; $value = $string; } } @@ -474,20 +622,27 @@ sub get { if (not defined $value) { @arr = ( undef ); } else { - # Split string into an array, paying attention to strings containing spaces - @arr = extract_multiple( $value, [sub { extract_delimited($_[0],q{ '"}) },] ); - for (my $i = 0; $i < scalar @arr; $i++) { - my $elem = $arr[$i]; - if ($elem =~ m/^\s*$/) { - # Remove elements that are simply whitespaces - splice @arr, $i, 1; - $i--; - } else { - # Trim whitespaces - $arr[$i] =~ s/^\s*(.*?)\s*$/$1/; - # Remove double-quotes - $arr[$i] =~ s/^"(.*)"$/$1/; + # Split string into an array, paying attention to strings containing spaces: + # extract_delim should be enough but we use extract_delim + split because + # of Text::Balanced bug #73416 + if ($value =~ m{['"]}) { + @arr = extract_multiple( $value, [sub { extract_delimited($_[0],q{'"}) },] ); + my $nof_empty = 0; + for my $i (0 .. scalar @arr - 1) { + my $elem = $arr[$i]; + if ($arr[$i] =~ BLANK_RE) { + # Remove elements that are simply whitespaces later, in a single operation + $nof_empty++; + } else { + # Trim and unquote + $arr[$i-$nof_empty] = _unquote( _trim($elem) ); + } + } + if ($nof_empty > 0) { + splice @arr, -$nof_empty, $nof_empty; } + } else { + @arr = split( /\s+/, _trim($value) ); } } @@ -506,35 +661,31 @@ sub get { #---------- INTERNAL METHODS --------------------------------------------------# -sub initialize { +sub _initialize { my ($self, %args) = @_; - # Path of R binary - my $bin; - if ( $args{ r_bin } || $args{ R_bin } ) { - $bin = $args{ r_bin } || $args{ R_bin }; - } else { - $bin = $prog; # IPC::Run will find the full path for the program later - } - $self->bin( $bin ); + # Full path of R binary specified by bin (r_bin or R_bin for backward + # compatibility), or executable name (IPC::Run will find its full path later) + $self->bin( $args{bin} || $args{r_bin} || $args{R_bin} || PROG ); # Using shared mode? - if ( exists($args{shared}) && ($args{shared} == 1) ) { + if ( exists $args{shared} && $args{shared} == 1 ) { $self->is_shared( 1 ); } else { $self->is_shared( 0 ); } # Build the bridge - $self->bridge( 1 ); + $self->_bridge( 1 ); return 1; } -sub bridge { +sub _bridge { # Get or build the communication bridge and IOs with R my ($self, $build) = @_; + my %params = ( debug => 0 ); if ($build) { my $cmd = [ $self->bin, '--vanilla', '--slave' ]; if (not $self->is_shared) { @@ -542,14 +693,14 @@ sub bridge { $self->{stdin} = \$stdin; $self->{stdout} = \$stdout; $self->{stderr} = \$stderr; - $self->{bridge} = harness $cmd, $self->{stdin}, $self->{stdout}, $self->{stderr}; + $self->{bridge} = harness $cmd, $self->{stdin}, $self->{stdout}, $self->{stderr}, %params; } else { $self->{stdin} = \$SHARED_STDIN ; $self->{stdout} = \$SHARED_STDOUT; $self->{stderr} = \$SHARED_STDERR; if (not defined $SHARED_BRIDGE) { - # The first Statics::R instance builds the bridge - $SHARED_BRIDGE = harness $cmd, $self->{stdin}, $self->{stdout}, $self->{stderr}; + # The first Statistics::R instance builds the bridge + $SHARED_BRIDGE = harness $cmd, $self->{stdin}, $self->{stdout}, $self->{stderr}, %params; } $self->{bridge} = $SHARED_BRIDGE; } @@ -558,7 +709,7 @@ sub bridge { } -sub stdin { +sub _stdin { # Get / set standard input string for R my ($self, $val) = @_; if (defined $val) { @@ -568,7 +719,7 @@ sub stdin { } -sub stdout { +sub _stdout { # Get / set standard output string for R my ($self, $val) = @_; if (defined $val) { @@ -578,7 +729,7 @@ sub stdout { } -sub stderr { +sub _stderr { # Get / set standard error string for R my ($self, $val) = @_; if (defined $val) { @@ -588,29 +739,91 @@ sub stderr { } -sub result { - # Get / set result of last R command - my ($self, $val) = @_; - if (defined $val) { - $self->{result} = $val; - } - return $self->{result}; -} - - sub wrap_cmd { # Wrap a command to pass to R. Whether the command is successful or not, the # end of stream string will appear on stdout and indicate that R has finished # processing the data. Note that $cmd can be multiple R commands. my ($self, $cmd) = @_; + chomp $cmd; + $cmd =~ s/;$//; + $cmd .= qq`; write("`.EOS.qq`",stdout())\n`; + return $cmd; +} - # Escape double-quotes - $cmd =~ s/"/\\"/g; - # Evaluate command (and catch syntax and runtime errors) - $cmd = qq`tryCatch( eval(parse(text="$cmd")) , error = function(e){print(e)} ); write("$eos",stdout())\n`; +sub _generate_error_re { + # Generate a regular expression to catch R internal errors, e.g.: + # Error: object 'zzz' not found" + # Error in print(ASDF) : object 'ASDF' not found + my ($self) = @_; + $ERROR_RE = qr/^(?:$ERROR_STR_1|$ERROR_STR_2)\s*(.*)$/s; + print "DBG: Regexp for catching errors is '$ERROR_RE'\n" if DEBUG; + return 1; +} - return $cmd; + +sub _localize_error_str { + # Find the translation for the R error strings. Internationalization is + # present in R >=2.1, with Natural Language Support enabled. + my ($self) = @_; + my @strings; + for my $error_str ($ERROR_STR_1, $ERROR_STR_2) { + my $cmd = qq`write(ngettext(1, "$error_str", "", domain="R"), stdout())`; + $self->set('cmd', $cmd); + # Try to translate string, return '' if not possible + my $str = $self->run(q`tryCatch( eval(parse(text=cmd)) , error=function(e){write("",stdout())} )`); + $str ||= $error_str; + push @strings, $str; + } + ($ERROR_STR_1, $ERROR_STR_2) = @strings; + return 1; +} + + +sub DESTROY { + # The bridge to R is not automatically bombed when Statistics::R instances + # get out of scope. Do it now (unless running in shared mode)! + my ($self) = @_; + if (not $self->is_shared) { + $self->stop; + } +} + + +#---------- HELPER SUBS -------------------------------------------------------# + + +sub _trim { + # Remove flanking whitespaces + my ($str) = @_; + $str =~ s{^\s+}{}; + $str =~ s{\s+$}{}; + return $str; +} + + +sub _quote { + # Quote a string for use in R. We use double-quotes because the documentation + # Quotes {base} R documentation states that this is preferred over single- + # quotes. Double-quotes inside the string are escaped. + my ($str) = @_; + # Escape " by \" , \" by \\\" , ... + $str =~ s/ (\\*) " / '\\' x (2*length($1)+1) . '"' /egx; + # Surround by " + $str = qq("$str"); + return $str; +} + + +sub _unquote { + # Opposite of _quote + my ($str) = @_; + # Remove surrounding " + $str =~ s{^"}{}; + $str =~ s{"$}{}; + # Interpolate (de-escape) \\\" to \" , \" to " , ... + $str =~ s/ ((?:\\\\)*) \\ " / '\\' x (length($1)*0.5) . '"' /egx; + return $str; }