From 5e71068076c23e49ee5655b94ea2c4b403790cbf Mon Sep 17 00:00:00 2001 From: Don Armstrong Date: Sat, 17 Sep 2022 08:11:53 -0700 Subject: [PATCH] New upstream version 0.34 --- Changes | 50 ++- MANIFEST | 3 + META.yml | 16 +- MYMETA.json | 57 ++++ MYMETA.yml | 33 ++ Makefile.PL | 11 +- README | 131 +++++--- inc/Module/Install.pm | 24 +- inc/Module/Install/Base.pm | 2 +- inc/Module/Install/Can.pm | 85 +++++- inc/Module/Install/External.pm | 21 +- inc/Module/Install/Fetch.pm | 2 +- inc/Module/Install/Makefile.pm | 24 +- inc/Module/Install/Metadata.pm | 6 +- inc/Module/Install/Win32.pm | 2 +- inc/Module/Install/WriteAll.pm | 2 +- lib/Statistics/R.pm | 541 +++++++++++++++++++++++---------- lib/Statistics/R/Legacy.pm | 9 +- lib/Statistics/R/Win32.pm | 62 ++-- t/00-load.t | 6 +- t/01-pod.t | 2 + t/02-legacy.t | 5 +- t/03-run.t | 30 +- t/04-start-stop.t | 5 +- t/05-shared.t | 5 +- t/06-get-set.t | 30 +- t/07-robust.t | 28 +- t/08-errors.t | 52 +++- t/FlawedStatisticsR.pm | 16 + 29 files changed, 921 insertions(+), 339 deletions(-) create mode 100644 MYMETA.json create mode 100644 MYMETA.yml create mode 100644 t/FlawedStatisticsR.pm diff --git a/Changes b/Changes index 5635c25..4982e92 100644 --- a/Changes +++ b/Changes @@ -1,7 +1,52 @@ Revision history for Perl extension Statistics::R. +0.34 2015-10-19 + - Fixed obscure issue in DESTROY (reported by Pär Larsson, RT #107246) + +0.33 2014-08-28 + - Better way to set large arrays (patch by Ken Yamaguchi, RT #97359) + - Properly handle the quit() command (reported by tecolo, Github #5) + - Prefer using the 'bin' constructor instead of 'r_bin' + - Method version() to get the version of R + - Better handling of R internationalization + +0.32 2013-12-18 + - Fixed POD error (reported by Srividya Vaidyanathan, RT #91438) + +0.31 2013-02-07 + - Simplification and speedup of communications with R + - Handle multiple locales when looking for errors (patch by Jean Véronis and + Brian Cassidy) + +0.30 2012-11-15 + - Skip tests that hang on Windows (thanks Clifford Sobchuk and Gisbert W. + Selke, RT #77761) + - Automatically destroy the R bridge when Statistics::R goes out of scope + (unless running in shared mode) + +0.29 2012-11-07 + - Fixed cross-platform filename problem in run_from_file (thanks Clifford + Sobchuk, RT #77761) + +0.28 2012-11-06 + - Fixed packaging issue by repackaging with Module::Install version 1.06 + - Fixed character causing failure of POD test + +0.27 2012-03-22 + - Better handling of R line length limits + - Better quoting of strings passed to R + - Optimizations + +0.26 2012-01-28 + - Support more R installation paths in Windows (patch by Adam Kennedy) + +0.25 2011-12-21 + - Fixed a bug in the get() method (reported by Manuel A. Alonso Tarajano, + patched by Brian Cassidy) + 0.24 2011-11-09 - - Require Text::Balanced >= 0.97 to prevent bad surprises (reported by Ryan Golhar) + - Require Text::Balanced >= 0.97 to prevent bad surprises (reported by Ryan + Golhar) 0.23 2011-10-28 - Arrays of number-containing strings are now handled properly (RT bug @@ -31,7 +76,8 @@ Revision history for Perl extension Statistics::R. - Refactoring to remove old code doing platform-specific operations. - Lots of code cleanup - Removed the now useless r_dir and tmp_dir options of new() - - Fix for change of dir bug (RT #6724). Also fixes missing synopsis file (RT #70307) + - Fix for change of dir bug (RT #6724). Also fixes missing synopsis file (RT + #70307) - More subtle cleanup procedure (RT #70392) 0.09 2011-08-23 diff --git a/MANIFEST b/MANIFEST index 0199ad1..d613ebb 100644 --- a/MANIFEST +++ b/MANIFEST @@ -14,6 +14,8 @@ lib/Statistics/R/Win32.pm Makefile.PL MANIFEST This list of files META.yml +MYMETA.json +MYMETA.yml README t/00-load.t t/01-pod.t @@ -25,3 +27,4 @@ t/06-get-set.t t/07-robust.t t/08-errors.t t/data/script.R +t/FlawedStatisticsR.pm diff --git a/META.yml b/META.yml index bbbd233..9379783 100644 --- a/META.yml +++ b/META.yml @@ -3,13 +3,13 @@ abstract: 'Perl interface with the R statistical program' author: - 'Florent Angly (2011 rewrite)' build_requires: - ExtUtils::MakeMaker: 6.56 - Test::More: 0.47 + ExtUtils::MakeMaker: 6.59 + Test::More: '0.47' configure_requires: - ExtUtils::MakeMaker: 6.56 + ExtUtils::MakeMaker: 6.59 distribution_type: module dynamic_config: 1 -generated_by: 'Module::Install version 1.04' +generated_by: 'Module::Install version 1.16' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html @@ -20,13 +20,15 @@ no_index: - inc - t requires: - IPC::Run: 0 + IPC::Run: '0.1' Regexp::Common: 0 - Text::Balanced: 1.97 + Text::Balanced: '1.97' + Text::Wrap: 0 perl: 5.6.0 + version: '0.77' resources: bugtracker: http://rt.cpan.org/Dist/Display.html?Name=Statistics-R homepage: http://search.cpan.org/search?query=statistics%3A%3AR&mode=dist license: http://dev.perl.org/licenses/ repository: git@github.com:bricas/statistics-r.git -version: 0.24 +version: '0.34' diff --git a/MYMETA.json b/MYMETA.json new file mode 100644 index 0000000..3134de6 --- /dev/null +++ b/MYMETA.json @@ -0,0 +1,57 @@ +{ + "abstract" : "Perl interface with the R statistical program", + "author" : [ + "Florent Angly (2011 rewrite)" + ], + "dynamic_config" : 0, + "generated_by" : "Module::Install version 1.16, CPAN::Meta::Converter version 2.150005", + "license" : [ + "perl_5" + ], + "meta-spec" : { + "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", + "version" : "2" + }, + "name" : "Statistics-R", + "no_index" : { + "directory" : [ + "inc", + "t" + ] + }, + "prereqs" : { + "build" : { + "requires" : { + "ExtUtils::MakeMaker" : "6.59", + "Test::More" : "0.47" + } + }, + "configure" : { + "requires" : { + "ExtUtils::MakeMaker" : "0" + } + }, + "runtime" : { + "requires" : { + "IPC::Run" : "0.1", + "Regexp::Common" : "0", + "Text::Balanced" : "1.97", + "Text::Wrap" : "0", + "perl" : "5.006", + "version" : "0.77" + } + } + }, + "release_status" : "stable", + "resources" : { + "bugtracker" : { + "web" : "http://rt.cpan.org/Dist/Display.html?Name=Statistics-R" + }, + "homepage" : "http://search.cpan.org/search?query=statistics%3A%3AR&mode=dist", + "license" : [ + "http://dev.perl.org/licenses/" + ] + }, + "version" : "0.33", + "x_serialization_backend" : "JSON::PP version 2.27203" +} diff --git a/MYMETA.yml b/MYMETA.yml new file mode 100644 index 0000000..d208ce3 --- /dev/null +++ b/MYMETA.yml @@ -0,0 +1,33 @@ +--- +abstract: 'Perl interface with the R statistical program' +author: + - 'Florent Angly (2011 rewrite)' +build_requires: + ExtUtils::MakeMaker: '6.59' + Test::More: '0.47' +configure_requires: + ExtUtils::MakeMaker: '0' +dynamic_config: 0 +generated_by: 'Module::Install version 1.16, CPAN::Meta::Converter version 2.150005' +license: perl +meta-spec: + url: http://module-build.sourceforge.net/META-spec-v1.4.html + version: '1.4' +name: Statistics-R +no_index: + directory: + - inc + - t +requires: + IPC::Run: '0.1' + Regexp::Common: '0' + Text::Balanced: '1.97' + Text::Wrap: '0' + perl: '5.006' + version: '0.77' +resources: + bugtracker: http://rt.cpan.org/Dist/Display.html?Name=Statistics-R + homepage: http://search.cpan.org/search?query=statistics%3A%3AR&mode=dist + license: http://dev.perl.org/licenses/ +version: '0.33' +x_serialization_backend: 'CPAN::Meta::YAML version 0.012' diff --git a/Makefile.PL b/Makefile.PL index 4d7adc3..22f773b 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -1,7 +1,4 @@ -use strict; -use warnings; - -use inc::Module::Install; +use inc::Module::Install 1.04; use lib 'lib'; if ( $^O =~ m/^(?:.*?win32|dos)$/i ) { @@ -19,9 +16,11 @@ resources build_requires 'Test::More' => '0.47'; +requires 'IPC::Run' => '0.1'; # availability of $self->{STATE} +requires 'Regexp::Common' => 0; requires 'Text::Balanced' => '1.97'; -requires 'Regexp::Common' => '0'; -requires 'IPC::Run' => '0'; +requires 'Text::Wrap' => 0; +requires 'version' => '0.77'; requires_external_bin 'R'; diff --git a/README b/README index 7d474a3..639d26a 100644 --- a/README +++ b/README @@ -4,14 +4,15 @@ NAME DESCRIPTION *Statistics::R* is a module to controls the R interpreter (R project for statistical computing: ). It lets you start - R, pass commands to it and retrieve the output. A shared mode allow to - have several instances of *Statistics::R* talk to the same R process. + R, pass commands to it and retrieve their output. A shared mode allows + several instances of *Statistics::R* to talk to the same R process. - The current *Statistics::R* implementation uses pipes (for stdin, stdout - and and stderr) to communicate with R. This implementation should be - more efficient and reliable than that in previous version, which relied - on reading and writing files. As before, this module works on GNU/Linux, - MS Windows and probably many more systems. + The current *Statistics::R* implementation uses pipes (stdin, stdout and + stderr) to communicate with R. This implementation is more efficient and + reliable than that in versions < 0.20, which relied on reading and + writing intermediary files. As before, this module works on GNU/Linux, + MS Windows and probably many more systems. *Statistics::R* has been + tested with R version 2 and 3. SYNOPSIS use Statistics::R; @@ -21,7 +22,7 @@ SYNOPSIS # Run simple R commands my $output_file = "file.ps"; - $R->run(qq`postscript("$output_file" , horizontal=FALSE , width=500 , height=500 , pointsize=1)`); + $R->run(qq`postscript("$output_file", horizontal=FALSE, width=500, height=500)`); $R->run(q`plot(c(1, 5, 10), type = "l")`); $R->run(q`dev.off()`); @@ -36,12 +37,11 @@ SYNOPSIS METHODS new() - Build a *Statistics::R* bridge object between Perl and R. Available - options are: + Build a *Statistics::R* bridge object connecting Perl and R. + Available options are: - r_bin - Specify the full path to R if it is not automatically found. See - INSTALLATION. + bin Specify the full path to the R executable, if it is not + automatically found. See "INSTALLATION". shared Start a shared bridge. When using a shared bridge, several @@ -57,22 +57,27 @@ METHODS my $x = $R2->get( 'x' ); print "x = $x\n"; - Do not call the *stop()* method is you still have processes that - need to interact with R. + $R1->stop; # or $R2->stop + + Note that in shared mode, you are responsible for calling the + *stop()* method from one of your Statistics::R instances when + you are finished. But be careful not to call the *stop()* method + if you still have processes that need to interact with R! run() - First, start() R if it is not yet running. Then, execute R commands - passed as a string and return the output as a string. If your - command fails to run in R, an error message will be displayed. + First, *start()* R if it is not yet running. Then, execute R + commands passed as a string and return the output as a string. If + your commands failed to run in R, an error message will be + displayed. Example: my $out = $R->run( q`print( 1 + 2 )` ); If you intend on runnning many R commands, it may be convenient to - pass an array of commands or put multiple commands in an here-doc: + pass a list of commands or put multiple commands in an here-doc: - # Array of R commands: + # List of R commands: my $out1 = $R->run( q`a <- 2`, q`b <- 5`, @@ -89,52 +94,80 @@ METHODS EOF my $out2 = $R->run($cmds); - To run commands from a file, see the run_from_file() method. + Alternatively, to run commands from a file, use the + *run_from_file()* method. + + The return value you get from *run()* is a combination of what R + would display on the standard output and the standard error, but the + exact order may differ. - The output you get from run() is the combination of what R would - display on the standard output and the standard error, but the order - may differ. When loading modules, some may write numerous messages - on standard error. You can disable this behavior using the following - R command: + When loading modules, some may write numerous messages on standard + error. You can disable this behavior using the following R command: suppressPackageStartupMessages(library(library_to_load)) + Note that older versions of R impose a limit on how many characters + can be contained on a line: about 4076 bytes maximum. You will be + warned if this occurs, with an error message stating: + + '\0' is an unrecognized escape in character string starting "... + + In this case, try to break down your R code into several smaller, + more manageable statements. Alternatively, adding newline characters + "\n" at strategic places in the R statements will work around the + issue. + run_from_file() - Similar to run() but reads the R commands from the specified file. - Internally, this method uses the R source() command to read the - file. + Similar to *run()* but reads the R commands from the specified file. + Internally, this method converts the filename to a format compatible + with R and then passes it to the R *source()* command to read the + file and execute the commands. + + result() + Get the results from the last R command. set() - Set the value of an R variable (scalar or arrayref). Example: + Set the value of an R variable (scalar or vector). Example: + # Create an R scalar $R->set( 'x', 'pear' ); or + # Create an R list $R->set( 'y', [1, 2, 3] ); get() - Get the value of an R variable (scalar or arrayref). Example: + Get the value of an R variable (scalar or vector). Example: - my $x = $R->get( 'x' ); # $y is an scalar + # Retrieve an R scalar. $x is a Perl scalar. + my $x = $R->get( 'x' ); or - my $y = $R->get( 'y' ); # $x is an arrayref + # Retrieve an R list. $x is a Perl arrayref. + my $y = $R->get( 'y' ); start() Explicitly start R. Most times, you do not need to do that because - the first execution of run() or set() will automatically call - start(). + the first execution of *run()* or *set()* will automatically call + *start()*. stop() - Stop a running instance of R. + Stop a running instance of R. You need to call this method after + running a shared bridge. For a simple bridge, you do not need to do + this because *stop()* is automatically called when the Statistics::R + object goes out of scope. restart() - stop() and start() R. + *stop()* and *start()* R. bin() - Get or set the path to the R executable. + Get or set the path to the R executable. Note that the path will be + available only after start() has been called. + + version() + Get the version number of R. is_shared() Was R started in shared mode? @@ -143,7 +176,7 @@ METHODS Is R running? pid() - Return the pid of the running R process + Return the PID of the running R process INSTALLATION Since *Statistics::R* relies on R to work, you need to install R first. @@ -154,16 +187,18 @@ INSTALLATION *Statistics::R* working. On Windows systems, in addition to the folders described in PATH, the usual suspects will be checked for the presence of the R binary, e.g. C:\Program Files\R. If *Statistics::R* does not - find R installation, your last recourse is to specify its full path when - calling new(): + find where R is installed, your last recourse is to specify its full + path when calling new(): - my $R = Statistics::R->new( r_bin => $fullpath ); + my $R = Statistics::R->new( bin => $fullpath ); You also need to have the following CPAN Perl modules installed: - Text::Balanced (>= 1.97) - Regexp::Common IPC::Run + Regexp::Common + Text::Balanced (>= 1.97) + Text::Wrap + version (>= 0.77) SEE ALSO * Statistics::R::Win32 @@ -172,7 +207,7 @@ SEE ALSO * The R-project web site: - * Statistics:: modules for Perl: + * Statistics::* modules for Perl: AUTHORS @@ -180,7 +215,9 @@ AUTHORS Graciliano M. P. (original code) -MAINTAINER +MAINTAINERS + Florent Angly + Brian Cassidy COPYRIGHT & LICENSE @@ -196,5 +233,5 @@ BUGS is developed on Github () and is under Git revision control. To get the latest revision, run: - git clone git@github.com:bricas/statistics-r.git + git clone git://github.com/bricas/statistics-r.git diff --git a/inc/Module/Install.pm b/inc/Module/Install.pm index c685ca4..f44ab4d 100644 --- a/inc/Module/Install.pm +++ b/inc/Module/Install.pm @@ -17,7 +17,7 @@ package Module::Install; # 3. The ./inc/ version of Module::Install loads # } -use 5.005; +use 5.006; use strict 'vars'; use Cwd (); use File::Find (); @@ -31,7 +31,7 @@ BEGIN { # This is not enforced yet, but will be some time in the next few # releases once we can make sure it won't clash with custom # Module::Install extensions. - $VERSION = '1.04'; + $VERSION = '1.16'; # Storage for the pseudo-singleton $MAIN = undef; @@ -156,10 +156,10 @@ END_DIE sub autoload { my $self = shift; my $who = $self->_caller; - my $cwd = Cwd::cwd(); + my $cwd = Cwd::getcwd(); my $sym = "${who}::AUTOLOAD"; $sym->{$cwd} = sub { - my $pwd = Cwd::cwd(); + my $pwd = Cwd::getcwd(); if ( my $code = $sym->{$pwd} ) { # Delegate back to parent dirs goto &$code unless $cwd eq $pwd; @@ -239,7 +239,7 @@ sub new { # ignore the prefix on extension modules built from top level. my $base_path = Cwd::abs_path($FindBin::Bin); - unless ( Cwd::abs_path(Cwd::cwd()) eq $base_path ) { + unless ( Cwd::abs_path(Cwd::getcwd()) eq $base_path ) { delete $args{prefix}; } return $args{_self} if $args{_self}; @@ -338,7 +338,7 @@ sub find_extensions { if ( $subpath eq lc($subpath) || $subpath eq uc($subpath) ) { my $content = Module::Install::_read($subpath . '.pm'); my $in_pod = 0; - foreach ( split //, $content ) { + foreach ( split /\n/, $content ) { $in_pod = 1 if /^=\w/; $in_pod = 0 if /^=cut/; next if ($in_pod || /^=cut/); # skip pod text @@ -378,6 +378,7 @@ eval( $] >= 5.006 ? <<'END_NEW' : <<'END_OLD' ); die $@ if $@; sub _read { local *FH; open( FH, '<', $_[0] ) or die "open($_[0]): $!"; + binmode FH; my $string = do { local $/; }; close FH or die "close($_[0]): $!"; return $string; @@ -386,6 +387,7 @@ END_NEW sub _read { local *FH; open( FH, "< $_[0]" ) or die "open($_[0]): $!"; + binmode FH; my $string = do { local $/; }; close FH or die "close($_[0]): $!"; return $string; @@ -416,6 +418,7 @@ eval( $] >= 5.006 ? <<'END_NEW' : <<'END_OLD' ); die $@ if $@; sub _write { local *FH; open( FH, '>', $_[0] ) or die "open($_[0]): $!"; + binmode FH; foreach ( 1 .. $#_ ) { print FH $_[$_] or die "print($_[0]): $!"; } @@ -425,6 +428,7 @@ END_NEW sub _write { local *FH; open( FH, "> $_[0]" ) or die "open($_[0]): $!"; + binmode FH; foreach ( 1 .. $#_ ) { print FH $_[$_] or die "print($_[0]): $!"; } @@ -434,7 +438,7 @@ END_OLD # _version is for processing module versions (eg, 1.03_05) not # Perl versions (eg, 5.8.1). -sub _version ($) { +sub _version { my $s = shift || 0; my $d =()= $s =~ /(\.)/g; if ( $d >= 2 ) { @@ -450,12 +454,12 @@ sub _version ($) { return $l + 0; } -sub _cmp ($$) { +sub _cmp { _version($_[1]) <=> _version($_[2]); } # Cloned from Params::Util::_CLASS -sub _CLASS ($) { +sub _CLASS { ( defined $_[0] and @@ -467,4 +471,4 @@ sub _CLASS ($) { 1; -# Copyright 2008 - 2011 Adam Kennedy. +# Copyright 2008 - 2012 Adam Kennedy. diff --git a/inc/Module/Install/Base.pm b/inc/Module/Install/Base.pm index b520616..5762a74 100644 --- a/inc/Module/Install/Base.pm +++ b/inc/Module/Install/Base.pm @@ -4,7 +4,7 @@ package Module::Install::Base; use strict 'vars'; use vars qw{$VERSION}; BEGIN { - $VERSION = '1.04'; + $VERSION = '1.16'; } # Suspend handler for "redefined" warnings diff --git a/inc/Module/Install/Can.pm b/inc/Module/Install/Can.pm index a162ad4..d859276 100644 --- a/inc/Module/Install/Can.pm +++ b/inc/Module/Install/Can.pm @@ -3,13 +3,12 @@ package Module::Install::Can; use strict; use Config (); -use File::Spec (); use ExtUtils::MakeMaker (); use Module::Install::Base (); use vars qw{$VERSION @ISA $ISCORE}; BEGIN { - $VERSION = '1.04'; + $VERSION = '1.16'; @ISA = 'Module::Install::Base'; $ISCORE = 1; } @@ -29,7 +28,7 @@ sub can_use { eval { require $mod; $pkg->VERSION($ver || 0); 1 }; } -# check if we can run some command +# Check if we can run some command sub can_run { my ($self, $cmd) = @_; @@ -38,14 +37,88 @@ sub can_run { for my $dir ((split /$Config::Config{path_sep}/, $ENV{PATH}), '.') { next if $dir eq ''; - my $abs = File::Spec->catfile($dir, $_[1]); + require File::Spec; + my $abs = File::Spec->catfile($dir, $cmd); return $abs if (-x $abs or $abs = MM->maybe_command($abs)); } return; } -# can we locate a (the) C compiler +# Can our C compiler environment build XS files +sub can_xs { + my $self = shift; + + # Ensure we have the CBuilder module + $self->configure_requires( 'ExtUtils::CBuilder' => 0.27 ); + + # Do we have the configure_requires checker? + local $@; + eval "require ExtUtils::CBuilder;"; + if ( $@ ) { + # They don't obey configure_requires, so it is + # someone old and delicate. Try to avoid hurting + # them by falling back to an older simpler test. + return $self->can_cc(); + } + + # Do we have a working C compiler + my $builder = ExtUtils::CBuilder->new( + quiet => 1, + ); + unless ( $builder->have_compiler ) { + # No working C compiler + return 0; + } + + # Write a C file representative of what XS becomes + require File::Temp; + my ( $FH, $tmpfile ) = File::Temp::tempfile( + "compilexs-XXXXX", + SUFFIX => '.c', + ); + binmode $FH; + print $FH <<'END_C'; +#include "EXTERN.h" +#include "perl.h" +#include "XSUB.h" + +int main(int argc, char **argv) { + return 0; +} + +int boot_sanexs() { + return 1; +} + +END_C + close $FH; + + # Can the C compiler access the same headers XS does + my @libs = (); + my $object = undef; + eval { + local $^W = 0; + $object = $builder->compile( + source => $tmpfile, + ); + @libs = $builder->link( + objects => $object, + module_name => 'sanexs', + ); + }; + my $result = $@ ? 0 : 1; + + # Clean up all the build files + foreach ( $tmpfile, $object, @libs ) { + next unless defined $_; + 1 while unlink; + } + + return $result; +} + +# Can we locate a (the) C compiler sub can_cc { my $self = shift; my @chunks = split(/ /, $Config::Config{cc}) or return; @@ -78,4 +151,4 @@ if ( $^O eq 'cygwin' ) { __END__ -#line 156 +#line 236 diff --git a/inc/Module/Install/External.pm b/inc/Module/Install/External.pm index 062563d..88ed718 100644 --- a/inc/Module/Install/External.pm +++ b/inc/Module/Install/External.pm @@ -8,11 +8,28 @@ use Module::Install::Base (); use vars qw{$VERSION $ISCORE @ISA}; BEGIN { - $VERSION = '1.04'; + $VERSION = '1.16'; $ISCORE = 1; @ISA = qw{Module::Install::Base}; } +sub requires_xs { + my $self = shift; + + # First check for the basic C compiler + $self->requires_external_cc; + + # We need a C compiler that can build XS files + unless ( $self->can_xs ) { + print "Unresolvable missing external dependency.\n"; + print "This package requires perl's header files.\n"; + print STDERR "NA: Unable to build distribution on this platform.\n"; + exit(0); + } + + 1; +} + sub requires_external_cc { my $self = shift; @@ -63,4 +80,4 @@ sub requires_external_bin { __END__ -#line 138 +#line 171 diff --git a/inc/Module/Install/Fetch.pm b/inc/Module/Install/Fetch.pm index a412576..41d3517 100644 --- a/inc/Module/Install/Fetch.pm +++ b/inc/Module/Install/Fetch.pm @@ -6,7 +6,7 @@ use Module::Install::Base (); use vars qw{$VERSION @ISA $ISCORE}; BEGIN { - $VERSION = '1.04'; + $VERSION = '1.16'; @ISA = 'Module::Install::Base'; $ISCORE = 1; } diff --git a/inc/Module/Install/Makefile.pm b/inc/Module/Install/Makefile.pm index 035cef2..e9918d2 100644 --- a/inc/Module/Install/Makefile.pm +++ b/inc/Module/Install/Makefile.pm @@ -8,7 +8,7 @@ use Fcntl qw/:flock :seek/; use vars qw{$VERSION @ISA $ISCORE}; BEGIN { - $VERSION = '1.04'; + $VERSION = '1.16'; @ISA = 'Module::Install::Base'; $ISCORE = 1; } @@ -133,7 +133,7 @@ sub makemaker_args { return $args; } -# For mm args that take multiple space-seperated args, +# For mm args that take multiple space-separated args, # append an argument to the current list. sub makemaker_append { my $self = shift; @@ -215,13 +215,17 @@ sub write { require ExtUtils::MakeMaker; if ( $perl_version and $self->_cmp($perl_version, '5.006') >= 0 ) { - # MakeMaker can complain about module versions that include - # an underscore, even though its own version may contain one! - # Hence the funny regexp to get rid of it. See RT #35800 - # for details. - my ($v) = $ExtUtils::MakeMaker::VERSION =~ /^(\d+\.\d+)/; - $self->build_requires( 'ExtUtils::MakeMaker' => $v ); - $self->configure_requires( 'ExtUtils::MakeMaker' => $v ); + # This previous attempted to inherit the version of + # ExtUtils::MakeMaker in use by the module author, but this + # was found to be untenable as some authors build releases + # using future dev versions of EU:MM that nobody else has. + # Instead, #toolchain suggests we use 6.59 which is the most + # stable version on CPAN at time of writing and is, to quote + # ribasushi, "not terminally fucked, > and tested enough". + # TODO: We will now need to maintain this over time to push + # the version up as new versions are released. + $self->build_requires( 'ExtUtils::MakeMaker' => 6.59 ); + $self->configure_requires( 'ExtUtils::MakeMaker' => 6.59 ); } else { # Allow legacy-compatibility with 5.005 by depending on the # most recent EU:MM that supported 5.005. @@ -411,4 +415,4 @@ sub postamble { __END__ -#line 540 +#line 544 diff --git a/inc/Module/Install/Metadata.pm b/inc/Module/Install/Metadata.pm index 31c953e..9792685 100644 --- a/inc/Module/Install/Metadata.pm +++ b/inc/Module/Install/Metadata.pm @@ -6,7 +6,7 @@ use Module::Install::Base (); use vars qw{$VERSION @ISA $ISCORE}; BEGIN { - $VERSION = '1.04'; + $VERSION = '1.16'; @ISA = 'Module::Install::Base'; $ISCORE = 1; } @@ -347,7 +347,7 @@ sub name_from { ^ \s* package \s* ([\w:]+) - \s* ; + [\s|;]* /ixms ) { my ($name, $module_name) = ($1, $1); @@ -705,7 +705,7 @@ sub _write_mymeta_data { my @yaml = Parse::CPAN::Meta::LoadFile('META.yml'); my $meta = $yaml[0]; - # Overwrite the non-configure dependency hashs + # Overwrite the non-configure dependency hashes delete $meta->{requires}; delete $meta->{build_requires}; delete $meta->{recommends}; diff --git a/inc/Module/Install/Win32.pm b/inc/Module/Install/Win32.pm index 99d9631..218a66b 100644 --- a/inc/Module/Install/Win32.pm +++ b/inc/Module/Install/Win32.pm @@ -6,7 +6,7 @@ use Module::Install::Base (); use vars qw{$VERSION @ISA $ISCORE}; BEGIN { - $VERSION = '1.04'; + $VERSION = '1.16'; @ISA = 'Module::Install::Base'; $ISCORE = 1; } diff --git a/inc/Module/Install/WriteAll.pm b/inc/Module/Install/WriteAll.pm index 86bb25e..530749b 100644 --- a/inc/Module/Install/WriteAll.pm +++ b/inc/Module/Install/WriteAll.pm @@ -6,7 +6,7 @@ use Module::Install::Base (); use vars qw{$VERSION @ISA $ISCORE}; BEGIN { - $VERSION = '1.04'; + $VERSION = '1.16'; @ISA = qw{Module::Install::Base}; $ISCORE = 1; } diff --git a/lib/Statistics/R.pm b/lib/Statistics/R.pm index 8b3371d..7ef37f4 100644 --- a/lib/Statistics/R.pm +++ b/lib/Statistics/R.pm @@ -1,43 +1,22 @@ package Statistics::R; -use 5.006; -use strict; -use warnings; -use Regexp::Common; -use File::Spec::Functions; -use Statistics::R::Legacy; -use IPC::Run qw( harness start pump finish ); -use Text::Balanced qw ( extract_delimited extract_multiple ); - -if ( $^O =~ m/^(?:.*?win32|dos)$/i ) { - require Statistics::R::Win32; -} - -our $VERSION = '0.24'; - -our ($SHARED_BRIDGE, $SHARED_STDIN, $SHARED_STDOUT, $SHARED_STDERR); - -my $prog = 'R'; # executable we are after... R -my $eos = 'Statistics::R::EOS'; # string to signal the R output stream end -my $eos_re = qr/$eos\n$/; # regexp to match end of R stream - =head1 NAME Statistics::R - Perl interface with the R statistical program =head1 DESCRIPTION -I is a module to controls the R interpreter (R project for statistical -computing: L). It lets you start R, pass commands to -it and retrieve the output. A shared mode allow to have several instances of -I talk to the same R process. +I is a module to controls the R interpreter (R project for +statistical computing: L). It lets you start R, pass +commands to it and retrieve their output. A shared mode allows several instances +of I to talk to the same R process. -The current I implementation uses pipes (for stdin, stdout and -and stderr) to communicate with R. This implementation should be more efficient -and reliable than that in previous version, which relied on reading and writing -files. As before, this module works on GNU/Linux, MS Windows and probably many -more systems. +The current I implementation uses pipes (stdin, stdout and stderr) +to communicate with R. This implementation is more efficient and reliable than +that in versions < 0.20, which relied on reading and writing intermediary files. +As before, this module works on GNU/Linux, MS Windows and probably many more +systems. I has been tested with R version 2 and 3. =head1 SYNOPSIS @@ -48,7 +27,7 @@ more systems. # Run simple R commands my $output_file = "file.ps"; - $R->run(qq`postscript("$output_file" , horizontal=FALSE , width=500 , height=500 , pointsize=1)`); + $R->run(qq`postscript("$output_file", horizontal=FALSE, width=500, height=500)`); $R->run(q`plot(c(1, 5, 10), type = "l")`); $R->run(q`dev.off()`); @@ -67,14 +46,15 @@ more systems. =item new() -Build a I bridge object between Perl and R. Available options are: - +Build a I bridge object connecting Perl and R. Available options +are: =over 4 -=item r_bin +=item bin -Specify the full path to R if it is not automatically found. See L. +Specify the full path to the R executable, if it is not automatically found. See +L. =item shared @@ -90,26 +70,30 @@ Statistics::R can communicate with the same unique R instance. Example: my $x = $R2->get( 'x' ); print "x = $x\n"; -Do not call the I method is you still have processes that need to interact -with R. + $R1->stop; # or $R2->stop + +Note that in shared mode, you are responsible for calling the I method +from one of your Statistics::R instances when you are finished. But be careful +not to call the I method if you still have processes that need to +interact with R! =back =item run() -First, start() R if it is not yet running. Then, execute R commands passed as a -string and return the output as a string. If your command fails to run in R, an -error message will be displayed. +First, I R if it is not yet running. Then, execute R commands passed +as a string and return the output as a string. If your commands failed to run +in R, an error message will be displayed. Example: my $out = $R->run( q`print( 1 + 2 )` ); -If you intend on runnning many R commands, it may be convenient to pass an array +If you intend on runnning many R commands, it may be convenient to pass a list of commands or put multiple commands in an here-doc: - # Array of R commands: + # List of R commands: my $out1 = $R->run( q`a <- 2`, q`b <- 5`, @@ -126,58 +110,85 @@ of commands or put multiple commands in an here-doc: EOF my $out2 = $R->run($cmds); -To run commands from a file, see the run_from_file() method. +Alternatively, to run commands from a file, use the I method. + +The return value you get from I is a combination of what R would display +on the standard output and the standard error, but the exact order may differ. -The output you get from run() is the combination of what R would display on the -standard output and the standard error, but the order may differ. When loading -modules, some may write numerous messages on standard error. You can disable -this behavior using the following R command: +When loading modules, some may write numerous messages on standard error. You +can disable this behavior using the following R command: suppressPackageStartupMessages(library(library_to_load)) +Note that older versions of R impose a limit on how many characters can be +contained on a line: about 4076 bytes maximum. You will be warned if this +occurs, with an error message stating: + + '\0' is an unrecognized escape in character string starting "... + +In this case, try to break down your R code into several smaller, more +manageable statements. Alternatively, adding newline characters "\n" at +strategic places in the R statements will work around the issue. =item run_from_file() -Similar to run() but reads the R commands from the specified file. Internally, -this method uses the R source() command to read the file. +Similar to I but reads the R commands from the specified file. +Internally, this method converts the filename to a format compatible with R and +then passes it to the R I command to read the file and execute the +commands. + +=item result() + +Get the results from the last R command. =item set() -Set the value of an R variable (scalar or arrayref). Example: +Set the value of an R variable (scalar or vector). Example: + # Create an R scalar $R->set( 'x', 'pear' ); -or +or + # Create an R list $R->set( 'y', [1, 2, 3] ); - =item get() -Get the value of an R variable (scalar or arrayref). Example: +Get the value of an R variable (scalar or vector). Example: - my $x = $R->get( 'x' ); # $y is an scalar + # Retrieve an R scalar. $x is a Perl scalar. + my $x = $R->get( 'x' ); or - my $y = $R->get( 'y' ); # $x is an arrayref + # Retrieve an R list. $x is a Perl arrayref. + my $y = $R->get( 'y' ); =item start() Explicitly start R. Most times, you do not need to do that because the first -execution of run() or set() will automatically call start(). +execution of I or I will automatically call I. =item stop() -Stop a running instance of R. +Stop a running instance of R. You need to call this method after running a +shared bridge. For a simple bridge, you do not need to do this because +I is automatically called when the Statistics::R object goes out of +scope. =item restart() -stop() and start() R. +I and I R. =item bin() -Get or set the path to the R executable. +Get or set the path to the R executable. Note that the path will be available +only after start() has been called. + +=item version() + +Get the version number of R. =item is_shared() @@ -189,33 +200,37 @@ Is R running? =item pid() -Return the pid of the running R process +Return the PID of the running R process =back =head1 INSTALLATION -Since I relies on R to work, you need to install R first. See this -page for downloads, L. If R is in your PATH environment -variable, then it should be available from a terminal and be detected -automatically by I. This means that you don't have to do anything -on Linux systems to get I working. On Windows systems, in addition -to the folders described in PATH, the usual suspects will be checked for the -presence of the R binary, e.g. C:\Program Files\R. If I does not -find R installation, your last recourse is to specify its full path when calling -new(): +Since I relies on R to work, you need to install R first. See +this page for downloads, L. If R is in your PATH +environment variable, then it should be available from a terminal and be +detected automatically by I. This means that you don't have to do +anything on Linux systems to get I working. On Windows systems, +in addition to the folders described in PATH, the usual suspects will be checked +for the presence of the R binary, e.g. C:\Program Files\R. If I +does not find where R is installed, your last recourse is to specify its full +path when calling new(): - my $R = Statistics::R->new( r_bin => $fullpath ); + my $R = Statistics::R->new( bin => $fullpath ); You also need to have the following CPAN Perl modules installed: =over 4 -=item Text::Balanced (>= 1.97) +=item IPC::Run =item Regexp::Common -=item IPC::Run +=item Text::Balanced (>= 1.97) + +=item Text::Wrap + +=item version (>= 0.77) =back @@ -229,7 +244,7 @@ You also need to have the following CPAN Perl modules installed: =item * The R-project web site: L -=item * Statistics:: modules for Perl: L +=item * Statistics::* modules for Perl: L =back @@ -239,7 +254,9 @@ Florent Angly Eflorent.angly@gmail.comE (2011 rewrite) Graciliano M. P. Egm@virtuasites.com.brE (original code) -=head1 MAINTAINER +=head1 MAINTAINERS + +Florent Angly Eflorent.angly@gmail.comE Brian Cassidy Ebricas@cpan.orgE @@ -258,23 +275,59 @@ Bug reports, suggestions and patches are welcome. The Statistics::R code is developed on Github (L) and is under Git revision control. To get the latest revision, run: - git clone git@github.com:bricas/statistics-r.git + git clone git://github.com/bricas/statistics-r.git =cut +use 5.006; +use strict; +use warnings; +use version; +use Regexp::Common; +use Statistics::R::Legacy; +use IPC::Run qw( harness start pump finish ); +use File::Spec::Functions qw(catfile splitpath splitdir); +use Text::Balanced qw ( extract_delimited extract_multiple ); + +if ( $^O =~ m/^(?:.*?win32|dos)$/i ) { + require Statistics::R::Win32; +} + +our $VERSION = '0.34'; + +our ($SHARED_BRIDGE, $SHARED_STDIN, $SHARED_STDOUT, $SHARED_STDERR); + +use constant DEBUG => 0; # debugging messages +use constant PROG => 'R'; # executable name... R +use constant MAXLINELEN => 1023; # maximum line length for R < 2.5 + +use constant EOS => '\\1'; # indicate the end of R output with \1 +use constant EOS_RE => qr/[${\(EOS)}]\n$/; # regexp to match end of R stream + +use constant NUMBER_RE => qr/^$RE{num}{real}$/; # regexp matching numbers +use constant BLANK_RE => qr/^\s*$/; # regexp matching whitespaces +use constant ILINE_RE => qr/^\s*\[\d+\] /; # regexp matching indexed line + +my $ERROR_STR_1 = 'Error: '; +my $ERROR_STR_2 = 'Error in '; +my $ERROR_RE; # regexp matching R errors + +my $WRAP_LINES = sub { return shift }; # function to wrap R commands + + sub new { # Create a new R communication object my ($class, %args) = @_; my $self = {}; bless $self, ref($class) || $class; - $self->initialize( %args ); + $self->_initialize( %args ); return $self; } sub is_shared { - # Get (/ set) the whether or not Statistics::R is setup to run in shared mode + # Get (or set) the whether or not Statistics::R is setup to run in shared mode my ($self, $val) = @_; if (defined $val) { $self->{is_shared} = $val; @@ -295,13 +348,34 @@ sub start { # method start_shared() if ( exists($args{shared}) && ($args{shared} == 1) ) { $self->is_shared( 1 ); - $self->bridge( 1 ); + $self->_bridge( 1 ); } # Now, start R - my $bridge = $self->bridge; - $status = $bridge->start or die "Error starting $prog: $?\n"; + my $bridge = $self->_bridge; + $status = $bridge->start or die "Error starting ".PROG.": $?\n"; $self->bin( $bridge->{KIDS}->[0]->{PATH} ); + delete $self->{died}; + print "DBG: Started R, ".$self->bin." (pid ".$self->pid.")\n" if DEBUG; + + # Generate regexp to catch R errors + if (not defined $ERROR_RE) { + $self->_generate_error_re; + $self->_localize_error_str; + $self->_generate_error_re; + } + + # Set up a function to wrap lines for R < 2.5 + if ( version->parse($self->version) < version->parse('2.5.0') ) { + print "DBG: Need to wrap to ".MAXLINELEN."\n" if DEBUG; + require Text::Wrap; + $Text::Wrap::columns = MAXLINELEN; + $Text::Wrap::break = ','; + $Text::Wrap::huge = 'overflow'; + $Text::Wrap::separator = ",\n"; + $WRAP_LINES = sub { return Text::Wrap::wrap('', '', shift) }; + } + } return $status; @@ -312,8 +386,9 @@ sub start { sub stop { my ($self) = @_; my $status = 1; - if ($self->is_started) { - $status = $self->bridge->finish or die "Error stopping $prog: $?\n"; + if ( $self->is_started ) { + $status = $self->_bridge->finish or die "Error stopping ".PROG.": $?\n"; + print "DBG: Stopped R\n" if DEBUG; } return $status; } @@ -326,20 +401,41 @@ sub restart { sub is_started { - # Query whether or not R is currently running - return shift->bridge->{STATE} eq IPC::Run::_started ? 1 : 0; + # Query whether or not R has been started and is still running - hackish. + # See https://rt.cpan.org/Ticket/Display.html?id=70595 + my ($self) = @_; + my $is_started = 0; + my $bridge = $self->_bridge; + if (defined $bridge && not $self->{died}) { + if (not exists $bridge->{STATE}) { + die "Internal error: could not get STATE from IPC::Run\n"; + } + if ($bridge->{STATE} eq IPC::Run::_started && $bridge->pumpable) { + $is_started = 1; + } + } + return $is_started; } sub pid { - # Get (/ set) the PID of the running R process. It is accessible only after - # the bridge has start()ed - return shift->bridge->{KIDS}->[0]->{PID}; + # Get (or set) the PID of the running R process - hackish. + # See https://rt.cpan.org/Ticket/Display.html?id=70595It + # The PID is accessible only after the bridge has start()ed. + my ($self) = @_; + my $bridge = $self->_bridge; + if ( not exists $bridge->{KIDS} ) { + die "Internal error: could not get KIDS from IPC::Run\n"; + } + if ( not exists $bridge->{KIDS}->[0]->{PID} ) { + die "Internal error: could not get PID from IPC::Run\n"; + } + return $bridge->{KIDS}->[0]->{PID}; } sub bin { - # Get / set the full path to the R binary program to use. Unless you have set + # Get or set the full path to the R binary program to use. Unless you have set # the path yourself, it is accessible only after the bridge has start()ed my ($self, $val) = @_; if (defined $val) { @@ -349,6 +445,13 @@ sub bin { } +sub version { + # Get the version of R, e.g. '3.1.1' + my ($self) = @_; + return $self->run(q`write(paste(sep=".",R.Version()$major,R.Version()$minor), stdout())`); +} + + sub run { # Pass the input and get the output my ($self, @cmds) = @_; @@ -356,38 +459,51 @@ sub run { # Need to start R now if it is not already running $self->start if not $self->is_started; - # Process each command my $results = ''; for my $cmd (@cmds) { # Wrap command for execution in R - $self->stdin( $self->wrap_cmd($cmd) ); + print "DBG: Command is '$cmd'\n" if DEBUG; + $self->_stdin( $self->wrap_cmd($cmd) ); + print "DBG: stdin is '".$self->_stdin."'\n" if DEBUG; # Pass input to R and get its output - my $bridge = $self->bridge; - while ( $self->stdout !~ m/$eos_re/gc && $bridge->pumpable ) { + my $bridge = $self->_bridge; + while ( $self->_stdout !~ EOS_RE && $bridge->pumpable ) { $bridge->pump; } - # Parse outputs, detect errors - my $out = $self->stdout; - $out =~ s/$eos_re//g; + # Parse output, detect errors + my $out = $self->_stdout; + $out =~ s/${\(EOS_RE)}//; chomp $out; - my $err = $self->stderr; + my $err = $self->_stderr; chomp $err; - if ($out =~ m//sg) { - # Parse (multi-line) error message - my $err_msg = $1."\n".$err; - die "Problem running the R command:\n$cmd\n\nGot the error:\n$err_msg\n"; + + print "DBG: stdout is '$out'\n" if DEBUG; + print "DBG: stderr is '$err'\n" if DEBUG; + + if ($err =~ $ERROR_RE) { + # Catch errors on stderr. Leave warnings alone. + print "DBG: Error\n" if DEBUG; + $self->{died} = 1; # for proper cleanup after failed eval + my $err_msg = "Error:\n".$1; + if ( $err_msg =~ /unrecognized escape in character string/ && + version->parse($self->version) < version->parse('2.5.0') ) { + $err_msg .= "\nMost likely, the given R command contained lines ". + "exceeding ".MAXLINELEN." characters."; + } + $self->_stdout(''); + $self->_stderr(''); + die "Problem while running this R command:\n$cmd\n\n$err_msg\n"; } - + # Save results and reinitialize $results .= "\n" if $results; $results .= $err.$out; - $self->stdout(''); - $self->stderr(''); - + $self->_stdout(''); + $self->_stderr(''); } $self->result($results); @@ -397,17 +513,49 @@ sub run { sub run_from_file { - my ($self, $file) = @_; - my $results = $self->run( qq`source('$file')` ); + # Execute commands in given file: first, convert filepath to an R-compatible + # format and then pass it to source(). + my ($self, $filepath) = @_; + if (not -f $filepath) { + die "Error: '$filepath' does not seem to exist or is not a file.\n"; + } + + # Split filepath + my ($volume, $directories, $filename) = splitpath($filepath); + my @elems; + push @elems, $volume if $volume; # $volume is '' if unused + push @elems, splitdir($directories); + push @elems, $filename; + + # Use file.path to create an R-compatible filename (bug #77761), e.g.: + # file <- file.path("E:", "DATA", "example.csv") + # Then use source() to read file and execute the commands it contains + # source(file) + my $cmd = 'source(file.path('.join(',',map {'"'.$_.'"'}@elems).'))'; + my $results = $self->run($cmd); + return $results; } +sub result { + # Get / set result of last R command + my ($self, $val) = @_; + if (defined $val) { + $self->{result} = $val; + } + return $self->{result}; +} + + sub set { # Assign a variable or array of variables in R. Use undef if you want to # assign NULL to an R variable my ($self, $varname, $arr) = @_; + # Start R now if it is not already running + $self->start if not $self->is_started; + # Check variable type, convert everything into an arrayref my $ref = ref $arr; if ($ref eq '') { @@ -420,19 +568,21 @@ sub set { } # Quote strings and nullify undef variables - for (my $i = 0; $i < scalar @$arr; $i++) { + for my $i (0 .. scalar @$arr - 1) { if (defined $$arr[$i]) { - if ( $$arr[$i] !~ /^$RE{num}{real}$/ ) { - $$arr[$i] = '"'.$$arr[$i].'"'; + if ( $$arr[$i] !~ NUMBER_RE ) { + $$arr[$i] = _quote( $$arr[$i] ); } } else { $$arr[$i] = 'NULL'; } } - # Build a string and run it to import data - my $cmd = $varname.' <- c('.join(', ',@$arr).')'; - $self->run($cmd); + # Build a variable assignment command and run it! + my $cmd = $varname.'<-c('.join(',',@$arr).')'; + $cmd = &$WRAP_LINES( $cmd ); + $self->run( $cmd ); + return 1; } @@ -446,13 +596,13 @@ sub get { my $value; if ($string eq 'NULL') { $value = undef; - } elsif ($string =~ m/^\s*\[\d+\]/) { + } elsif ($string =~ ILINE_RE) { # Vector: its string look like: # ' [1] 6.4 13.3 4.1 1.3 14.1 10.6 9.9 9.6 15.3 # [16] 5.2 10.9 14.4' my @lines = split /\n/, $string; - for (my $i = 0; $i < scalar @lines; $i++) { - $lines[$i] =~ s/^\s*\[\d+\] //; + for my $i (0 .. scalar @lines - 1) { + $lines[$i] =~ s/${\(ILINE_RE)}//; } $value = join ' ', @lines; } else { @@ -461,10 +611,8 @@ sub get { # String looks like: ' mean # 10.41111 ' # Extract value from second line - $value = $lines[1]; - $value =~ s/^\s*(\S+)\s*$/$1/; + $value = _trim( $lines[1] ); } else { - #die "Error: Don't know how to handle this R output\n$string\n"; $value = $string; } } @@ -474,20 +622,27 @@ sub get { if (not defined $value) { @arr = ( undef ); } else { - # Split string into an array, paying attention to strings containing spaces - @arr = extract_multiple( $value, [sub { extract_delimited($_[0],q{ '"}) },] ); - for (my $i = 0; $i < scalar @arr; $i++) { - my $elem = $arr[$i]; - if ($elem =~ m/^\s*$/) { - # Remove elements that are simply whitespaces - splice @arr, $i, 1; - $i--; - } else { - # Trim whitespaces - $arr[$i] =~ s/^\s*(.*?)\s*$/$1/; - # Remove double-quotes - $arr[$i] =~ s/^"(.*)"$/$1/; + # Split string into an array, paying attention to strings containing spaces: + # extract_delim should be enough but we use extract_delim + split because + # of Text::Balanced bug #73416 + if ($value =~ m{['"]}) { + @arr = extract_multiple( $value, [sub { extract_delimited($_[0],q{'"}) },] ); + my $nof_empty = 0; + for my $i (0 .. scalar @arr - 1) { + my $elem = $arr[$i]; + if ($arr[$i] =~ BLANK_RE) { + # Remove elements that are simply whitespaces later, in a single operation + $nof_empty++; + } else { + # Trim and unquote + $arr[$i-$nof_empty] = _unquote( _trim($elem) ); + } + } + if ($nof_empty > 0) { + splice @arr, -$nof_empty, $nof_empty; } + } else { + @arr = split( /\s+/, _trim($value) ); } } @@ -506,35 +661,31 @@ sub get { #---------- INTERNAL METHODS --------------------------------------------------# -sub initialize { +sub _initialize { my ($self, %args) = @_; - # Path of R binary - my $bin; - if ( $args{ r_bin } || $args{ R_bin } ) { - $bin = $args{ r_bin } || $args{ R_bin }; - } else { - $bin = $prog; # IPC::Run will find the full path for the program later - } - $self->bin( $bin ); + # Full path of R binary specified by bin (r_bin or R_bin for backward + # compatibility), or executable name (IPC::Run will find its full path later) + $self->bin( $args{bin} || $args{r_bin} || $args{R_bin} || PROG ); # Using shared mode? - if ( exists($args{shared}) && ($args{shared} == 1) ) { + if ( exists $args{shared} && $args{shared} == 1 ) { $self->is_shared( 1 ); } else { $self->is_shared( 0 ); } # Build the bridge - $self->bridge( 1 ); + $self->_bridge( 1 ); return 1; } -sub bridge { +sub _bridge { # Get or build the communication bridge and IOs with R my ($self, $build) = @_; + my %params = ( debug => 0 ); if ($build) { my $cmd = [ $self->bin, '--vanilla', '--slave' ]; if (not $self->is_shared) { @@ -542,14 +693,14 @@ sub bridge { $self->{stdin} = \$stdin; $self->{stdout} = \$stdout; $self->{stderr} = \$stderr; - $self->{bridge} = harness $cmd, $self->{stdin}, $self->{stdout}, $self->{stderr}; + $self->{bridge} = harness $cmd, $self->{stdin}, $self->{stdout}, $self->{stderr}, %params; } else { $self->{stdin} = \$SHARED_STDIN ; $self->{stdout} = \$SHARED_STDOUT; $self->{stderr} = \$SHARED_STDERR; if (not defined $SHARED_BRIDGE) { - # The first Statics::R instance builds the bridge - $SHARED_BRIDGE = harness $cmd, $self->{stdin}, $self->{stdout}, $self->{stderr}; + # The first Statistics::R instance builds the bridge + $SHARED_BRIDGE = harness $cmd, $self->{stdin}, $self->{stdout}, $self->{stderr}, %params; } $self->{bridge} = $SHARED_BRIDGE; } @@ -558,7 +709,7 @@ sub bridge { } -sub stdin { +sub _stdin { # Get / set standard input string for R my ($self, $val) = @_; if (defined $val) { @@ -568,7 +719,7 @@ sub stdin { } -sub stdout { +sub _stdout { # Get / set standard output string for R my ($self, $val) = @_; if (defined $val) { @@ -578,7 +729,7 @@ sub stdout { } -sub stderr { +sub _stderr { # Get / set standard error string for R my ($self, $val) = @_; if (defined $val) { @@ -588,29 +739,91 @@ sub stderr { } -sub result { - # Get / set result of last R command - my ($self, $val) = @_; - if (defined $val) { - $self->{result} = $val; - } - return $self->{result}; -} - - sub wrap_cmd { # Wrap a command to pass to R. Whether the command is successful or not, the # end of stream string will appear on stdout and indicate that R has finished # processing the data. Note that $cmd can be multiple R commands. my ($self, $cmd) = @_; + chomp $cmd; + $cmd =~ s/;$//; + $cmd .= qq`; write("`.EOS.qq`",stdout())\n`; + return $cmd; +} - # Escape double-quotes - $cmd =~ s/"/\\"/g; - # Evaluate command (and catch syntax and runtime errors) - $cmd = qq`tryCatch( eval(parse(text="$cmd")) , error = function(e){print(e)} ); write("$eos",stdout())\n`; +sub _generate_error_re { + # Generate a regular expression to catch R internal errors, e.g.: + # Error: object 'zzz' not found" + # Error in print(ASDF) : object 'ASDF' not found + my ($self) = @_; + $ERROR_RE = qr/^(?:$ERROR_STR_1|$ERROR_STR_2)\s*(.*)$/s; + print "DBG: Regexp for catching errors is '$ERROR_RE'\n" if DEBUG; + return 1; +} - return $cmd; + +sub _localize_error_str { + # Find the translation for the R error strings. Internationalization is + # present in R >=2.1, with Natural Language Support enabled. + my ($self) = @_; + my @strings; + for my $error_str ($ERROR_STR_1, $ERROR_STR_2) { + my $cmd = qq`write(ngettext(1, "$error_str", "", domain="R"), stdout())`; + $self->set('cmd', $cmd); + # Try to translate string, return '' if not possible + my $str = $self->run(q`tryCatch( eval(parse(text=cmd)) , error=function(e){write("",stdout())} )`); + $str ||= $error_str; + push @strings, $str; + } + ($ERROR_STR_1, $ERROR_STR_2) = @strings; + return 1; +} + + +sub DESTROY { + # The bridge to R is not automatically bombed when Statistics::R instances + # get out of scope. Do it now (unless running in shared mode)! + my ($self) = @_; + if (not $self->is_shared) { + $self->stop; + } +} + + +#---------- HELPER SUBS -------------------------------------------------------# + + +sub _trim { + # Remove flanking whitespaces + my ($str) = @_; + $str =~ s{^\s+}{}; + $str =~ s{\s+$}{}; + return $str; +} + + +sub _quote { + # Quote a string for use in R. We use double-quotes because the documentation + # Quotes {base} R documentation states that this is preferred over single- + # quotes. Double-quotes inside the string are escaped. + my ($str) = @_; + # Escape " by \" , \" by \\\" , ... + $str =~ s/ (\\*) " / '\\' x (2*length($1)+1) . '"' /egx; + # Surround by " + $str = qq("$str"); + return $str; +} + + +sub _unquote { + # Opposite of _quote + my ($str) = @_; + # Remove surrounding " + $str =~ s{^"}{}; + $str =~ s{"$}{}; + # Interpolate (de-escape) \\\" to \" , \" to " , ... + $str =~ s/ ((?:\\\\)*) \\ " / '\\' x (length($1)*0.5) . '"' /egx; + return $str; } diff --git a/lib/Statistics/R/Legacy.pm b/lib/Statistics/R/Legacy.pm index 8acd396..71f68ed 100644 --- a/lib/Statistics/R/Legacy.pm +++ b/lib/Statistics/R/Legacy.pm @@ -21,7 +21,6 @@ BEGIN { receive is_blocked is_locked - receive lock unlock send @@ -36,6 +35,8 @@ Statistics::R::Legacy - Legacy methods for Statistics::R =head1 DESCRIPTION +B instead.> + This module contains legacy methods for I. They are provided solely so that code that uses older versions of I does not crash with recent version. Do not use these methods in new code! @@ -84,7 +85,7 @@ Does nothing anymore. Return 0. -=item +=item error() Return the empty string. @@ -108,7 +109,9 @@ Florent Angly Eflorent.angly@gmail.comE (2011 rewrite) Graciliano M. P. Egm@virtuasites.com.brE (original code) -=head1 MAINTAINER +=head1 MAINTAINERS + +Florent Angly Eflorent.angly@gmail.comE Brian Cassidy Ebricas@cpan.orgE diff --git a/lib/Statistics/R/Win32.pm b/lib/Statistics/R/Win32.pm index 8ad6467..a9e1496 100644 --- a/lib/Statistics/R/Win32.pm +++ b/lib/Statistics/R/Win32.pm @@ -1,14 +1,12 @@ package Statistics::R::Win32; - use strict; use warnings; -use File::Spec::Functions; -use File::DosGlob qw( glob ); +use File::Spec (); +use File::DosGlob (); use Env qw( @PATH $PROGRAMFILES ); use vars qw{@ISA @EXPORT}; - BEGIN { @ISA = 'Exporter'; @EXPORT = qw{ @@ -28,6 +26,8 @@ Statistics::R::Win32 - Helper functions for Statistics::R on MS Windows platform =head1 DESCRIPTION +B instead.> + Helper functions to deal with environment variables and escape file paths on MS Windows platforms. @@ -95,7 +95,9 @@ Florent Angly Eflorent.angly@gmail.comE (2011 rewrite) Graciliano M. P. Egm@virtuasites.com.brE (original code) -=head1 MAINTAINER +=head1 MAINTAINERS + +Florent Angly Eflorent.angly@gmail.comE Brian Cassidy Ebricas@cpan.orgE @@ -123,43 +125,39 @@ revision control. To get the latest revision, run: win32_path_adjust(); +# Find potential R directories in the Windows Program Files folder and +# add them to the PATH environment variable. sub win32_path_adjust { - # Find potential R directories in the Windows Program Files folder and add - # them to the PATH environment variable - + # Find potential R directories, e.g. C:\Program Files (x86)\R-2.1\bin # or C:\Program Files\R\bin\x64 - my @r_dirs; my @prog_file_dirs; if (defined $PROGRAMFILES) { push @prog_file_dirs, $PROGRAMFILES; # e.g. C:\Program Files (x86) my ($programfiles_2) = ($PROGRAMFILES =~ m/^(.*) \(/); # e.g. C:\Program Files - push @prog_file_dirs, $programfiles_2 if not $programfiles_2 eq $PROGRAMFILES; - } - for my $prog_file_dir ( @prog_file_dirs ) { - next if not -d $prog_file_dir; - my @subdirs; - my @globs = ( catfile($prog_file_dir, $PROG), catfile($prog_file_dir, $PROG.'-*') ); - for my $glob ( @globs ) { - $glob = win32_space_escape( win32_double_bs( $glob ) ); - push @subdirs, glob $glob; # DosGlob - } - for my $subdir (@subdirs) { - my $subdir2 = catfile($subdir, 'bin'); - if ( -d $subdir2 ) { - my $subdir3 = catfile($subdir2, 'x64'); - if ( -d $subdir3 ) { - push @r_dirs, $subdir3; - } - push @r_dirs, $subdir2; - } - push @r_dirs, $subdir; + if ( defined $programfiles_2 and $programfiles_2 ne $PROGRAMFILES ) { + push @prog_file_dirs, $programfiles_2; } } - # Append R directories to PATH (order is important) - push @PATH, @r_dirs; - + # Append R directories to PATH + push @PATH, grep { + -d $_ + } map { + # Order is important + File::Spec->catdir( $_, 'bin', 'x64' ), + File::Spec->catdir( $_, 'bin' ), + $_, + } map { + File::DosGlob::glob( win32_space_escape( win32_double_bs($_) ) ) + } map { + File::Spec->catdir( $_, $PROG, "$PROG-*" ), + File::Spec->catdir( $_, "$PROG-*" ), + File::Spec->catdir( $_, $PROG ), + } grep { + -d $_ + } @prog_file_dirs; + return 1; } diff --git a/t/00-load.t b/t/00-load.t index e43ba72..4c3e9f0 100644 --- a/t/00-load.t +++ b/t/00-load.t @@ -2,11 +2,13 @@ use strict; use warnings; -use Test::More tests => 1; +use Test::More; BEGIN { - use_ok( 'Statistics::R' ); + use_ok 'Statistics::R'; } diag( "Testing Statistics::R $Statistics::R::VERSION, Perl $], $^X" ); + +done_testing; diff --git a/t/01-pod.t b/t/01-pod.t index d0907bc..4a562bd 100644 --- a/t/01-pod.t +++ b/t/01-pod.t @@ -7,3 +7,5 @@ use Test::More; eval 'use Test::Pod 1.00'; plan skip_all => 'Test::Pod 1.00 required for testing POD' if $@; all_pod_files_ok(); + +done_testing; diff --git a/t/02-legacy.t b/t/02-legacy.t index 2c9ef95..75237ea 100644 --- a/t/02-legacy.t +++ b/t/02-legacy.t @@ -5,9 +5,6 @@ use warnings; use Test::More; use Statistics::R; -plan tests => 20; - - my $R; my $file = "file.ps"; @@ -55,3 +52,5 @@ ok $R->start_sharedR(); ok $R->stop(); unlink $file; + +done_testing; diff --git a/t/03-run.t b/t/03-run.t index cf3ea5a..a8889ab 100644 --- a/t/03-run.t +++ b/t/03-run.t @@ -3,27 +3,37 @@ use strict; use warnings; use Test::More; +use File::Copy; +use File::Temp; use Statistics::R; +use File::Spec::Functions; -plan tests => 15; - - -my ($R, $expected); +my ($R, $expected, $bin, $version); my $file = 'file.ps'; ok $R = Statistics::R->new(); -ok $R->bin() =~ /\S+/, 'Binary'; +ok $bin = $R->bin(); +ok $bin =~ /\S+/, 'Executable name'; $expected = ''; is $R->run( ), $expected; +ok $bin = $R->bin(); +ok $bin =~ /\S+/, 'Executable path'; + +ok $version = $R->version(); +ok $version =~ /^\d+\.\d+\.\d+$/, 'Version'; + +diag "R version $version found at $bin\n"; + + $expected = ''; is $R->run( qq`postscript("$file" , horizontal=FALSE , width=500 , height=500 , pointsize=1)`), $expected, 'Basic'; $expected = ''; -is $R->run( q`plot(c(1, 5, 10), type = "l")` ), $expected; +is $R->run( q`plot(c(1, 5, 10), type = "l");` ), $expected; $expected = 'null device @@ -76,5 +86,11 @@ Some innocuous message on stdout [1] 123 456 [1] "ok"'; -is $R->run_from_file( './t/data/script.R' ), $expected, 'Commands from file'; +$file = catfile('t', 'data', 'script.R'); +is $R->run_from_file( $file ), $expected, 'Command from file (relative path)'; + +my $absfile = File::Temp->new( UNLINK => 1 )->filename; +copy($file, $absfile) or die "Error: Could not copy file $file to $absfile: $!\n"; +is $R->run_from_file( $absfile ), $expected, 'Commands from file (absolute path)'; +done_testing; diff --git a/t/04-start-stop.t b/t/04-start-stop.t index ed30915..71e7184 100644 --- a/t/04-start-stop.t +++ b/t/04-start-stop.t @@ -6,9 +6,6 @@ use Test::More; use Statistics::R; use Cwd; -plan tests => 16; - - my $R; my $initial_dir = cwd; @@ -44,3 +41,5 @@ is $R->is_shared, 1; ok $R->stop(); is cwd, $initial_dir; # Bug RT #6724 and #70307 + +done_testing; diff --git a/t/05-shared.t b/t/05-shared.t index 904bc26..2604a5d 100644 --- a/t/05-shared.t +++ b/t/05-shared.t @@ -5,9 +5,6 @@ use warnings; use Test::More; use Statistics::R; -plan tests => 29; - - my ($R1, $R2, $R3, $R4); ok $R1 = Statistics::R->new( shared => 1 ), 'Starting in shared mode'; @@ -51,3 +48,5 @@ is $R1->is_started, 0; is $R2->is_started, 0; is $R3->is_started, 0; is $R4->is_started, 0; + +done_testing; diff --git a/t/06-get-set.t b/t/06-get-set.t index 202bee1..dc698de 100644 --- a/t/06-get-set.t +++ b/t/06-get-set.t @@ -5,9 +5,6 @@ use warnings; use Test::More; use Statistics::R; -plan tests => 86; - - my ($R, $input, $output); @@ -95,5 +92,32 @@ is $$output[1], 2; is $$output[2], 3; +# Bug reported by Manuel A. Alonso Tarajano +is $R->run(q`mydat = seq(1:4)`), ''; +ok $output = $R->get('mydat'); +is $$output[0], 1; +is $$output[1], 2; +is $$output[2], 3; +is $$output[3], 4; + + +# Strings containing quotes and escaped quotes +$input = q{He said: "Let's go \"home\" now!\n"}; +ok $R->set('x', $input), 'string'; +ok $output = $R->get('x'); +is ref($output), ''; +is $output, q{He said: "Let's go \"home\" now!\n"}; + + +$input = q{He said: "Let's go \\\\\\\\\\\\\"home\\\\\\\\\\\\\" now!\n"}; +# because \ is a special char that needs to be escaped, this string really is: +# He said: "Let's go \\\\\\\"home\\\\\\\" now!\n +ok $R->set('x', $input), 'string'; +ok $output = $R->get('x'); +is ref($output), ''; +is $output, q{He said: "Let's go \\\\\\\\\\\\\"home\\\\\\\\\\\\\" now!\n"}; + + ok $R->stop(); +done_testing; diff --git a/t/07-robust.t b/t/07-robust.t index 51b6f85..fb17265 100644 --- a/t/07-robust.t +++ b/t/07-robust.t @@ -5,21 +5,31 @@ use warnings; use Test::More; use Statistics::R; -plan tests => 10003; - - -# Test that the IOs are well-oiled. In Statistics::R version 0.20, a slight -# imprecision in the regular expression to parse the output stream caused a -# problem was apparent only once every few thousands times my ($R, $input); ok $R = Statistics::R->new(); -ok $R->set('x', $input); -for my $i (1 .. 10000) { - is($R->get('x'), undef); +# Test that we can recover from a R quit() command +is $R->run(q`quit()`), '', 'Handle quit()'; +is $R->run(q`cat("foo")`), 'foo'; + + +# Test that large arrays can be read +ok $R->set('y', [1 .. 100_000]), 'Large arrays'; +is $R->get('y')->[-1], 100_000; + + +# Test that the IOs are well-oiled. In Statistics::R version 0.20, a slight +# imprecision in the regular expression to parse the output stream caused a +# problem that was apparent only once every few thousands times +ok $R->set('z', $input), 'Smooth IO'; +for my $i (1 .. 10_000) { + is $R->get('z'), undef; } ok $R->stop(); + + +done_testing; diff --git a/t/08-errors.t b/t/08-errors.t index 04b4a0c..a92ca4d 100644 --- a/t/08-errors.t +++ b/t/08-errors.t @@ -5,22 +5,48 @@ use warnings; use Test::More; use Statistics::R; -plan tests => 3; -my $R; +SKIP: { + skip 'because tests hang on Win32 (bug #81159)', 1 if $^O =~ /^(MS)?Win32$/; + + ok my $R = Statistics::R->new(bin => '/foo/ba/R'); + eval { + $R->run( qq`print("Hello");` ); + }; + #diag "Diagnostic: \n".$@."\n"; + ok $@, 'Executable not found'; + + ok $R = Statistics::R->new(); + is $R->run(q`a <- 1;`), ''; + + eval { + $R->run( qq`print("Hello");\nprint(ASDF)` ); + }; + #diag "Diagnostic: \n".$@."\n"; + ok $@, 'Runtime error'; + + is $R->run(q`a <- 1;`), ''; + + ok $R = Statistics::R->new(); + eval { + $R->run( qq`print("Hello");\nprint "ASDF"` ); + }; + #diag "Diagnostic: \n".$@."\n"; + ok $@, 'Syntax error'; + # Actual error message varies depending on locale + + is $R->run(q`a <- 1;`), ''; + + use_ok 't::FlawedStatisticsR'; + ok $R = t::FlawedStatisticsR->new(); + eval { + $R->run( qq`print("Hello");\ncolors<-c("red")` ); + }; + #diag "Diagnostic: \n".$@."\n"; + ok $@, 'Internal error'; -ok $R = Statistics::R->new(); - - -eval { - $R->run( q`print "ASDF"` ); }; -ok $@, 'Syntax error'; -# Actual error message vary depending on locale +done_testing; -eval { - $R->run( q`print(ASDF)` ); -}; -ok $@, 'Runtime error'; diff --git a/t/FlawedStatisticsR.pm b/t/FlawedStatisticsR.pm new file mode 100644 index 0000000..4977250 --- /dev/null +++ b/t/FlawedStatisticsR.pm @@ -0,0 +1,16 @@ +package t::FlawedStatisticsR; + +use Statistics::R; +use base qw(Statistics::R); +my $eos = 'Statistics::R::EOS'; + +# Override the wrap_cmd() method of Statistics::R with a faulty one +sub wrap_cmd { + my ($self, $cmd) = @_; + $cmd = qq`zzzzzzzzzzzzzzz; write("$eos",stdout())\n`; + return $cmd; +} + +1; + +__END__ -- 2.39.2