From d2e350d15d94d80d11a9c5af31978419f7d9db01 Mon Sep 17 00:00:00 2001 From: Steve Hancock Date: Sun, 18 Jul 2021 08:28:00 -0700 Subject: [PATCH] added utility to run tokenizer tests --- dev-bin/RandomTesting.md | 14 + dev-bin/build.pl | 31 +- dev-bin/run_tokenizer_tests.pl | 554 ++++++++++++++++++++++++++++ dev-bin/run_tokenizer_tests.pl.data | 124 +++++++ 4 files changed, 722 insertions(+), 1 deletion(-) create mode 100755 dev-bin/run_tokenizer_tests.pl create mode 100644 dev-bin/run_tokenizer_tests.pl.data diff --git a/dev-bin/RandomTesting.md b/dev-bin/RandomTesting.md index f9d996a7..b8a52763 100644 --- a/dev-bin/RandomTesting.md +++ b/dev-bin/RandomTesting.md @@ -179,6 +179,20 @@ The usage is simply It reads its database, ```run_convergence_tests.pl.data```, and runs the latest version of perltidy on each case. This takes a little time because there are hundreds of cases in the database. The last line of the output will show "OK" if there are no problems. +## Utility for running tokenizer tests + +A similar script for running tests which test the tokenizer is + + - run_tokenizer_tests.pl + +The usage is + +``` +./run_tokenizer_tests.pl +``` + +It reads its database, ```run_tokenizer_tests.pl.data```, and runs the latest version of perltidy on each case. The last line of the output will show "OK" if there are no problems. + ## Utility for stress testing with side comments Another type of test which has been useful is a side comment test. A script to do this type of test is diff --git a/dev-bin/build.pl b/dev-bin/build.pl index dd70668c..ec99ca41 100755 --- a/dev-bin/build.pl +++ b/dev-bin/build.pl @@ -43,7 +43,7 @@ my $fh_log; # These are the main steps, in approximate order, for making a new version # Note: Since perl critic is in the .tidyallrc, a separate 'PC' step is not # needed -my $rsteps = [qw( CHK CONV V PC TIDY T CL DOCS MANIFEST DIST)]; +my $rsteps = [qw( CHK CONV TOK V PC TIDY T CL DOCS MANIFEST DIST)]; my $rstatus = {}; foreach my $step ( @{$rsteps} ) { $rstatus->{$step} = 'TBD' } @@ -59,6 +59,7 @@ my $rcode = { 'PC' => \&run_perl_critic, 'TIDY' => \&run_tidyall, 'CONV' => \&run_convergence_tests, + 'TOK' => \&run_tokenizer_tests, 'MANIFEST' => \&make_manifest, 'T' => \&make_tests, 'DOCS' => \&make_docs, @@ -85,6 +86,7 @@ v - check/update Version Number status: $rstatus->{'V'} tidy - run tidyall (tidy & critic) status: $rstatus->{'TIDY'} pc - run PerlCritic (critic only) status: $rstatus->{'PC'} conv - run convergence tests status: $rstatus->{'CONV'} +tok - run tokenizer tests status: $rstatus->{'TOK'} manifest - make MANIFEST status: $rstatus->{'MANIFEST'} t - make Tests status: $rstatus->{'T'} cl - review/edit CHANGES.md status: $rstatus->{'CL'} @@ -197,6 +199,33 @@ sub run_convergence_tests { return; } +sub run_tokenizer_tests { + my $fout = "tmp/run_tokenizer_tests.out"; + $rstatus->{'TOK'} = 'TBD'; + + # running with any .perltidyrc file + my $cmd = "./dev-bin/run_tokenizer_tests.pl >$fout 2>>$fout"; + system_echo($cmd); + + my $fh; + if ( !open( $fh, '<', $fout ) ) { + hitcr("Strange: cannot open '$fout': $!."); + return; + } + my @lines = <$fh>; + foreach my $line (@lines) { $fh_log->print($line) } + my $error = $lines[-1] !~ /OK/; + + $fh->close(); + if ( !$error ) { + $rstatus->{'TOK'} = 'OK'; + hitcr("Tokenizer check OK."); + return; + } + openurl("$fout"); + return; +} + sub run_perl_critic { my $pcoutput = "tmp/perlcritic.out"; $rstatus->{'PC'} = 'TBD'; diff --git a/dev-bin/run_tokenizer_tests.pl b/dev-bin/run_tokenizer_tests.pl new file mode 100755 index 00000000..d74fcca4 --- /dev/null +++ b/dev-bin/run_tokenizer_tests.pl @@ -0,0 +1,554 @@ +#!/usr/bin/perl -w +use strict; +use warnings; +use File::Copy; +use Perl::Tidy; + +my $usage = < does not take an argument +# =s takes a mandatory string +# :s takes an optional string +# =i takes a mandatory integer +# :i takes an optional integer +# ! does not take an argument and may be negated +# i.e., -foo and -nofoo are allowed +# a double dash signals the end of the options list +my @option_string = qw( + h + m + p + r + u +); + +my %Opts = (); +if ( !GetOptions( \%Opts, @option_string ) ) { + die "Programming Bug: error in setting default options"; +} + +if ( $Opts{h} ) { + print $usage; + exit 1; +} + +# set default .data file +my $db_fname = $0 . ".data"; + +# set tmp dir - you may need to change this depending on setup +my $git_home = qx[git rev-parse --show-toplevel]; +chomp $git_home; +my $tmp_dir = $git_home . "/dev-bin/tmp"; + +if ( -e $tmp_dir && !-d $tmp_dir ) { + print STDERR "'$tmp_dir' exists but is not a dir: please fix\n"; + exit 1; +} + +print <{$case} = $rdata_files->{$fname}; + } + else { + + # A foreign file seems to have entered the database + print "ignoring unknown file '$fname'\n"; + } + } + + my $opath = $tmp_dir . '/'; + + my @selected_cases = keys %{$rsources}; + if ( @{$rcases} ) { + @selected_cases = @{$rcases}; + } + + my @skipped_cases; + my @had_errors; + foreach my $sname ( sort @selected_cases ) { + + # remove any old tmp files for this case + my $str = $opath . $sname . ".[0-9]"; + my @tmp_files = glob("$str"); + if (@tmp_files) { + my $num = unlink @tmp_files; + print "unlinked $num old files for case $sname\n"; + } + + my $output; + my $source = $rsources->{$sname}; + if ( !defined($source) ) { + print "Skipping case $sname : not in database\n"; + push @skipped_cases, $sname; + next; + } + my $stderr_string; + my $errorfile_string; + my @output_history; + my $params = ""; + my $err = Perl::Tidy::perltidy( + source => \$source, + destination => \$output, + perltidyrc => \$params, + argv => '', # don't let perltidy look at my @ARGV + stderr => \$stderr_string, + errorfile => \$errorfile_string, # not used when -se flag is set + ); + if ($stderr_string) { + write_file( $tmp_dir.'/'.$sname.'.STDERR', $stderr_string ); + write_file( $tmp_dir.'/'.$sname.'.in', $source ); + print "$sname: wrote .STDERR file"; + push @had_errors, $sname; + next; + } + if ($errorfile_string) { + write_file( $tmp_dir.'/'.$sname.'.in', $source ); + write_file( $tmp_dir.'/'.$sname.'.ERR', $errorfile_string ); + print "$sname: wrote .ERR file"; + push @had_errors, $sname; + next; + } + if ($err) { + print STDERR "error calling Perl::Tidy for case $sname\n"; + write_file( $tmp_dir.'/'.$sname.'.in', $source ); + print "$sname: error calling perltidy\n"; + push @had_errors, $sname; + next; + } + print "$sname: OK\n"; + } + + print "...\n"; + if (@had_errors) { + + print <{$fname} = $string + # where $fname is the file name and $string is its text + + my $rdata_files = {}; + if ( !-e $db_fname ) { + print STDERR "Database $db_fname does not exist\n"; + exit 1; + } + + my $dstring = get_string($db_fname); + my @lines = split /\n/, $dstring; + my $lines = @lines; + my $fname = ""; + my $lno = 0; + my $string; + + foreach my $line (@lines) { + $lno++; + if ( $line =~ /^==>\s*([\w\.]+)\s*<==/ ) { + if ($string) { + chomp $string if ( $string =~ /\n\n/ ); + $rdata_files->{$fname} = $string; + } + $string = ""; + $fname = $1; + } + else { + $string .= $line . "\n"; + } + } + if ($string) { + $rdata_files->{$fname} = $string; + } + return $rdata_files; +} + +sub unpack_data { + + my ( $rdata_files, $rcases, $rfiles ) = @_; + + # unpack selected cases, files, or all of the database into + # a temporary directory + my $opath = $tmp_dir . '/'; + my @keys = @{$rfiles}; + my $count = 0; + foreach my $case ( @{$rcases} ) { + push @keys, $case . "\.in"; + push @keys, $case . "\.par"; + } + if ( !@keys ) { @keys = keys %{$rdata_files} } + foreach my $key (@keys) { + my $fname = $opath . $key; + my $string = $rdata_files->{$key}; + if ( !$string ) { + print STDERR "could not find '$fname' in database\n"; + } + else { + $count++; + write_file( $fname, $string ); + } + } + print "Wrote $count files to '$tmp_dir'\n"; + return; +} + +sub merge_data { + my ( $db_fname, $rold_data, $rfiles ) = @_; + + my $rnew_data = read_files_to_hash($rfiles); + + # Merge the two data hashes + my $update_count = 0; + my $new_count = 0; + foreach my $fname ( keys %{$rnew_data} ) { + my $string = $rnew_data->{$fname}; + if ( defined( $rold_data->{$fname} ) ) { + $update_count++; + } + else { + $new_count++; + } + $rold_data->{$fname} = $string; + } + + my $num_old = keys %{$rold_data}; + print <{$file} = $string; + } + return $rdata; +} + +sub write_hash_to_data_file { + my ( $db_fname, $rdata ) = @_; + my $nfiles = keys %{$rdata}; + print "packing $nfiles files into $db_fname\n"; + + # Pack into a temporary file first, + # if all goes well... + # backup old data file if it exists, + # then rename + my $ostring; + my $count = 0; + foreach my $fname ( sort keys %{$rdata} ) { + my $string = $rdata->{$fname}; + if ($count) { $ostring .= "\n"; } + if ( $fname =~ /\/([^\/]+$)/ ) { $fname = $1; } + $count++; + $ostring .= "==> $fname <==\n"; + $ostring .= $string; + } + + # Backup an existing database + if ( -e $db_fname ) { + my $backup_extension = ".bak"; + my $backup_name = $db_fname . $backup_extension; + if ( -f $backup_name ) { + unlink($backup_name) + or die( +"unable to remove previous '$backup_name' for -b option; check permissions: $!\n" + ); + } + + # backup the old data file + # use copy for symlinks, move for regular files + if ( -l $db_fname ) { + File::Copy::copy( $db_fname, $backup_name ) + or die("File::Copy failed trying to backup source: $!"); + } + else { + rename( $db_fname, $backup_name ) + or die("problem renaming $db_fname to $backup_name: $!\n"); + } + } + + write_file( $db_fname, $ostring ); + print "Wrote $count files to $db_fname\n"; + return; +} + +sub get_string { + my ($file) = @_; + open my $fh, '<', $file or die "cannot open $file: $!\n"; + local $/ = undef; + my $string = <$fh>; + close $fh; + return $string; +} + +sub make_tmp_dir { + if ( !-d $tmp_dir ) { + unless ( mkdir $tmp_dir ) { + print STDERR "unable to create $tmp_dir\n"; + exit 1; + } + if ( !-d $tmp_dir ) { + print STDERR "problem creating $tmp_dir\n"; + exit 1; + } + } +} + +sub write_file { + my ( $fname, $string, $msg ) = @_; + open my $fh, '>', $fname or die "cannot open $fname: $!\n"; + $fh->print($string); + $fh->close(); + print STDERR "Wrote $fname\n" if ($msg); + return; +} diff --git a/dev-bin/run_tokenizer_tests.pl.data b/dev-bin/run_tokenizer_tests.pl.data new file mode 100644 index 00000000..c7255905 --- /dev/null +++ b/dev-bin/run_tokenizer_tests.pl.data @@ -0,0 +1,124 @@ +==> c015.in <== +# These are syntactically correct; set expecting unknown at a comma +print "hello1\n", || print "hi1\n"; +print "hello2\n", && print "bye2\n"; +print "hello3\n", or print "bye3\n"; +print "hello4\n", and print "bye4\n"; + +==> c017.in <== +my @words = qw(To view this email as a web page go here); +my @subs; +push @subs, sub { my $i=shift; $i %= @words; print "$words[$i] "; return $subs[0]}; +$subs[0](0)(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11); +print "\n"; + +==> c029.in <== +$Msg#sc# +->#sc# +$field#sc# +(#sc# +)#sc# +;#sc# + + +==> c033.in <== +print$my_bag +&#sc# +$your_bag +, +"\n" +; + + +==> c035.in <== +my$ascii#sc# +=#sc# +$formatter#sc# +->#sc# +format#sc# +(#sc# +$html#sc# +)#sc# +;#sc# + + +==> c036.in <== +# this is a valid program, '%#' is a punctuation variable +%# = ( foo => 'bar', baz => 'buz' ); +print keys(%#), "\n"; + +#< 'bar', baz => 'buz' ); +print keys(%#), "\n"; +#>>V + +# this is a valid program, '@#' is a punctuation variable +@# = ( foo , 'bar', baz , 'buz' ); +print @#, "\n"; + +# this is a valid program, the space makes the '#' a side comment +# perltidy formed %# here, causing an error +% # +var = ( foo => 'bar', baz => 'buz' ); +print keys(%var), "\n"; + +==> c037.in <== +is( +$one +->#sc# +package +, +"bar" +, +"Got package" +) +; + + +==> c038.in <== +sub plugh () :# +Ugly('\(") : Bad; + +==> c040.in <== +$bond_str += +VERY_WEAK #sc# +/ #sc# +1.05 +; + +ok +/[^\s]+/#sc# +, +'m/[^\s]/ utf8' +; + + +==> c041.in <== +@ret += +$o +-># +SUPER'method +( +'whatever' +) +; + + +==> c043.in <== +# FIXED ; index not type 'k' +{#sc# +print$fh#sc# +"class='u'" +,#sc# +$self#sc# +->#sc# +index?#sc# +" href='#___top' title='click to go to top of document'\n" +:#sc# +"\n" +;#sc# +}#sc# -- 2.39.5