X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;ds=sidebyside;f=lib%2FPerl%2FTidy.pm;h=63cfdb33cb88953bdff37587c930fd5c021392c3;hb=effbe8e558790d5f5e4eb49a10b2ed020b0eaaee;hp=211b8ad7d6c14a3c0f62856f975402b222e622ff;hpb=c514d57dc8088e1f4d3f51857b1155c20085c296;p=perltidy.git diff --git a/lib/Perl/Tidy.pm b/lib/Perl/Tidy.pm index 211b8ad..63cfdb3 100644 --- a/lib/Perl/Tidy.pm +++ b/lib/Perl/Tidy.pm @@ -104,17 +104,17 @@ use File::Temp qw(tempfile); BEGIN { - # Release version is the approximate YYMMDD of the release. + # Release version is the approximate YYYYMMDD of the release. # Development version is (Last Release).(Development Number) # To make the number continually increasing, the Development Number is a 2 - # digit number starting at 01 after a release is continually bumped along - # at significant points during development. If it ever reaches 99 then the - # Release version must be bumped, and it is probably past time for a - # release anyway. + # digit number starting at 01 after a release. It is continually bumped + # along at significant points during development. If it ever reaches 99 + # then the Release version must be bumped, and it is probably past time for + # a release anyway. - $VERSION = '20220613'; -} + $VERSION = '20230309'; +} ## end BEGIN sub DESTROY { @@ -317,8 +317,7 @@ sub find_input_line_ending { my $missing_file_spec; BEGIN { - eval { require File::Spec }; - $missing_file_spec = $EVAL_ERROR; + $missing_file_spec = !eval { require File::Spec; 1 }; } sub catfile { @@ -386,10 +385,12 @@ sub find_input_line_ending { # messages. It writes a .LOG file, which may be saved with a # '-log' or a '-g' flag. -{ #<<< +{ #<<< (this side comment avoids excessive indentation in a closure) my $Warn_count; my $fh_stderr; +my $loaded_unicode_gcstring; +my $rstatus; # Bump Warn_count only: it is essential to bump the count on all warnings, even # if no message goes out, so that the correct exit status is set. @@ -420,6 +421,57 @@ sub is_char_mode { return; } ## end sub is_char_mode +my $md5_hex = sub { + my ($buf) = @_; + + # Evaluate the MD5 sum for a string + # Patch for [rt.cpan.org #88020] + # Use utf8::encode since md5_hex() only operates on bytes. + # my $digest = md5_hex( utf8::encode($sink_buffer) ); + + # Note added 20180114: the above patch did not work correctly. I'm not + # sure why. But switching to the method recommended in the Perl 5 + # documentation for Encode worked. According to this we can either use + # $octets = encode_utf8($string) or equivalently + # $octets = encode("utf8",$string) + # and then calculate the checksum. So: + my $octets = Encode::encode( "utf8", $buf ); + my $digest = md5_hex($octets); + return $digest; +}; + +BEGIN { + + # Array index names for $self. + # Do not combine with other BEGIN blocks (c101). + my $i = 0; + use constant { + _actual_output_extension_ => $i++, + _debugfile_stream_ => $i++, + _decoded_input_as_ => $i++, + _destination_stream_ => $i++, + _diagnostics_object_ => $i++, + _display_name_ => $i++, + _file_extension_separator_ => $i++, + _fileroot_ => $i++, + _is_encoded_data_ => $i++, + _length_function_ => $i++, + _line_separator_default_ => $i++, + _line_separator_ => $i++, + _logger_object_ => $i++, + _output_file_ => $i++, + _postfilter_ => $i++, + _prefilter_ => $i++, + _rOpts_ => $i++, + _saw_pbp_ => $i++, + _tabsize_ => $i++, + _teefile_stream_ => $i++, + _user_formatter_ => $i++, + _input_copied_verbatim_ => $i++, + _input_output_difference_ => $i++, + }; +} ## end BEGIN + sub perltidy { my %input_hash = @_; @@ -446,7 +498,7 @@ sub perltidy { ); # Status information which can be returned for diagnostic purposes. - # This is intended for testing and subject to change. + # NOTE: This is intended only for testing and subject to change. # List of "key => value" hash entries: @@ -486,7 +538,7 @@ sub perltidy { # blinking => true if stopped on blinking states # ( i.e., unstable formatting, should not happen ) - my $rstatus = { + $rstatus = { file_count => 0, opt_format => EMPTY_STRING, @@ -575,38 +627,52 @@ EOM $fh_stderr = *STDERR; } + my $self = []; + bless $self, __PACKAGE__; + sub Exit { my $flag = shift; if ($flag) { goto ERROR_EXIT } else { goto NORMAL_EXIT } croak "unexpectd return to Exit"; - } + } ## end sub Exit sub Die { my $msg = shift; Warn($msg); Exit(1); croak "unexpected return to Die"; - } - - my $md5_hex = sub { - my ($buf) = @_; - - # Evaluate the MD5 sum for a string - # Patch for [rt.cpan.org #88020] - # Use utf8::encode since md5_hex() only operates on bytes. - # my $digest = md5_hex( utf8::encode($sink_buffer) ); + } ## end sub Die + + sub Fault { + my ($msg) = @_; + + # This routine is called for errors that really should not occur + # except if there has been a bug introduced by a recent program change. + # Please add comments at calls to Fault to explain why the call + # should not occur, and where to look to fix it. + my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0); + my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1); + my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2); + my $pkg = __PACKAGE__; + + my $input_stream_name = $rstatus->{'input_name'}; + $input_stream_name = '(unknown)' unless ($input_stream_name); + Die(<[_file_extension_separator_] = $dot; - #--------------------------------------------------------------- + #------------------------- # get command line options - #--------------------------------------------------------------- + #------------------------- my ( $rOpts, $config_file, $rraw_options, $roption_string, $rexpansion, $roption_category, $roption_range ) = process_command_line( @@ -709,12 +779,18 @@ EOM $rpending_complaint, $dump_options_type, ); + # Only filenames should remain in @ARGV + my @Arg_files = @ARGV; + + $self->[_rOpts_] = $rOpts; + my $saw_pbp = grep { $_ eq '-pbp' || $_ eq '-perl-best-practices' } @{$rraw_options}; + $self->[_saw_pbp_] = $saw_pbp; - #--------------------------------------------------------------- + #------------------------------------ # Handle requests to dump information - #--------------------------------------------------------------- + #------------------------------------ # return or exit immediately after all dumps my $quit_now = 0; @@ -770,11 +846,21 @@ EOM Exit(0); } - #--------------------------------------------------------------- + # --dump-block-summary requires one filename in the arg list. + # This is a safety precaution in case a user accidentally adds -dbs to the + # command line parameters and is expecting formatted output to stdout. + # Another precaution, added elsewhere, is to ignore -dbs in a .perltidyrc + my $numf = @Arg_files; + if ( $rOpts->{'dump-block-summary'} && $numf != 1 ) { + Die(<check_options( $is_Windows, $Windows_type, $rpending_complaint ); if ($user_formatter) { $rOpts->{'format'} = 'user'; @@ -800,73 +886,13 @@ EOM Die("-format='$fmt' but must be one of: $formats\n"); } - my $output_extension = make_extension( $rOpts->{'output-file-extension'}, - $default_file_extension{ $rOpts->{'format'} }, $dot ); - - # If the backup extension contains a / character then the backup should - # be deleted when the -b option is used. On older versions of - # perltidy this will generate an error message due to an illegal - # file name. - # - # A backup file will still be generated but will be deleted - # at the end. If -bext='/' then this extension will be - # the default 'bak'. Otherwise it will be whatever characters - # remains after all '/' characters are removed. For example: - # -bext extension slashes - # '/' bak 1 - # '/delete' delete 1 - # 'delete/' delete 1 - # '/dev/null' devnull 2 (Currently not allowed) - my $bext = $rOpts->{'backup-file-extension'}; - my $delete_backup = ( $rOpts->{'backup-file-extension'} =~ s/\///g ); - - # At present only one forward slash is allowed. In the future multiple - # slashes may be allowed to allow for other options - if ( $delete_backup > 1 ) { - Die("-bext=$bext contains more than one '/'\n"); - } - - my $backup_extension = - make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot ); - - my $html_toc_extension = - make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot ); - - my $html_src_extension = - make_extension( $rOpts->{'html-src-extension'}, 'src', $dot ); - - # check for -b option; - # silently ignore unless beautify mode - my $in_place_modify = $rOpts->{'backup-and-modify-in-place'} - && $rOpts->{'format'} eq 'tidy'; - - # Turn off -b with warnings in case of conflicts with other options. - # NOTE: Do this silently, without warnings, if there is a source or - # destination stream, or standard output is used. This is because the -b - # flag may have been in a .perltidyrc file and warnings break - # Test::NoWarnings. See email discussion with Merijn Brand 26 Feb 2014. - if ($in_place_modify) { - if ( $rOpts->{'standard-output'} - || $destination_stream - || ref $source_stream - || $rOpts->{'outfile'} - || defined( $rOpts->{'output-path'} ) ) - { - $in_place_modify = 0; - } - } + my $output_extension = + $self->make_file_extension( $rOpts->{'output-file-extension'}, + $default_file_extension{ $rOpts->{'format'} } ); - # Turn off assert-tidy and assert-untidy unless we are tidying files - if ( $rOpts->{'format'} ne 'tidy' ) { - if ( $rOpts->{'assert-tidy'} ) { - $rOpts->{'assert-tidy'} = 0; - Warn("ignoring --assert-tidy, --format is not 'tidy'\n"); - } - if ( $rOpts->{'assert-untidy'} ) { - $rOpts->{'assert-untidy'} = 0; - Warn("ignoring --assert-untidy, --format is not 'tidy'\n"); - } - } + # get parameters associated with the -b option + my ( $in_place_modify, $backup_extension, $delete_backup ) = + $self->check_in_place_modify( $source_stream, $destination_stream ); Perl::Tidy::Formatter::check_options($rOpts); Perl::Tidy::Tokenizer::check_options($rOpts); @@ -896,14 +922,14 @@ EOM # no filenames should be given if input is from an array if ($source_stream) { - if ( @ARGV > 0 ) { + if ( @Arg_files > 0 ) { Die( "You may not specify any filenames when a source array is given\n" ); } - # we'll stuff the source array into ARGV - unshift( @ARGV, $source_stream ); + # we'll stuff the source array into Arg_files + unshift( @Arg_files, $source_stream ); # No special treatment for source stream which is a filename. # This will enable checks for binary files and other bad stuff. @@ -912,7 +938,7 @@ EOM # use stdin by default if no source array and no args else { - unshift( @ARGV, '-' ) unless @ARGV; + unshift( @Arg_files, '-' ) unless @Arg_files; } # Flag for loading module Unicode::GCString for evaluating text width: @@ -920,384 +946,974 @@ EOM # 0 = do not use; failed to load or not wanted # 1 = successfully loaded and ok to use # The module is not actually loaded unless/until it is needed - my $loaded_unicode_gcstring; if ( !$rOpts->{'use-unicode-gcstring'} ) { $loaded_unicode_gcstring = 0; } - #--------------------------------------------------------------- - # Ready to go... - # main loop to process all files in argument list - #--------------------------------------------------------------- - my $formatter = undef; - my $tokenizer = undef; - # Remove duplicate filenames. Otherwise, for example if the user entered # perltidy -b myfile.pl myfile.pl # the backup version of the original would be lost. - if ( @ARGV > 1 ) { + if ( @Arg_files > 1 ) { my %seen = (); - @ARGV = grep { !$seen{$_}++ } @ARGV; + @Arg_files = grep { !$seen{$_}++ } @Arg_files; } # If requested, process in order of increasing file size # This can significantly reduce perl's virtual memory usage during testing. - if ( @ARGV > 1 && $rOpts->{'file-size-order'} ) { - @ARGV = + if ( @Arg_files > 1 && $rOpts->{'file-size-order'} ) { + @Arg_files = map { $_->[0] } sort { $a->[1] <=> $b->[1] } - map { [ $_, -e $_ ? -s $_ : 0 ] } @ARGV; + map { [ $_, -e $_ ? -s $_ : 0 ] } @Arg_files; } - my $number_of_files = @ARGV; - while ( my $input_file = shift @ARGV ) { - my $fileroot; - my @input_file_stat; - my $display_name; + my $logfile_header = make_logfile_header( $rOpts, $config_file, + $rraw_options, $Windows_type, $readable_options, ); - #--------------------------------------------------------------- - # prepare this input stream - #--------------------------------------------------------------- - if ($source_stream) { - $fileroot = "perltidy"; - $display_name = ""; + # Store some values needed by lower level routines + $self->[_diagnostics_object_] = $diagnostics_object; + $self->[_postfilter_] = $postfilter; + $self->[_prefilter_] = $prefilter; + $self->[_user_formatter_] = $user_formatter; - # If the source is from an array or string, then .LOG output - # is only possible if a logfile stream is specified. This prevents - # unexpected perltidy.LOG files. - if ( !defined($logfile_stream) ) { - $logfile_stream = Perl::Tidy::DevNull->new(); + #-------------------------- + # loop to process all files + #-------------------------- + $self->process_all_files( - # Likewise for .TEE and .DEBUG output - } - if ( !defined($teefile_stream) ) { - $teefile_stream = Perl::Tidy::DevNull->new(); - } - if ( !defined($debugfile_stream) ) { - $debugfile_stream = Perl::Tidy::DevNull->new(); - } - } - elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN - $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc - $display_name = ""; - $in_place_modify = 0; - } - else { - $fileroot = $input_file; - $display_name = $input_file; - unless ( -e $input_file ) { + \%input_hash, + \@Arg_files, - # file doesn't exist - check for a file glob - if ( $input_file =~ /([\?\*\[\{])/ ) { + # filename stuff... + $output_extension, + $forbidden_file_extensions, + $in_place_modify, + $backup_extension, + $delete_backup, - # Windows shell may not remove quotes, so do it - my $input_file = $input_file; - if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 } - if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 } - my $pattern = fileglob_to_re($input_file); - my $dh; - if ( opendir( $dh, './' ) ) { - my @files = - grep { /$pattern/ && !-d } readdir($dh); - ##grep { /$pattern/ && !-d $_ } readdir($dh); - closedir($dh); - if (@files) { - unshift @ARGV, @files; - next; - } - } - } - Warn("skipping file: '$input_file': no matches found\n"); - next; - } + # logfile stuff... + $logfile_header, + $rpending_complaint, + $rpending_logfile_message, - unless ( -f $input_file ) { - Warn("skipping file: $input_file: not a regular file\n"); - next; - } + ); - # As a safety precaution, skip zero length files. - # If for example a source file got clobbered somehow, - # the old .tdy or .bak files might still exist so we - # shouldn't overwrite them with zero length files. - unless ( -s $input_file ) { - Warn("skipping file: $input_file: Zero size\n"); - next; - } + #----- + # Exit + #----- - # And avoid formatting extremely large files. Since perltidy reads - # files into memory, trying to process an extremely large file - # could cause system problems. - my $size_in_mb = ( -s $input_file ) / ( 1024 * 1024 ); - if ( $size_in_mb > $rOpts->{'maximum-file-size-mb'} ) { - $size_in_mb = sprintf( "%0.1f", $size_in_mb ); - Warn( -"skipping file: $input_file: size $size_in_mb MB exceeds limit $rOpts->{'maximum-file-size-mb'}; use -mfs=i to change\n" - ); - next; - } + # Fix for RT #130297: return a true value if anything was written to the + # standard error output, even non-fatal warning messages, otherwise return + # false. - unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) { - Warn( - "skipping file: $input_file: Non-text (override with -f)\n" - ); - next; - } + # These exit codes are returned: + # 0 = perltidy ran to completion with no errors + # 1 = perltidy could not run to completion due to errors + # 2 = perltidy ran to completion with error messages - # we should have a valid filename now - $fileroot = $input_file; - @input_file_stat = stat($input_file); + # Note that if perltidy is run with multiple files, any single file with + # errors or warnings will write a line like + # '## Please see file testing.t.ERR' + # to standard output for each file with errors, so the flag will be true, + # even if only some of the multiple files may have had errors. - if ( $OSNAME eq 'VMS' ) { - ( $fileroot, $dot ) = check_vms_filename($fileroot); - } + NORMAL_EXIT: + my $ret = $Warn_count ? 2 : 0; + return wantarray ? ( $ret, $rstatus ) : $ret; - # add option to change path here - if ( defined( $rOpts->{'output-path'} ) ) { + ERROR_EXIT: + return wantarray ? ( 1, $rstatus ) : 1; - my ( $base, $old_path ) = fileparse($fileroot); - my $new_path = $rOpts->{'output-path'}; - unless ( -d $new_path ) { - unless ( mkdir $new_path, 0777 ) { - Die("unable to create directory $new_path: $ERRNO\n"); - } - } - my $path = $new_path; - $fileroot = catfile( $path, $base ); - unless ($fileroot) { - Die(<new( - input_file => $input_file, - rOpts => $rOpts, - rpending_logfile_message => $rpending_logfile_message, - ); - next unless ($source_object); - - my $max_iterations = $rOpts->{'iterations'}; - my $do_convergence_test = $max_iterations > 1; - my $convergence_log_message; - my %saw_md5; - my $digest_input = 0; - - my $buf = EMPTY_STRING; - while ( my $line = $source_object->get_line() ) { - $buf .= $line; - } - - my $remove_terminal_newline = - !$rOpts->{'add-terminal-newline'} && substr( $buf, -1, 1 ) !~ /\n/; - - # Decode the input stream if necessary or requested - my $encoding_in = EMPTY_STRING; - my $rOpts_character_encoding = $rOpts->{'character-encoding'}; - my $encoding_log_message; - my $decoded_input_as = EMPTY_STRING; - $rstatus->{'char_mode_source'} = 0; - - # Case 1: If Perl is already in a character-oriented mode for this - # string rather than a byte-oriented mode. Normally, this happens if - # the caller has decoded a utf8 string before calling perltidy. But it - # could also happen if the user has done some unusual manipulations of - # the source. In any case, we will not attempt to decode it because - # that could result in an output string in a different mode. - if ( is_char_mode($buf) ) { - $encoding_in = "utf8"; - $rstatus->{'char_mode_source'} = 1; - } - - # Case 2. No input stream encoding requested. This is appropriate - # for single-byte encodings like ascii, latin-1, etc - elsif ( !$rOpts_character_encoding - || $rOpts_character_encoding eq 'none' ) - { + # Make a file extension, adding any leading '.' if necessary. + # (the '.' may actually be an '_' under VMS). + my ( $self, $extension, $default ) = @_; - # nothing to do - } + # '$extension' is the first choice (usually a user entry) + # '$default' is a backup extension - # Case 3. guess input stream encoding if requested - elsif ( lc($rOpts_character_encoding) eq 'guess' ) { + $extension = EMPTY_STRING unless defined($extension); + $extension =~ s/^\s+//; + $extension =~ s/\s+$//; - # The guessing strategy is simple: use Encode::Guess to guess - # an encoding. If and only if the guess is utf8, try decoding and - # use it if successful. Otherwise, we proceed assuming the - # characters are encoded as single bytes (same as if 'none' had - # been specified as the encoding). + # Use default extension if nothing remains of the first choice + # + if ( length($extension) == 0 ) { + $extension = $default; + $extension = EMPTY_STRING unless defined($extension); + $extension =~ s/^\s+//; + $extension =~ s/\s+$//; + } - # In testing I have found that including additional guess 'suspect' - # encodings sometimes works but can sometimes lead to disaster by - # using an incorrect decoding. The user can always specify a - # specific input encoding. - my $buf_in = $buf; + # Only extensions with these leading characters get a '.' + # This rule gives the user some freedom. + if ( $extension =~ /^[a-zA-Z0-9]/ ) { + my $dot = $self->[_file_extension_separator_]; + $extension = $dot . $extension; + } + return $extension; +} ## end sub make_file_extension - my $decoder = guess_encoding( $buf_in, 'utf8' ); - if ( ref($decoder) ) { - $encoding_in = $decoder->name; - if ( $encoding_in ne 'UTF-8' && $encoding_in ne 'utf8' ) { - $encoding_in = EMPTY_STRING; - $buf = $buf_in; - $encoding_log_message .= <decode($buf_in); }; - if ($EVAL_ERROR) { + my ( $self, $source_stream, $destination_stream ) = @_; - $encoding_log_message .= <[_rOpts_]; - # Note that a guess failed, but keep going - # This warning can eventually be removed - Warn( -"file: $input_file: bad guess to decode source as $encoding_in\n" - ); - $encoding_in = EMPTY_STRING; - $buf = $buf_in; - } - else { - $encoding_log_message .= <{'backup-and-modify-in-place'} + && $rOpts->{'format'} eq 'tidy'; - # Case 4. Decode with a specific encoding - else { - $encoding_in = $rOpts_character_encoding; - eval { - $buf = Encode::decode( $encoding_in, $buf, - Encode::FB_CROAK | Encode::LEAVE_SRC ); - }; - if ($EVAL_ERROR) { + my ( $backup_extension, $delete_backup ); - # Quit if we cannot decode by the requested encoding; - # Something is not right. - Warn( -"skipping file: $display_name: Unable to decode source as $encoding_in\n" - ); - next; - } - else { - $encoding_log_message .= <{'standard-output'} + || $destination_stream + || ref $source_stream + || $rOpts->{'outfile'} + || defined( $rOpts->{'output-path'} ) ) + { + $in_place_modify = 0; } + } - # Set the encoding to be used for all further i/o: If we have - # decoded the data with any format, then we must continue to - # read and write it as encoded data, and we will normalize these - # operations with utf8. If we have not decoded the data, then - # we must not treat it as encoded data. - my $is_encoded_data = $encoding_in ? 'utf8' : EMPTY_STRING; + if ($in_place_modify) { + + # If the backup extension contains a / character then the backup should + # be deleted when the -b option is used. On older versions of + # perltidy this will generate an error message due to an illegal + # file name. + # + # A backup file will still be generated but will be deleted + # at the end. If -bext='/' then this extension will be + # the default 'bak'. Otherwise it will be whatever characters + # remains after all '/' characters are removed. For example: + # -bext extension slashes + # '/' bak 1 + # '/delete' delete 1 + # 'delete/' delete 1 + # '/dev/null' devnull 2 (Currently not allowed) + my $bext = $rOpts->{'backup-file-extension'}; + $delete_backup = ( $rOpts->{'backup-file-extension'} =~ s/\///g ); + + # At present only one forward slash is allowed. In the future multiple + # slashes may be allowed to allow for other options + if ( $delete_backup > 1 ) { + Die("-bext=$bext contains more than one '/'\n"); + } + + $backup_extension = + $self->make_file_extension( $rOpts->{'backup-file-extension'}, + 'bak' ); + } + + my $backup_method = $rOpts->{'backup-method'}; + if ( defined($backup_method) + && $backup_method ne 'copy' + && $backup_method ne 'move' ) + { + Die( +"Unexpected --backup-method='$backup_method'; must be one of: 'move', 'copy'\n" + ); + } + + return ( $in_place_modify, $backup_extension, $delete_backup ); +} ## end sub check_in_place_modify + +sub backup_method_copy { + + my ( $self, $input_file, $output_file, $backup_extension, $delete_backup ) + = @_; + + # Handle the -b (--backup-and-modify-in-place) option with -bm='copy': + # - First copy $input file to $backup_name. + # - Then open input file and rewrite with contents of $output_file + # - Then delete the backup if requested + + # NOTES: + # - Die immediately on any error. + # - $output_file is actually an ARRAY ref + + my $backup_file = $input_file . $backup_extension; + + unless ( -f $input_file ) { + + # no real file to backup .. + # This shouldn't happen because of numerous preliminary checks + Die( + "problem with -b backing up input file '$input_file': not a file\n" + ); + } + + if ( -f $backup_file ) { + unlink($backup_file) + or Die( +"unable to remove previous '$backup_file' for -b option; check permissions: $ERRNO\n" + ); + } + + # Copy input file to backup + File::Copy::copy( $input_file, $backup_file ) + or Die("File::Copy failed trying to backup source: $ERRNO"); + + # set permissions of the backup file to match the input file + my @input_file_stat = stat($input_file); + my $in_place_modify = 1; + $self->set_output_file_permissions( $backup_file, \@input_file_stat, + $in_place_modify ); + + # set the modification time of the copy to the original value (rt#145999) + my ( $read_time, $write_time ) = @input_file_stat[ 8, 9 ]; + if ( defined($write_time) ) { + utime( $read_time, $write_time, $backup_file ) + || Warn("error setting times for backup file '$backup_file'\n"); + } + + # Open the original input file for writing ... opening with ">" will + # truncate the existing data. + open( my $fout, ">", $input_file ) + || Die( +"problem re-opening $input_file for write for -b option; check file and directory permissions: $ERRNO\n" + ); + + if ( $self->[_is_encoded_data_] ) { + binmode $fout, ":raw:encoding(UTF-8)"; + } + + # Now copy the formatted output to it.. + + # if formatted output is in an ARRAY ref (normally this is true)... + if ( ref($output_file) eq 'ARRAY' ) { + foreach my $line ( @{$output_file} ) { + $fout->print($line) + or + Die("cannot print to '$input_file' with -b option: $OS_ERROR\n"); + } + } + + # or in a SCALAR ref (less efficient, and only used for testing) + elsif ( ref($output_file) eq 'SCALAR' ) { + foreach my $line ( split /^/, ${$output_file} ) { + $fout->print($line) + or + Die("cannot print to '$input_file' with -b option: $OS_ERROR\n"); + } + } + + # Error if anything else ... + # This can only happen if the output was changed from \@tmp_buff + else { + my $ref = ref($output_file); + Die(<close() + or Die("cannot close '$input_file' with -b option: $OS_ERROR\n"); + + # Set permissions of the output file to match the input file. This is + # necessary even if the inode remains unchanged because suid/sgid bits may + # have been reset. + $self->set_output_file_permissions( $input_file, \@input_file_stat, + $in_place_modify ); + + # Keep original modification time if no change (rt#145999) + if ( !$self->[_input_output_difference_] && defined($write_time) ) { + utime( $read_time, $write_time, $input_file ) + || Warn("error setting times for '$input_file'\n"); + } + + #--------------------------------------------------------- + # remove the original file for in-place modify as follows: + # $delete_backup=0 never + # $delete_backup=1 only if no errors + # $delete_backup>1 always : NOT ALLOWED, too risky + #--------------------------------------------------------- + if ( $delete_backup && -f $backup_file ) { + + # Currently, $delete_backup may only be 1. But if a future update + # allows a value > 1, then reduce it to 1 if there were warnings. + if ( $delete_backup > 1 + && $self->[_logger_object_]->get_warning_count() ) + { + $delete_backup = 1; + } + + # As an added safety precaution, do not delete the source file + # if its size has dropped from positive to zero, since this + # could indicate a disaster of some kind, including a hardware + # failure. Actually, this could happen if you had a file of + # all comments (or pod) and deleted everything with -dac (-dap) + # for some reason. + if ( !-s $input_file && -s $backup_file && $delete_backup == 1 ) { + Warn( +"output file '$input_file' missing or zero length; original '$backup_file' not deleted\n" + ); + } + else { + unlink($backup_file) + or Die( +"unable to remove backup file '$backup_file' for -b option; check permissions: $ERRNO\n" + ); + } + } + + # Verify that inode is unchanged during development + if (DEVEL_MODE) { + my @output_file_stat = stat($input_file); + my $inode_input = $input_file_stat[1]; + my $inode_output = $output_file_stat[1]; + if ( $inode_input != $inode_output ) { + Fault(<[_is_encoded_data_]; + my ( $fout, $iname ) = + Perl::Tidy::streamhandle( $input_file, 'w', $is_encoded_data ); + if ( !$fout ) { + Die( +"problem re-opening $input_file for write for -b option; check file and directory permissions: $ERRNO\n" + ); + } + + # Now copy the formatted output to it.. + + # if formatted output is in an ARRAY ref ... + if ( ref($output_file) eq 'ARRAY' ) { + foreach my $line ( @{$output_file} ) { + $fout->print($line) + or + Die("cannot print to '$input_file' with -b option: $OS_ERROR\n"); + } + } + + # or in a SCALAR ref (less efficient, for testing only) + elsif ( ref($output_file) eq 'SCALAR' ) { + foreach my $line ( split /^/, ${$output_file} ) { + $fout->print($line) + or + Die("cannot print to '$input_file' with -b option: $OS_ERROR\n"); + } + } + + # Error if anything else ... + # This can only happen if the output was changed from \@tmp_buff + else { + my $ref = ref($output_file); + Die(<close() + or Die("cannot close '$input_file' with -b option: $OS_ERROR\n"); + + # set permissions of the output file to match the input file + my $in_place_modify = 1; + $self->set_output_file_permissions( $input_file, \@input_file_stat, + $in_place_modify ); + + # Keep original modification time if no change (rt#145999) + my ( $read_time, $write_time ) = @input_file_stat[ 8, 9 ]; + if ( !$self->[_input_output_difference_] && defined($write_time) ) { + utime( $read_time, $write_time, $input_file ) + || Warn("error setting times for '$input_file'\n"); + } + + #--------------------------------------------------------- + # remove the original file for in-place modify as follows: + # $delete_backup=0 never + # $delete_backup=1 only if no errors + # $delete_backup>1 always : NOT ALLOWED, too risky + #--------------------------------------------------------- + if ( $delete_backup && -f $backup_name ) { + + # Currently, $delete_backup may only be 1. But if a future update + # allows a value > 1, then reduce it to 1 if there were warnings. + if ( $delete_backup > 1 + && $self->[_logger_object_]->get_warning_count() ) + { + $delete_backup = 1; + } + + # As an added safety precaution, do not delete the source file + # if its size has dropped from positive to zero, since this + # could indicate a disaster of some kind, including a hardware + # failure. Actually, this could happen if you had a file of + # all comments (or pod) and deleted everything with -dac (-dap) + # for some reason. + if ( !-s $input_file && -s $backup_name && $delete_backup == 1 ) { + Warn( +"output file '$input_file' missing or zero length; original '$backup_name' not deleted\n" + ); + } + else { + unlink($backup_name) + or Die( +"unable to remove previous '$backup_name' for -b option; check permissions: $ERRNO\n" + ); + } + } + + return; + +} ## end sub backup_method_move + +sub set_output_file_permissions { + + my ( $self, $output_file, $rinput_file_stat, $in_place_modify ) = @_; + + # Given: + # $output_file = the file whose permissions we will set + # $rinput_file_stat = the result of stat($input_file) + # $in_place_modify = true if --backup-and-modify-in-place is set + + my ( $mode_i, $uid_i, $gid_i ) = @{$rinput_file_stat}[ 2, 4, 5 ]; + my ( $uid_o, $gid_o ) = ( stat($output_file) )[ 4, 5 ]; + my $input_file_permissions = $mode_i & oct(7777); + my $output_file_permissions = $input_file_permissions; + + #rt128477: avoid inconsistent owner/group and suid/sgid + if ( $uid_i != $uid_o || $gid_i != $gid_o ) { + + # try to change owner and group to match input file if + # in -b mode. Note: chown returns number of files + # successfully changed. + if ( $in_place_modify + && chown( $uid_i, $gid_i, $output_file ) ) + { + # owner/group successfully changed + } + else { + + # owner or group differ: do not copy suid and sgid + $output_file_permissions = $mode_i & oct(777); + if ( $input_file_permissions != $output_file_permissions ) { + Warn( +"Unable to copy setuid and/or setgid bits for output file '$output_file'\n" + ); + } + } + } + + # Mark the output file for rw unless we are in -b mode. + # Explanation: perltidy does not unlink existing output + # files before writing to them, for safety. If a + # designated output file exists and is not writable, + # perltidy will halt. This can prevent a data loss if a + # user accidentally enters "perltidy infile -o + # important_ro_file", or "perltidy infile -st + # >important_ro_file". But it also means that perltidy can + # get locked out of rerunning unless it marks its own + # output files writable. The alternative, of always + # unlinking the designated output file, is less safe and + # not always possible, except in -b mode, where there is an + # assumption that a previous backup can be unlinked even if + # not writable. + if ( !$in_place_modify ) { + $output_file_permissions |= oct(600); + } + + if ( !chmod( $output_file_permissions, $output_file ) ) { + + # couldn't change file permissions + my $operm = sprintf "%04o", $output_file_permissions; + Warn( +"Unable to set permissions for output file '$output_file' to $operm\n" + ); + } + return; +} ## end sub set_output_file_permissions + +sub get_decoded_string_buffer { + my ( $self, $input_file, $display_name, $rpending_logfile_message ) = @_; + + # Decode the input buffer if necessary or requested + + # Given + # $input_file = the input file or stream + # $display_name = its name to use in error messages + + # Return + # $buf = string buffer with input, decoded from utf8 if necessary + # $is_encoded_data = true if $buf is decoded from utf8 + # $decoded_input_as = true if perltidy decoded input buf + # $encoding_log_message = messages for log file, + # $length_function = function to use for measuring string width + + # Return nothing on any error; this is a signal to skip this file + + my $rOpts = $self->[_rOpts_]; + + my $source_object = Perl::Tidy::LineSource->new( + input_file => $input_file, + rOpts => $rOpts, + ); + + # return nothing if error + return unless ($source_object); + + my $buf = EMPTY_STRING; + while ( my $line = $source_object->get_line() ) { + $buf .= $line; + } - $rstatus->{'input_name'} = $display_name; - $rstatus->{'opt_encoding'} = $rOpts_character_encoding; - $rstatus->{'char_mode_used'} = $encoding_in ? 1 : 0; - $rstatus->{'input_decoded_as'} = $decoded_input_as; + my $encoding_in = EMPTY_STRING; + my $rOpts_character_encoding = $rOpts->{'character-encoding'}; + my $encoding_log_message; + my $decoded_input_as = EMPTY_STRING; + $rstatus->{'char_mode_source'} = 0; + + # Case 1: If Perl is already in a character-oriented mode for this + # string rather than a byte-oriented mode. Normally, this happens if + # the caller has decoded a utf8 string before calling perltidy. But it + # could also happen if the user has done some unusual manipulations of + # the source. In any case, we will not attempt to decode it because + # that could result in an output string in a different mode. + if ( is_char_mode($buf) ) { + $encoding_in = "utf8"; + $rstatus->{'char_mode_source'} = 1; + } + + # Case 2. No input stream encoding requested. This is appropriate + # for single-byte encodings like ascii, latin-1, etc + elsif ( !$rOpts_character_encoding + || $rOpts_character_encoding eq 'none' ) + { + + # nothing to do + } + + # Case 3. guess input stream encoding if requested + elsif ( lc($rOpts_character_encoding) eq 'guess' ) { + + # The guessing strategy is simple: use Encode::Guess to guess + # an encoding. If and only if the guess is utf8, try decoding and + # use it if successful. Otherwise, we proceed assuming the + # characters are encoded as single bytes (same as if 'none' had + # been specified as the encoding). + + # In testing I have found that including additional guess 'suspect' + # encodings sometimes works but can sometimes lead to disaster by + # using an incorrect decoding. + my $buf_in = $buf; + + my $decoder = guess_encoding( $buf_in, 'utf8' ); + if ( ref($decoder) ) { + $encoding_in = $decoder->name; + if ( $encoding_in ne 'UTF-8' && $encoding_in ne 'utf8' ) { + $encoding_in = EMPTY_STRING; + $buf = $buf_in; + $encoding_log_message .= <decode($buf_in); 1 } ) { + + $encoding_log_message .= <[_is_encoded_data_] = $is_encoded_data; + + # Delete any Byte Order Mark (BOM), which can cause trouble + if ($is_encoded_data) { + $buf =~ s/^\x{FEFF}//; + } + + $rstatus->{'input_name'} = $display_name; + $rstatus->{'opt_encoding'} = $rOpts_character_encoding; + $rstatus->{'char_mode_used'} = $encoding_in ? 1 : 0; + $rstatus->{'input_decoded_as'} = $decoded_input_as; + + # Define the function to determine the display width of character + # strings + my $length_function = sub { return length( $_[0] ) }; + if ($is_encoded_data) { + + # Try to load Unicode::GCString for defining text display width, if + # requested, when the first encoded file is encountered + if ( !defined($loaded_unicode_gcstring) ) { + if ( eval { require Unicode::GCString; 1 } ) { + $loaded_unicode_gcstring = 1; + } + else { + $loaded_unicode_gcstring = 0; + if ( $rOpts->{'use-unicode-gcstring'} ) { + Warn(<new( $_[0] )->columns; + }; + $encoding_log_message .= <{'gcs_used'} = 1; + } + } + return ( + $buf, + $is_encoded_data, + $decoded_input_as, + $encoding_log_message, + $length_function, + + ); +} ## end sub get_decoded_string_buffer + +sub process_all_files { + + my ( + + $self, + $rinput_hash, + $rfiles, + + $output_extension, + $forbidden_file_extensions, + $in_place_modify, + $backup_extension, + $delete_backup, + + $logfile_header, + $rpending_complaint, + $rpending_logfile_message, + + ) = @_; + + # This routine is the main loop to process all files. + # Total formatting is done with these layers of subroutines: + # perltidy - main routine; checks run parameters + # *process_all_files - main loop to process all files; *THIS LAYER + # process_filter_layer - do any pre and post processing; + # process_iteration_layer - handle any iterations on formatting + # process_single_case - solves one formatting problem + + my $rOpts = $self->[_rOpts_]; + my $dot = $self->[_file_extension_separator_]; + my $diagnostics_object = $self->[_diagnostics_object_]; + my $line_separator_default = $self->[_line_separator_default_]; + + my $destination_stream = $rinput_hash->{'destination'}; + my $errorfile_stream = $rinput_hash->{'errorfile'}; + my $logfile_stream = $rinput_hash->{'logfile'}; + my $teefile_stream = $rinput_hash->{'teefile'}; + my $debugfile_stream = $rinput_hash->{'debugfile'}; + my $source_stream = $rinput_hash->{'source'}; + my $stderr_stream = $rinput_hash->{'stderr'}; + + my $number_of_files = @{$rfiles}; + while ( my $input_file = shift @{$rfiles} ) { + + my $fileroot; + my @input_file_stat; + my $display_name; + + #-------------------------- + # prepare this input stream + #-------------------------- + if ($source_stream) { + $fileroot = "perltidy"; + $display_name = ""; + + # If the source is from an array or string, then .LOG output + # is only possible if a logfile stream is specified. This prevents + # unexpected perltidy.LOG files. + if ( !defined($logfile_stream) ) { + $logfile_stream = Perl::Tidy::DevNull->new(); + + # Likewise for .TEE and .DEBUG output + } + if ( !defined($teefile_stream) ) { + $teefile_stream = Perl::Tidy::DevNull->new(); + } + if ( !defined($debugfile_stream) ) { + $debugfile_stream = Perl::Tidy::DevNull->new(); + } + } + elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN + $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc + $display_name = ""; + $in_place_modify = 0; + } + else { + $fileroot = $input_file; + $display_name = $input_file; + unless ( -e $input_file ) { + + # file doesn't exist - check for a file glob + if ( $input_file =~ /([\?\*\[\{])/ ) { + + # Windows shell may not remove quotes, so do it + my $input_file = $input_file; + if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 } + if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 } + my $pattern = fileglob_to_re($input_file); + my $dh; + if ( opendir( $dh, './' ) ) { + my @files = + grep { /$pattern/ && !-d } readdir($dh); + closedir($dh); + next unless (@files); + unshift @{$rfiles}, @files; + next; + } + } + Warn("skipping file: '$input_file': no matches found\n"); + next; + } + + unless ( -f $input_file ) { + Warn("skipping file: $input_file: not a regular file\n"); + next; + } + + # As a safety precaution, skip zero length files. + # If for example a source file got clobbered somehow, + # the old .tdy or .bak files might still exist so we + # shouldn't overwrite them with zero length files. + unless ( -s $input_file ) { + Warn("skipping file: $input_file: Zero size\n"); + next; + } - # Define the function to determine the display width of character - # strings - my $length_function = sub { return length( $_[0] ) }; - if ($is_encoded_data) { + # And avoid formatting extremely large files. Since perltidy reads + # files into memory, trying to process an extremely large file + # could cause system problems. + my $size_in_mb = ( -s $input_file ) / ( 1024 * 1024 ); + if ( $size_in_mb > $rOpts->{'maximum-file-size-mb'} ) { + $size_in_mb = sprintf( "%0.1f", $size_in_mb ); + Warn( +"skipping file: $input_file: size $size_in_mb MB exceeds limit $rOpts->{'maximum-file-size-mb'}; use -mfs=i to change\n" + ); + next; + } - # Delete any Byte Order Mark (BOM), which can cause trouble - $buf =~ s/^\x{FEFF}//; + unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) { + Warn("skipping file: $input_file: Non-text (override with -f)\n" + ); + next; + } - # Try to load Unicode::GCString for defining text display width, if - # requested, when the first encoded file is encountered - if ( !defined($loaded_unicode_gcstring) ) { - eval { require Unicode::GCString }; - $loaded_unicode_gcstring = !$EVAL_ERROR; - if ( $EVAL_ERROR && $rOpts->{'use-unicode-gcstring'} ) { - Warn(<{'backup-method'}; + if ( defined($backup_method) && $backup_method eq 'copy' ) { + Warn +"skipping file '$input_file' for -b option: file reported as non-writable\n"; + next; } } - if ($loaded_unicode_gcstring) { - $length_function = sub { - return Unicode::GCString->new( $_[0] )->columns; - }; - $encoding_log_message .= <[_file_extension_separator_] = $dot; + } + + # add option to change path here + if ( defined( $rOpts->{'output-path'} ) ) { + + my ( $base, $old_path ) = fileparse($fileroot); + my $new_path = $rOpts->{'output-path'}; + unless ( -d $new_path ) { + unless ( mkdir $new_path, 0777 ) { + Die("unable to create directory $new_path: $ERRNO\n"); + } + } + my $path = $new_path; + $fileroot = catfile( $path, $base ); + unless ($fileroot) { + Die(<{'gcs_used'} = 1; + } } } - # MD5 sum of input file is evaluated before any prefilter - my $saved_input_buf; - if ( $rOpts->{'assert-tidy'} || $rOpts->{'assert-untidy'} ) { - $digest_input = $md5_hex->($buf); - $saved_input_buf = $buf; + # Skip files with same extension as the output files because + # this can lead to a messy situation with files like + # script.tdy.tdy.tdy ... or worse problems ... when you + # rerun perltidy over and over with wildcard input. + if ( + !$source_stream + && ( $input_file =~ /$forbidden_file_extensions/ + || $input_file eq 'DIAGNOSTICS' ) + ) + { + Warn("skipping file: $input_file: wrong extension\n"); + next; } - # Prefilters and postfilters: The prefilter is a code reference - # that will be applied to the source before tidying, and the - # postfilter is a code reference to the result before outputting. - - $buf = $prefilter->($buf) if $prefilter; + # copy source to a string buffer, decoding from utf8 if necessary + my ( + $buf, + $is_encoded_data, + $decoded_input_as, + $encoding_log_message, + $length_function, - # starting MD5 sum for convergence test is evaluated after any prefilter - if ($do_convergence_test) { - my $digest = $md5_hex->($buf); - $saw_md5{$digest} = 0; - } + ) = $self->get_decoded_string_buffer( $input_file, $display_name, + $rpending_logfile_message ); - $source_object = Perl::Tidy::LineSource->new( - input_file => \$buf, - rOpts => $rOpts, - rpending_logfile_message => $rpending_logfile_message, - ); + # Skip this file on any error + next if ( !defined($buf) ); - # register this file name with the Diagnostics package + # Register this file name with the Diagnostics package, if any. $diagnostics_object->set_input_file($input_file) if $diagnostics_object; - #--------------------------------------------------------------- + # OK: the (possibly decoded) input is now in string $buf. We just need + # to to prepare the output and error logger before formatting it. + + #-------------------------- # prepare the output stream - #--------------------------------------------------------------- + #-------------------------- my $output_file = undef; my $output_name = EMPTY_STRING; my $actual_output_extension; @@ -1307,7 +1923,8 @@ EOM if ( $number_of_files <= 1 ) { if ( $rOpts->{'standard-output'} ) { - my $msg = "You may not use -o and -st together"; + my $saw_pbp = $self->[_saw_pbp_]; + my $msg = "You may not use -o and -st together"; $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp); Die("$msg\n"); } @@ -1341,6 +1958,7 @@ EOM } elsif ( $rOpts->{'standard-output'} ) { if ($destination_stream) { + my $saw_pbp = $self->[_saw_pbp_]; my $msg = "You may not specify a destination array and -st together\n"; $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp); @@ -1370,8 +1988,12 @@ EOM } else { if ($in_place_modify) { - $output_file = IO::File->new_tmpfile() - or Die("cannot open temp file for -b option: $ERRNO\n"); + + # Send output to a temporary array buffer. This will + # allow efficient copying back to the input by + # sub backup_and_modify_in_place, below. + my @tmp_buff; + $output_file = \@tmp_buff; $output_name = $display_name; } else { @@ -1386,44 +2008,228 @@ EOM $rstatus->{'iteration_count'} = 0; $rstatus->{'converged'} = 0; - my $fh_tee; - my $tee_file = $fileroot . $dot . "TEE"; - if ($teefile_stream) { $tee_file = $teefile_stream } - if ( $rOpts->{'tee-pod'} - || $rOpts->{'tee-block-comments'} - || $rOpts->{'tee-side-comments'} ) - { - ( $fh_tee, my $tee_filename ) = - Perl::Tidy::streamhandle( $tee_file, 'w', $is_encoded_data ); - if ( !$fh_tee ) { - Warn("couldn't open TEE file $tee_file: $ERRNO\n"); - } + #------------------------------------------ + # initialize the error logger for this file + #------------------------------------------ + my $warning_file = $fileroot . $dot . "ERR"; + if ($errorfile_stream) { $warning_file = $errorfile_stream } + my $log_file = $fileroot . $dot . "LOG"; + if ($logfile_stream) { $log_file = $logfile_stream } + + # The logger object handles warning messages, logfile messages, + # and can supply basic run information to lower level routines. + my $logger_object = Perl::Tidy::Logger->new( + rOpts => $rOpts, + log_file => $log_file, + warning_file => $warning_file, + fh_stderr => $fh_stderr, + display_name => $display_name, + is_encoded_data => $is_encoded_data, + ); + $logger_object->write_logfile_entry($logfile_header); + $logger_object->write_logfile_entry($encoding_log_message) + if $encoding_log_message; + + # Now we can add any pending messages to the log + if ( ${$rpending_logfile_message} ) { + $logger_object->write_logfile_entry( ${$rpending_logfile_message} ); + } + if ( ${$rpending_complaint} ) { + $logger_object->complain( ${$rpending_complaint} ); } - my $line_separator = $rOpts->{'output-line-ending'}; + # Use input line endings if requested + my $line_separator = $line_separator_default; if ( $rOpts->{'preserve-line-endings'} ) { - $line_separator = find_input_line_ending($input_file); + my $ls_input = find_input_line_ending($input_file); + if ( defined($ls_input) ) { $line_separator = $ls_input } + } + + # additional parameters needed by lower level routines + $self->[_actual_output_extension_] = $actual_output_extension; + $self->[_debugfile_stream_] = $debugfile_stream; + $self->[_decoded_input_as_] = $decoded_input_as; + $self->[_destination_stream_] = $destination_stream; + $self->[_display_name_] = $display_name; + $self->[_fileroot_] = $fileroot; + $self->[_is_encoded_data_] = $is_encoded_data; + $self->[_length_function_] = $length_function; + $self->[_line_separator_] = $line_separator; + $self->[_logger_object_] = $logger_object; + $self->[_output_file_] = $output_file; + $self->[_teefile_stream_] = $teefile_stream; + $self->[_input_copied_verbatim_] = 0; + $self->[_input_output_difference_] = 1; ## updated later if -b used + + #---------------------------------------------------------- + # Do all formatting of this buffer. + # Results will go to the selected output file or streams(s) + #---------------------------------------------------------- + $self->process_filter_layer($buf); + + #-------------------------------------------------- + # Handle the -b option (backup and modify in-place) + #-------------------------------------------------- + if ($in_place_modify) { + + # For -b option, leave the file unchanged if a severe error caused + # formatting to be skipped. Otherwise we will overwrite any backup. + if ( !$self->[_input_copied_verbatim_] ) { + + my $backup_method = $rOpts->{'backup-method'}; + + # Option 1, -bm='copy': uses newer version in which original is + # copied to the backup and rewritten; see git #103. + if ( defined($backup_method) && $backup_method eq 'copy' ) { + $self->backup_method_copy( + $input_file, $output_file, + $backup_extension, $delete_backup + ); + } + + # Option 2, -bm='move': uses older version, where original is + # moved to the backup and formatted output goes to a new file. + else { + $self->backup_method_move( + $input_file, $output_file, + $backup_extension, $delete_backup + ); + } + } + $output_file = $input_file; + } + + #------------------------------------------------------------------- + # Otherwise set output file ownership and permissions if appropriate + #------------------------------------------------------------------- + elsif ( $output_file && -f $output_file && !-l $output_file ) { + if (@input_file_stat) { + if ( $rOpts->{'format'} eq 'tidy' ) { + $self->set_output_file_permissions( $output_file, + \@input_file_stat, $in_place_modify ); + } + + # else use default permissions for html and any other format + } } - $line_separator = "\n" unless defined($line_separator); + $logger_object->finish() + if $logger_object; + } ## end of main loop to process all files + + return; +} ## end sub process_all_files + +sub process_filter_layer { + + my ( $self, $buf ) = @_; + + # This is the filter layer of processing. + # Do all requested formatting on the string '$buf', including any + # pre- and post-processing with filters. + # Store the results in the selected output file(s) or stream(s). + + # Total formatting is done with these layers of subroutines: + # perltidy - main routine; checks run parameters + # process_all_files - main loop to process all files; + # *process_filter_layer - do any pre and post processing; *THIS LAYER + # process_iteration_layer - handle any iterations on formatting + # process_single_case - solves one formatting problem + + # Data Flow in this layer: + # $buf + # -> optional prefilter operation + # -> [ formatting by sub process_iteration_layer ] + # -> ( optional postfilter_buffer for postfilter, other operations ) + # -> ( optional destination_buffer for encoding ) + # -> final sink_object + + # What is done based on format type: + # utf8 decoding is done for all format types + # prefiltering is applied to all format types + # - because it may be needed to get through the tokenizer + # postfiltering is only done for format='tidy' + # - might cause problems operating on html text + # encoding of decoded output is only done for format='tidy' + # - because html does its own encoding; user formatter does what it wants + + my $rOpts = $self->[_rOpts_]; + my $is_encoded_data = $self->[_is_encoded_data_]; + my $logger_object = $self->[_logger_object_]; + my $output_file = $self->[_output_file_]; + my $user_formatter = $self->[_user_formatter_]; + my $destination_stream = $self->[_destination_stream_]; + my $prefilter = $self->[_prefilter_]; + my $postfilter = $self->[_postfilter_]; + my $decoded_input_as = $self->[_decoded_input_as_]; + my $line_separator = $self->[_line_separator_]; + + my $remove_terminal_newline = + !$rOpts->{'add-terminal-newline'} && substr( $buf, -1, 1 ) !~ /\n/; + + # vars for postfilter, if used + my $use_postfilter_buffer; + my $postfilter_buffer; + + # vars for destination buffer, if used + my $destination_buffer; + my $use_destination_buffer; + my $encode_destination_buffer; + + # vars for iterations, if done + my $sink_object; + + # vars for checking assertions, if needed + my $digest_input; + my $saved_input_buf; + + my $ref_destination_stream = ref($destination_stream); + + # Setup vars for postfilter, destination buffer, assertions and sink object + # if needed. These are only used for 'tidy' formatting. + if ( $rOpts->{'format'} eq 'tidy' ) { + + # evaluate MD5 sum of input file, if needed, before any prefilter + if ( $rOpts->{'assert-tidy'} + || $rOpts->{'assert-untidy'} + || $rOpts->{'backup-and-modify-in-place'} ) + { + $digest_input = $md5_hex->($buf); + $saved_input_buf = $buf; + } - # the 'sink_object' knows how to write the output file - my ( $sink_object, $postfilter_buffer ); - my $use_buffer = + #----------------------- + # Setup postfilter buffer + #----------------------- + # If we need access to the output for filtering or checking assertions + # before writing to its ultimate destination, then we will send it + # to a temporary buffer. The variables are: + # $postfilter_buffer = the buffer to capture the output + # $use_postfilter_buffer = is a postfilter buffer used? + # These are used below, just after iterations are made. + $use_postfilter_buffer = $postfilter || $remove_terminal_newline || $rOpts->{'assert-tidy'} - || $rOpts->{'assert-untidy'}; - - # Postpone final output to a destination SCALAR or ARRAY ref to allow - # possible encoding at the end of processing. - my $destination_buffer; - my $use_destination_buffer; - my $encode_destination_buffer; - my $ref_destination_stream = ref($destination_stream); - if ( $ref_destination_stream && !$user_formatter ) { + || $rOpts->{'assert-untidy'} + || $rOpts->{'backup-and-modify-in-place'}; + + #------------------------- + # Setup destination_buffer + #------------------------- + # If the final output destination is not a file, then we might need to + # encode the result at the end of processing. So in this case we will + # send the output to a temporary buffer. + # The key variables are: + # $destination_buffer - receives the formatted output + # $use_destination_buffer - is $destination_buffer used? + # $encode_destination_buffer - encode $destination_buffer? + # These are used by sub 'copy_buffer_to_destination', below + + if ($ref_destination_stream) { $use_destination_buffer = 1; $output_file = \$destination_buffer; + $self->[_output_file_] = $output_file; # Strings and arrays use special encoding rules if ( $ref_destination_stream eq 'SCALAR' @@ -1447,641 +2253,564 @@ EOM } } + #------------------------------------------- + # Make a sink object for the iteration phase + #------------------------------------------- $sink_object = Perl::Tidy::LineSink->new( - output_file => $use_buffer ? \$postfilter_buffer : $output_file, - line_separator => $line_separator, - rOpts => $rOpts, - rpending_logfile_message => $rpending_logfile_message, - is_encoded_data => $is_encoded_data, + output_file => $use_postfilter_buffer + ? \$postfilter_buffer + : $output_file, + line_separator => $line_separator, + is_encoded_data => $is_encoded_data, ); + } - #--------------------------------------------------------------- - # initialize the error logger for this file - #--------------------------------------------------------------- - my $warning_file = $fileroot . $dot . "ERR"; - if ($errorfile_stream) { $warning_file = $errorfile_stream } - my $log_file = $fileroot . $dot . "LOG"; - if ($logfile_stream) { $log_file = $logfile_stream } - - my $logger_object = Perl::Tidy::Logger->new( - rOpts => $rOpts, - log_file => $log_file, - warning_file => $warning_file, - fh_stderr => $fh_stderr, - display_name => $display_name, + #----------------------------------------------------------------------- + # Apply any prefilter. The prefilter is a code reference that will be + # applied to the source before tokenizing. Note that we are doing this + # for all format types ('tidy', 'html', 'user') because it may be needed + # to avoid tokenization errors. + #----------------------------------------------------------------------- + $buf = $prefilter->($buf) if $prefilter; + + #---------------------------------------------------------------------- + # Format contents of string '$buf', iterating if requested. + # For 'tidy', formatted result will be written to '$sink_object' + # For 'html' and 'user', result goes directly to its ultimate destination. + #---------------------------------------------------------------------- + $self->process_iteration_layer( $buf, $sink_object ); + + #-------------------------------- + # Do postfilter buffer processing + #-------------------------------- + if ($use_postfilter_buffer) { + + my $sink_object_post = Perl::Tidy::LineSink->new( + output_file => $output_file, + line_separator => $line_separator, is_encoded_data => $is_encoded_data, ); - write_logfile_header( - $rOpts, $logger_object, $config_file, - $rraw_options, $Windows_type, $readable_options, - ); - $logger_object->write_logfile_entry($encoding_log_message) - if $encoding_log_message; - if ( ${$rpending_logfile_message} ) { - $logger_object->write_logfile_entry( ${$rpending_logfile_message} ); - } - if ( ${$rpending_complaint} ) { - $logger_object->complain( ${$rpending_complaint} ); + #---------------------------------------------------------------------- + # Apply any postfilter. The postfilter is a code reference that will be + # applied to the source after tidying. + #---------------------------------------------------------------------- + my $buf_post = + $postfilter + ? $postfilter->($postfilter_buffer) + : $postfilter_buffer; + + if ( defined($digest_input) ) { + my $digest_output = $md5_hex->($buf_post); + $self->[_input_output_difference_] = + $digest_output ne $digest_input; } - #--------------------------------------------------------------- - # initialize the debug object, if any - #--------------------------------------------------------------- - my $debugger_object = undef; - if ( $rOpts->{DEBUG} ) { - my $debug_file = $fileroot . $dot . "DEBUG"; - if ($debugfile_stream) { $debug_file = $debugfile_stream } - $debugger_object = - Perl::Tidy::Debugger->new( $debug_file, $is_encoded_data ); - } - - #--------------------------------------------------------------- - # loop over iterations for one source stream - #--------------------------------------------------------------- - - # save objects to allow redirecting output during iterations - my $sink_object_final = $sink_object; - my $debugger_object_final = $debugger_object; - my $logger_object_final = $logger_object; - my $fh_tee_final = $fh_tee; - my $iteration_of_formatter_convergence; - - foreach my $iter ( 1 .. $max_iterations ) { - - $rstatus->{'iteration_count'} += 1; - - # send output stream to temp buffers until last iteration - my $sink_buffer; - if ( $iter < $max_iterations ) { - $sink_object = Perl::Tidy::LineSink->new( - output_file => \$sink_buffer, - line_separator => $line_separator, - rOpts => $rOpts, - rpending_logfile_message => $rpending_logfile_message, - is_encoded_data => $is_encoded_data, - ); - } - else { - $sink_object = $sink_object_final; + # Check if file changed if requested, but only after any postfilter + if ( $rOpts->{'assert-tidy'} ) { + if ( $self->[_input_output_difference_] ) { + my $diff_msg = + compare_string_buffers( $saved_input_buf, $buf_post, + $is_encoded_data ); + $logger_object->warning(<interrupt_logfile(); + $logger_object->warning( $diff_msg . "\n" ); + $logger_object->resume_logfile(); } + } - # Save logger, debugger and tee output only on pass 1 because: - # (1) line number references must be to the starting - # source, not an intermediate result, and - # (2) we need to know if there are errors so we can stop the - # iterations early if necessary. - # (3) the tee option only works on first pass if comments are also - # being deleted. - - if ( $iter > 1 ) { - $debugger_object = undef; - $logger_object = undef; - $fh_tee = undef; + if ( $rOpts->{'assert-untidy'} ) { + if ( !$self->[_input_output_difference_] ) { + $logger_object->warning( +"assertion failure: '--assert-untidy' is set but output equals input\n" + ); } + } - #------------------------------------------------------------ - # create a formatter for this file : html writer or - # pretty printer - #------------------------------------------------------------ - - # we have to delete any old formatter because, for safety, - # the formatter will check to see that there is only one. - $formatter = undef; + my $source_object = Perl::Tidy::LineSource->new( + input_file => \$buf_post, + rOpts => $rOpts, + ); - if ($user_formatter) { - $formatter = $user_formatter; - } - elsif ( $rOpts->{'format'} eq 'html' ) { - $formatter = Perl::Tidy::HtmlWriter->new( - input_file => $fileroot, - html_file => $output_file, - extension => $actual_output_extension, - html_toc_extension => $html_toc_extension, - html_src_extension => $html_src_extension, - ); - } - elsif ( $rOpts->{'format'} eq 'tidy' ) { - $formatter = Perl::Tidy::Formatter->new( - logger_object => $logger_object, - diagnostics_object => $diagnostics_object, - sink_object => $sink_object, - length_function => $length_function, - is_encoded_data => $is_encoded_data, - fh_tee => $fh_tee, - ); - } - else { - Die("I don't know how to do -format=$rOpts->{'format'}\n"); + # Copy the filtered buffer to the final destination + if ( !$remove_terminal_newline ) { + while ( my $line = $source_object->get_line() ) { + $sink_object_post->write_line($line); } + } + else { - unless ($formatter) { - Die("Unable to continue with $rOpts->{'format'} formatting\n"); + # Copy the filtered buffer but remove the newline char from the + # final line + my $line; + while ( my $next_line = $source_object->get_line() ) { + $sink_object_post->write_line($line) if ($line); + $line = $next_line; } + if ($line) { + $sink_object_post->set_line_separator(undef); + chomp $line; + $sink_object_post->write_line($line); + } + } + $sink_object_post->close_output_file(); + $source_object->close_input_file(); + } - #--------------------------------------------------------------- - # create the tokenizer for this file - #--------------------------------------------------------------- - $tokenizer = undef; # must destroy old tokenizer - $tokenizer = Perl::Tidy::Tokenizer->new( - source_object => $source_object, - logger_object => $logger_object, - debugger_object => $debugger_object, - diagnostics_object => $diagnostics_object, - tabsize => $tabsize, - rOpts => $rOpts, - - starting_level => $rOpts->{'starting-indentation-level'}, - indent_columns => $rOpts->{'indent-columns'}, - look_for_hash_bang => $rOpts->{'look-for-hash-bang'}, - look_for_autoloader => $rOpts->{'look-for-autoloader'}, - look_for_selfloader => $rOpts->{'look-for-selfloader'}, - trim_qw => $rOpts->{'trim-qw'}, - extended_syntax => $rOpts->{'extended-syntax'}, - - continuation_indentation => - $rOpts->{'continuation-indentation'}, - outdent_labels => $rOpts->{'outdent-labels'}, - ); - - #--------------------------------------------------------------- - # now we can do it - #--------------------------------------------------------------- - process_this_file( $tokenizer, $formatter ); + #-------------------------------------------------------- + # Do destination buffer processing, encoding if required. + #-------------------------------------------------------- + if ($use_destination_buffer) { + $self->copy_buffer_to_destination( $destination_buffer, + $destination_stream, $encode_destination_buffer ); + } + else { - #--------------------------------------------------------------- - # close the input source and report errors - #--------------------------------------------------------------- - $source_object->close_input_file(); + # output went to a file in 'tidy' mode... + if ( $is_encoded_data && $rOpts->{'format'} eq 'tidy' ) { + $rstatus->{'output_encoded_as'} = 'UTF-8'; + } + } - # see if the formatter is converged - if ( $max_iterations > 1 - && !defined($iteration_of_formatter_convergence) - && $formatter->can('get_convergence_check') ) - { - if ( $formatter->get_convergence_check() ) { - $iteration_of_formatter_convergence = $iter; - $rstatus->{'converged'} = 1; - } - } + # The final formatted result should now be in the selected output file(s) + # or stream(s). + return; - # line source for next iteration (if any) comes from the current - # temporary output buffer - if ( $iter < $max_iterations ) { +} ## end sub process_filter_layer + +sub process_iteration_layer { + + my ( $self, $buf, $sink_object ) = @_; + + # This is the iteration layer of processing. + # Do all formatting, iterating if requested, on the source string $buf. + # Output depends on format type: + # For 'tidy' formatting, output goes to sink object + # For 'html' formatting, output goes to the ultimate destination + # For 'user' formatting, user formatter handles output + + # Total formatting is done with these layers of subroutines: + # perltidy - main routine; checks run parameters + # process_all_files - main loop to process all files; + # process_filter_layer - do any pre and post processing + # *process_iteration_layer - do any iterations on formatting; *THIS LAYER + # process_single_case - solves one formatting problem + + # Data Flow in this layer: + # $buf -> [ loop over iterations ] -> $sink_object + + # Only 'tidy' formatting can use multiple iterations. + + my $diagnostics_object = $self->[_diagnostics_object_]; + my $display_name = $self->[_display_name_]; + my $fileroot = $self->[_fileroot_]; + my $is_encoded_data = $self->[_is_encoded_data_]; + my $length_function = $self->[_length_function_]; + my $line_separator = $self->[_line_separator_]; + my $logger_object = $self->[_logger_object_]; + my $rOpts = $self->[_rOpts_]; + my $tabsize = $self->[_tabsize_]; + my $user_formatter = $self->[_user_formatter_]; + + # create a source object for the buffer + my $source_object = Perl::Tidy::LineSource->new( + input_file => \$buf, + rOpts => $rOpts, + ); - $sink_object->close_output_file(); - $source_object = Perl::Tidy::LineSource->new( - input_file => \$sink_buffer, - rOpts => $rOpts, - rpending_logfile_message => $rpending_logfile_message, - ); + # make a debugger object if requested + my $debugger_object; + if ( $rOpts->{DEBUG} ) { + my $debug_file = $self->[_debugfile_stream_] + || $fileroot . $self->make_file_extension('DEBUG'); + $debugger_object = + Perl::Tidy::Debugger->new( $debug_file, $is_encoded_data ); + } - # stop iterations if errors or converged - my $stop_now = $tokenizer->report_tokenization_errors(); - $stop_now ||= $tokenizer->get_unexpected_error_count(); - my $stopping_on_error = $stop_now; - if ($stop_now) { - $convergence_log_message = <{'tee-pod'} + || $rOpts->{'tee-block-comments'} + || $rOpts->{'tee-side-comments'} ) + { + my $tee_file = $self->[_teefile_stream_] + || $fileroot . $self->make_file_extension('TEE'); + ( $fh_tee, my $tee_filename ) = + Perl::Tidy::streamhandle( $tee_file, 'w', $is_encoded_data ); + if ( !$fh_tee ) { + Warn("couldn't open TEE file $tee_file: $ERRNO\n"); + } + } - # stop if the formatter has converged - $stop_now ||= defined($iteration_of_formatter_convergence); + # vars for iterations and convergence test + my $max_iterations = 1; + my $convergence_log_message; + my $do_convergence_test; + my %saw_md5; + + # Only 'tidy' formatting can use multiple iterations + if ( $rOpts->{'format'} eq 'tidy' ) { + + # check iteration count and quietly fix if necessary: + # - iterations option only applies to code beautification mode + # - the convergence check should stop most runs on iteration 2, and + # virtually all on iteration 3. But we'll allow up to 6. + $max_iterations = $rOpts->{'iterations'}; + if ( !defined($max_iterations) + || $max_iterations <= 0 ) + { + $max_iterations = 1; + } + elsif ( $max_iterations > 6 ) { + $max_iterations = 6; + } - my $digest = $md5_hex->($sink_buffer); - if ( !defined( $saw_md5{$digest} ) ) { - $saw_md5{$digest} = $iter; - } - else { + # get starting MD5 sum for convergence test + if ( $max_iterations > 1 ) { + $do_convergence_test = 1; + my $digest = $md5_hex->($buf); + $saw_md5{$digest} = 0; + } + } - # Deja vu, stop iterating - $stop_now = 1; - my $iterm = $iter - 1; - if ( $saw_md5{$digest} != $iterm ) { - - # Blinking (oscillating) between two or more stable - # end states. This is unlikely to occur with normal - # parameters, but it can occur in stress testing - # with extreme parameter values, such as very short - # maximum line lengths. We want to catch and fix - # them when they happen. - $rstatus->{'blinking'} = 1; - $convergence_log_message = <write_diagnostics( - $convergence_log_message) - if $diagnostics_object; + # save objects to allow redirecting output during iterations + my $sink_object_final = $sink_object; + my $logger_object_final = $logger_object; + my $iteration_of_formatter_convergence; -# Uncomment to search for blinking states -# Warn( "$display_name: blinking; iter $iter same as for $saw_md5{$digest}\n" ); + #--------------------- + # Loop over iterations + #--------------------- + foreach my $iter ( 1 .. $max_iterations ) { - } - else { - $convergence_log_message = <write_diagnostics( - $convergence_log_message) - if $diagnostics_object && $iterm > 2; - $rstatus->{'converged'} = 1; - } - } - } ## end if ($do_convergence_test) + $rstatus->{'iteration_count'} += 1; - if ($stop_now) { + # send output stream to temp buffers until last iteration + my $sink_buffer; + if ( $iter < $max_iterations ) { + $sink_object = Perl::Tidy::LineSink->new( + output_file => \$sink_buffer, + line_separator => $line_separator, + is_encoded_data => $is_encoded_data, + ); + } + else { + $sink_object = $sink_object_final; + } - if (DEVEL_MODE) { + # Save logger, debugger and tee output only on pass 1 because: + # (1) line number references must be to the starting + # source, not an intermediate result, and + # (2) we need to know if there are errors so we can stop the + # iterations early if necessary. + # (3) the tee option only works on first pass if comments are also + # being deleted. + if ( $iter > 1 ) { + + $debugger_object->close_debug_file() if ($debugger_object); + $fh_tee->close() if ($fh_tee); + + $debugger_object = undef; + $logger_object = undef; + $fh_tee = undef; + } - if ( defined($iteration_of_formatter_convergence) ) { + #--------------------------------- + # create a formatter for this file + #--------------------------------- - # This message cannot appear unless the formatter - # convergence test above is temporarily skipped for - # testing. - if ( $iteration_of_formatter_convergence < - $iter - 1 ) - { - print STDERR -"STRANGE Early conv in $display_name: Stopping on it=$iter, converged in formatter on $iteration_of_formatter_convergence\n"; - } - } - elsif ( !$stopping_on_error ) { - print STDERR -"STRANGE no conv in $display_name: stopping on it=$iter, but not converged in formatter\n"; - } - } + my $formatter; - # we are stopping the iterations early; - # copy the output stream to its final destination - $sink_object = $sink_object_final; - while ( my $line = $source_object->get_line() ) { - $sink_object->write_line($line); - } - $source_object->close_input_file(); - last; - } - } ## end if ( $iter < $max_iterations) - } ## end loop over iterations for one source file - - # restore objects which have been temporarily undefined - # for second and higher iterations - $debugger_object = $debugger_object_final; - $logger_object = $logger_object_final; - $fh_tee = $fh_tee_final; - - $logger_object->write_logfile_entry($convergence_log_message) - if $convergence_log_message; - - #--------------------------------------------------------------- - # Perform any postfilter operation - #--------------------------------------------------------------- - if ($use_buffer) { - $sink_object->close_output_file(); - $sink_object = Perl::Tidy::LineSink->new( - output_file => $output_file, - line_separator => $line_separator, - rOpts => $rOpts, - rpending_logfile_message => $rpending_logfile_message, - is_encoded_data => $is_encoded_data, + if ($user_formatter) { + $formatter = $user_formatter; + } + elsif ( $rOpts->{'format'} eq 'html' ) { + + my $html_toc_extension = + $self->make_file_extension( $rOpts->{'html-toc-extension'}, + 'toc' ); + + my $html_src_extension = + $self->make_file_extension( $rOpts->{'html-src-extension'}, + 'src' ); + + $formatter = Perl::Tidy::HtmlWriter->new( + input_file => $fileroot, + html_file => $self->[_output_file_], + extension => $self->[_actual_output_extension_], + html_toc_extension => $html_toc_extension, + html_src_extension => $html_src_extension, ); + } + elsif ( $rOpts->{'format'} eq 'tidy' ) { + $formatter = Perl::Tidy::Formatter->new( + logger_object => $logger_object, + diagnostics_object => $diagnostics_object, + sink_object => $sink_object, + length_function => $length_function, + is_encoded_data => $is_encoded_data, + fh_tee => $fh_tee, + ); + } + else { + Die("I don't know how to do -format=$rOpts->{'format'}\n"); + } - my $buf_post = - $postfilter - ? $postfilter->($postfilter_buffer) - : $postfilter_buffer; - - # Check if file changed if requested, but only after any postfilter - if ( $rOpts->{'assert-tidy'} ) { - my $digest_output = $md5_hex->($buf_post); - if ( $digest_output ne $digest_input ) { - my $diff_msg = - compare_string_buffers( $saved_input_buf, $buf_post, - $is_encoded_data ); - $logger_object->warning(<interrupt_logfile(); - $logger_object->warning( $diff_msg . "\n" ); - $logger_object->resume_logfile(); - ## $Warn_count ||= 1; # logger warning does this now - } - } - if ( $rOpts->{'assert-untidy'} ) { - my $digest_output = $md5_hex->($buf_post); - if ( $digest_output eq $digest_input ) { - $logger_object->warning( -"assertion failure: '--assert-untidy' is set but output equals input\n" - ); - ## $Warn_count ||= 1; # logger warning does this now - } - } - - $source_object = Perl::Tidy::LineSource->new( - input_file => \$buf_post, - rOpts => $rOpts, - rpending_logfile_message => $rpending_logfile_message, - ); + unless ($formatter) { + Die("Unable to continue with $rOpts->{'format'} formatting\n"); + } - # Copy the filtered buffer to the final destination - if ( !$remove_terminal_newline ) { - while ( my $line = $source_object->get_line() ) { - $sink_object->write_line($line); - } - } - else { + #----------------------------------- + # create the tokenizer for this file + #----------------------------------- + my $tokenizer = Perl::Tidy::Tokenizer->new( + source_object => $source_object, + logger_object => $logger_object, + debugger_object => $debugger_object, + diagnostics_object => $diagnostics_object, + tabsize => $tabsize, + rOpts => $rOpts, + + starting_level => $rOpts->{'starting-indentation-level'}, + indent_columns => $rOpts->{'indent-columns'}, + look_for_hash_bang => $rOpts->{'look-for-hash-bang'}, + look_for_autoloader => $rOpts->{'look-for-autoloader'}, + look_for_selfloader => $rOpts->{'look-for-selfloader'}, + trim_qw => $rOpts->{'trim-qw'}, + extended_syntax => $rOpts->{'extended-syntax'}, + + continuation_indentation => $rOpts->{'continuation-indentation'}, + outdent_labels => $rOpts->{'outdent-labels'}, + ); - # Copy the filtered buffer but remove the newline char from the - # final line - my $line; - while ( my $next_line = $source_object->get_line() ) { - $sink_object->write_line($line) if ($line); - $line = $next_line; - } - if ($line) { - $sink_object->set_line_separator(undef); - chomp $line; - $sink_object->write_line($line); - } - } + #--------------------------------- + # do processing for this iteration + #--------------------------------- + $self->process_single_case( $tokenizer, $formatter ); - $source_object->close_input_file(); - } + #----------------------------------------- + # close the input source and report errors + #----------------------------------------- + $source_object->close_input_file(); - #------------------------------------------------------------------ - # For string output, store the result to the destination, encoding - # if requested. This is a fix for issue git #83 (tidyall issue) - #------------------------------------------------------------------ - if ($use_destination_buffer) { + # see if the formatter is converged + if ( $max_iterations > 1 + && !defined($iteration_of_formatter_convergence) + && $formatter->can('get_convergence_check') ) + { + if ( $formatter->get_convergence_check() ) { + $iteration_of_formatter_convergence = $iter; + $rstatus->{'converged'} = 1; + } + } - # At this point, all necessary encoding has been done except for - # output to a string or array ref. We use the -eos flag to decide - # if we should encode. + # line source for next iteration (if any) comes from the current + # temporary output buffer + if ( $iter < $max_iterations ) { - # -neos, DEFAULT: perltidy does not return encoded string output. - # This is a result of the code evolution but not very convenient for - # most applications. It would be hard to change without breaking - # some programs. + $sink_object->close_output_file(); + $source_object = Perl::Tidy::LineSource->new( + input_file => \$sink_buffer, + rOpts => $rOpts, + ); - # -eos flag set: If perltidy decodes a string, regardless of - # source, it encodes before returning. - $rstatus->{'output_encoded_as'} = EMPTY_STRING; + # stop iterations if errors or converged + my $stop_now = $self->[_input_copied_verbatim_]; + $stop_now ||= $tokenizer->get_unexpected_error_count(); + my $stopping_on_error = $stop_now; + if ($stop_now) { + $convergence_log_message = <($sink_buffer); + if ( !defined( $saw_md5{$digest} ) ) { + $saw_md5{$digest} = $iter; } else { - $destination_buffer = $encoded_buffer; - $rstatus->{'output_encoded_as'} = 'UTF-8'; - } - } - # Send data for SCALAR, ARRAY & OBJ refs to its final destination - if ( ref($destination_stream) eq 'SCALAR' ) { - ${$destination_stream} = $destination_buffer; - } - elsif ($destination_buffer) { - my @lines = split /^/, $destination_buffer; - if ( ref($destination_stream) eq 'ARRAY' ) { - @{$destination_stream} = @lines; + # Deja vu, stop iterating + $stop_now = 1; + my $iterm = $iter - 1; + if ( $saw_md5{$digest} != $iterm ) { + + # Blinking (oscillating) between two or more stable + # end states. This is unlikely to occur with normal + # parameters, but it can occur in stress testing + # with extreme parameter values, such as very short + # maximum line lengths. We want to catch and fix + # them when they happen. + $rstatus->{'blinking'} = 1; + $convergence_log_message = <write_diagnostics( + $convergence_log_message) + if $diagnostics_object; + +# Uncomment to search for blinking states +# Warn( "$display_name: blinking; iter $iter same as for $saw_md5{$digest}\n" ); + + } + else { + $convergence_log_message = <write_diagnostics( + $convergence_log_message) + if $diagnostics_object && $iterm > 2; + $rstatus->{'converged'} = 1; + } } + } ## end if ($do_convergence_test) - # destination stream must be an object with print method - else { - foreach my $line (@lines) { - $destination_stream->print($line); + if ($stop_now) { + + if (DEVEL_MODE) { + + if ( defined($iteration_of_formatter_convergence) ) { + + # This message cannot appear unless the formatter + # convergence test above is temporarily skipped for + # testing. + if ( $iteration_of_formatter_convergence < $iter - 1 ) { + print STDERR +"STRANGE Early conv in $display_name: Stopping on it=$iter, converged in formatter on $iteration_of_formatter_convergence\n"; + } } - if ( $ref_destination_stream->can('close') ) { - $destination_stream->close(); + elsif ( !$stopping_on_error ) { + print STDERR +"STRANGE no conv in $display_name: stopping on it=$iter, but not converged in formatter\n"; } } - } - else { - # Empty destination buffer not going to a string ... could - # happen for example if user deleted all pod or comments + # we are stopping the iterations early; + # copy the output stream to its final destination + $sink_object = $sink_object_final; + while ( my $line = $source_object->get_line() ) { + $sink_object->write_line($line); + } + $source_object->close_input_file(); + last; } - } - else { + } ## end if ( $iter < $max_iterations) + } ## end loop over iterations for one source file - # output went to a file ... - if ($is_encoded_data) { - $rstatus->{'output_encoded_as'} = 'UTF-8'; - } - } + $sink_object->close_output_file() if $sink_object; + $debugger_object->close_debug_file() if $debugger_object; + $fh_tee->close() if $fh_tee; - # Save names of the input and output files - my $ifname = $input_file; - my $ofname = $output_file; + # leave logger object open for additional messages + $logger_object = $logger_object_final; + $logger_object->write_logfile_entry($convergence_log_message) + if $convergence_log_message; - #--------------------------------------------------------------- - # handle the -b option (backup and modify in-place) - #--------------------------------------------------------------- - if ($in_place_modify) { - unless ( -f $input_file ) { + return; - # oh, oh, no real file to backup .. - # shouldn't happen because of numerous preliminary checks - Die( -"problem with -b backing up input file '$input_file': not a file\n" - ); - } - my $backup_name = $input_file . $backup_extension; - if ( -f $backup_name ) { - unlink($backup_name) - or Die( -"unable to remove previous '$backup_name' for -b option; check permissions: $ERRNO\n" - ); - } +} ## end sub process_iteration_layer - # backup the input file - # we use copy for symlinks, move for regular files - if ( -l $input_file ) { - File::Copy::copy( $input_file, $backup_name ) - or Die("File::Copy failed trying to backup source: $ERRNO"); - } - else { - rename( $input_file, $backup_name ) - or Die( -"problem renaming $input_file to $backup_name for -b option: $ERRNO\n" - ); - } - $ifname = $backup_name; - - # copy the output to the original input file - # NOTE: it would be nice to just close $output_file and use - # File::Copy::copy here, but in this case $output_file is the - # handle of an open nameless temporary file so we would lose - # everything if we closed it. - seek( $output_file, 0, 0 ) - or - Die("unable to rewind a temporary file for -b option: $ERRNO\n"); +sub process_single_case { - my ( $fout, $iname ) = - Perl::Tidy::streamhandle( $input_file, 'w', $is_encoded_data ); - if ( !$fout ) { - Die( -"problem re-opening $input_file for write for -b option; check file and directory permissions: $ERRNO\n" - ); - } + # run the formatter on a single defined case + my ( $self, $tokenizer, $formatter ) = @_; - my $line; - while ( $line = $output_file->getline() ) { - $fout->print($line); - } - $fout->close(); - $output_file = $input_file; - $ofname = $input_file; - } + # Total formatting is done with these layers of subroutines: + # perltidy - main routine; checks run parameters + # process_all_files - main loop to process all files; + # process_filter_layer - do any pre and post processing; + # process_iteration_layer - do any iterations on formatting + # *process_single_case - solve one formatting problem; *THIS LAYER - #--------------------------------------------------------------- - # clean up and report errors - #--------------------------------------------------------------- - $sink_object->close_output_file() if $sink_object; - $debugger_object->close_debug_file() if $debugger_object; + while ( my $line = $tokenizer->get_line() ) { + $formatter->write_line($line); + } - # set output file permissions - if ( $output_file && -f $output_file && !-l $output_file ) { - if (@input_file_stat) { + # user-defined formatters are possible, and may not have a + # sub 'finish_formatting', so we have to check + if ( $formatter->can('finish_formatting') ) { + my $severe_error = $tokenizer->report_tokenization_errors(); + my $verbatim = $formatter->finish_formatting($severe_error); + $self->[_input_copied_verbatim_] = $verbatim; + } - # Set file ownership and permissions - if ( $rOpts->{'format'} eq 'tidy' ) { - my ( $mode_i, $uid_i, $gid_i ) = - @input_file_stat[ 2, 4, 5 ]; - my ( $uid_o, $gid_o ) = ( stat($output_file) )[ 4, 5 ]; - my $input_file_permissions = $mode_i & oct(7777); - my $output_file_permissions = $input_file_permissions; - - #rt128477: avoid inconsistent owner/group and suid/sgid - if ( $uid_i != $uid_o || $gid_i != $gid_o ) { - - # try to change owner and group to match input file if - # in -b mode. Note: chown returns number of files - # successfully changed. - if ( $in_place_modify - && chown( $uid_i, $gid_i, $output_file ) ) - { - # owner/group successfully changed - } - else { + return; +} ## end sub process_single_case - # owner or group differ: do not copy suid and sgid - $output_file_permissions = $mode_i & oct(777); - if ( $input_file_permissions != - $output_file_permissions ) - { - Warn( -"Unable to copy setuid and/or setgid bits for output file '$output_file'\n" - ); - } - } - } +sub copy_buffer_to_destination { - # Make the output file for rw unless we are in -b mode. - # Explanation: perltidy does not unlink existing output - # files before writing to them, for safety. If a - # designated output file exists and is not writable, - # perltidy will halt. This can prevent a data loss if a - # user accidentally enters "perltidy infile -o - # important_ro_file", or "perltidy infile -st - # >important_ro_file". But it also means that perltidy can - # get locked out of rerunning unless it marks its own - # output files writable. The alternative, of always - # unlinking the designated output file, is less safe and - # not always possible, except in -b mode, where there is an - # assumption that a previous backup can be unlinked even if - # not writable. - if ( !$in_place_modify ) { - $output_file_permissions |= oct(600); - } + my ( $self, $destination_buffer, $destination_stream, + $encode_destination_buffer ) + = @_; - if ( !chmod( $output_file_permissions, $output_file ) ) { + # Copy $destination_buffer to the final $destination_stream, + # encoding if the flag $encode_destination_buffer is true. - # couldn't change file permissions - my $operm = sprintf "%04o", $output_file_permissions; - Warn( -"Unable to set permissions for output file '$output_file' to $operm\n" - ); - } - } + # Data Flow: + # $destination_buffer -> [ encode? ] -> $destination_stream - # else use default permissions for html and any other format + $rstatus->{'output_encoded_as'} = EMPTY_STRING; + + if ($encode_destination_buffer) { + my $encoded_buffer; + if ( + !eval { + $encoded_buffer = + Encode::encode( "UTF-8", $destination_buffer, + Encode::FB_CROAK | Encode::LEAVE_SRC ); + 1; } + ) + { + + Warn( +"Error attempting to encode output string ref; encoding not done\n" + ); } + else { + $destination_buffer = $encoded_buffer; + $rstatus->{'output_encoded_as'} = 'UTF-8'; + } + } - #--------------------------------------------------------------- - # remove the original file for in-place modify as follows: - # $delete_backup=0 never - # $delete_backup=1 only if no errors - # $delete_backup>1 always : NOT ALLOWED, too risky, see above - #--------------------------------------------------------------- - if ( $in_place_modify - && $delete_backup - && -f $ifname - && ( $delete_backup > 1 || !$logger_object->get_warning_count() ) ) - { + # Send data for SCALAR, ARRAY & OBJ refs to its final destination + if ( ref($destination_stream) eq 'SCALAR' ) { + ${$destination_stream} = $destination_buffer; + } + elsif ($destination_buffer) { + my @lines = split /^/, $destination_buffer; + if ( ref($destination_stream) eq 'ARRAY' ) { + @{$destination_stream} = @lines; + } - # As an added safety precaution, do not delete the source file - # if its size has dropped from positive to zero, since this - # could indicate a disaster of some kind, including a hardware - # failure. Actually, this could happen if you had a file of - # all comments (or pod) and deleted everything with -dac (-dap) - # for some reason. - if ( !-s $output_file && -s $ifname && $delete_backup == 1 ) { - Warn( -"output file '$output_file' missing or zero length; original '$ifname' not deleted\n" - ); + # destination stream must be an object with print method + else { + foreach my $line (@lines) { + $destination_stream->print($line); } - else { - unlink($ifname) - or Die( -"unable to remove previous '$ifname' for -b option; check permissions: $ERRNO\n" - ); + my $ref_destination_stream = ref($destination_stream); + if ( $ref_destination_stream->can('close') ) { + $destination_stream->close(); } } + } + else { - $logger_object->finish($formatter) - if $logger_object; - } ## end of main loop to process all files - - # Fix for RT #130297: return a true value if anything was written to the - # standard error output, even non-fatal warning messages, otherwise return - # false. - - # These exit codes are returned: - # 0 = perltidy ran to completion with no errors - # 1 = perltidy could not run to completion due to errors - # 2 = perltidy ran to completion with error messages - - # Note that if perltidy is run with multiple files, any single file with - # errors or warnings will write a line like - # '## Please see file testing.t.ERR' - # to standard output for each file with errors, so the flag will be true, - # even if only some of the multiple files may have had errors. - - NORMAL_EXIT: - my $ret = $Warn_count ? 2 : 0; - return wantarray ? ( $ret, $rstatus ) : $ret; - - ERROR_EXIT: - return wantarray ? ( 1, $rstatus ) : 1; + # Empty destination buffer not going to a string ... could + # happen for example if user deleted all pod or comments + } + return; +} ## end sub copy_buffer_to_destination -} ## end sub perltidy } ## end of closure for sub perltidy sub line_diff { @@ -2193,7 +2922,6 @@ sub compare_string_buffers { $last_nonblank_line = $truncate->( $last_nonblank_line, 72 ); if ($last_nonblank_line) { - my $countm = $counti - 1; $msg .= <getline() ) { - $fout->print($line); - } - $fout->close(); - } - $fh_stream->close(); - } - } - elsif ( $stream ne '-' && -f $stream ) { - $fname = $stream; - } - } - return ( $fname, $is_tmpfile ); -} ## end sub get_stream_as_named_file - sub fileglob_to_re { # modified (corrected) from version in find2perl @@ -2274,65 +2962,39 @@ sub fileglob_to_re { $x =~ s#\*#.*#g; # '*' -> '.*' $x =~ s#\?#.#g; # '?' -> '.' return "^$x\\z"; # match whole word -} - -sub make_extension { +} ## end sub fileglob_to_re - # Make a file extension, including any leading '.' if necessary - # The '.' may actually be an '_' under VMS - my ( $extension, $default, $dot ) = @_; - - # Use the default if none specified - $extension = $default unless ($extension); - - # Only extensions with these leading characters get a '.' - # This rule gives the user some freedom - if ( $extension =~ /^[a-zA-Z0-9]/ ) { - $extension = $dot . $extension; - } - return $extension; -} ## end sub make_extension - -sub write_logfile_header { - my ( - $rOpts, $logger_object, $config_file, - $rraw_options, $Windows_type, $readable_options - ) = @_; +sub make_logfile_header { + my ( $rOpts, $config_file, $rraw_options, $Windows_type, $readable_options ) + = @_; # Note: the punctuation variable '$]' is not in older versions of # English.pm so leave it as is to avoid failing installation tests. - $logger_object->write_logfile_entry( -"perltidy version $VERSION log file on a $OSNAME system, OLD_PERL_VERSION=$]\n" - ); + my $msg = +"perltidy version $VERSION log file on a $OSNAME system, OLD_PERL_VERSION=$]\n"; if ($Windows_type) { - $logger_object->write_logfile_entry("Windows type is $Windows_type\n"); + $msg .= "Windows type is $Windows_type\n"; } my $options_string = join( SPACE, @{$rraw_options} ); if ($config_file) { - $logger_object->write_logfile_entry( - "Found Configuration File >>> $config_file \n"); + $msg .= "Found Configuration File >>> $config_file \n"; } - $logger_object->write_logfile_entry( - "Configuration and command line parameters for this run:\n"); - $logger_object->write_logfile_entry("$options_string\n"); + $msg .= "Configuration and command line parameters for this run:\n"; + $msg .= "$options_string\n"; if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) { $rOpts->{'logfile'} = 1; # force logfile to be saved - $logger_object->write_logfile_entry( - "Final parameter set for this run\n"); - $logger_object->write_logfile_entry( - "------------------------------------\n"); + $msg .= "Final parameter set for this run\n"; + $msg .= "------------------------------------\n"; - $logger_object->write_logfile_entry($readable_options); + $msg .= $readable_options; - $logger_object->write_logfile_entry( - "------------------------------------\n"); + $msg .= "------------------------------------\n"; } - $logger_object->write_logfile_entry( - "To find error messages search for 'WARNING' with your editor\n"); - return; -} ## end sub write_logfile_header + $msg .= "To find error messages search for 'WARNING' with your editor\n"; + return $msg; +} ## end sub make_logfile_header sub generate_options { @@ -2370,9 +3032,9 @@ sub generate_options { # i.e., -foo and -nofoo are allowed # a double dash signals the end of the options list # - #--------------------------------------------------------------- + #----------------------------------------------- # Define the option string passed to GetOptions. - #--------------------------------------------------------------- + #----------------------------------------------- my @option_string = (); my %expansion = (); @@ -2462,6 +3124,7 @@ sub generate_options { ########################### $add_option->( 'backup-and-modify-in-place', 'b', '!' ); $add_option->( 'backup-file-extension', 'bext', '=s' ); + $add_option->( 'backup-method', 'bm', '=s' ); $add_option->( 'character-encoding', 'enc', '=s' ); $add_option->( 'force-read-binary', 'f', '!' ); $add_option->( 'format', 'fmt', '=s' ); @@ -2506,6 +3169,7 @@ sub generate_options { $add_option->( 'sub-alias-list', 'sal', '=s' ); $add_option->( 'grep-alias-list', 'gal', '=s' ); $add_option->( 'grep-alias-exclusion-list', 'gaxl', '=s' ); + $add_option->( 'use-feature', 'uf', '=s' ); ######################################## $category = 2; # Code indentation control @@ -2532,11 +3196,15 @@ sub generate_options { ######################################## $category = 3; # Whitespace control ######################################## + $add_option->( 'add-trailing-commas', 'atc', '!' ); $add_option->( 'add-semicolons', 'asc', '!' ); $add_option->( 'add-whitespace', 'aws', '!' ); $add_option->( 'block-brace-tightness', 'bbt', '=i' ); $add_option->( 'brace-tightness', 'bt', '=i' ); $add_option->( 'delete-old-whitespace', 'dws', '!' ); + $add_option->( 'delete-repeated-commas', 'drc', '!' ); + $add_option->( 'delete-trailing-commas', 'dtc', '!' ); + $add_option->( 'delete-weld-interfering-commas', 'dwic', '!' ); $add_option->( 'delete-semicolons', 'dsm', '!' ); $add_option->( 'function-paren-vertical-alignment', 'fpva', '!' ); $add_option->( 'keyword-paren-inner-tightness', 'kpit', '=i' ); @@ -2559,6 +3227,7 @@ sub generate_options { $add_option->( 'trim-pod', 'trp', '!' ); $add_option->( 'want-left-space', 'wls', '=s' ); $add_option->( 'want-right-space', 'wrs', '=s' ); + $add_option->( 'want-trailing-commas', 'wtc', '=s' ); $add_option->( 'space-prototype-paren', 'spp', '=i' ); $add_option->( 'valign-code', 'vc', '!' ); $add_option->( 'valign-block-comments', 'vbc', '!' ); @@ -2604,12 +3273,14 @@ sub generate_options { $add_option->( 'add-newlines', 'anl', '!' ); $add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' ); $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' ); + $add_option->( 'brace-follower-vertical-tightness', 'bfvt', '=i' ); $add_option->( 'brace-vertical-tightness', 'bvt', '=i' ); $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' ); $add_option->( 'cuddled-else', 'ce', '!' ); $add_option->( 'cuddled-block-list', 'cbl', '=s' ); $add_option->( 'cuddled-block-list-exclusive', 'cblx', '!' ); $add_option->( 'cuddled-break-option', 'cbo', '=i' ); + $add_option->( 'cuddled-paren-brace', 'cpb', '!' ); $add_option->( 'delete-old-newlines', 'dnl', '!' ); $add_option->( 'opening-brace-always-on-right', 'bar', '!' ); $add_option->( 'opening-brace-on-new-line', 'bl', '!' ); @@ -2622,6 +3293,7 @@ sub generate_options { $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' ); $add_option->( 'weld-nested-containers', 'wn', '!' ); $add_option->( 'weld-nested-exclusion-list', 'wnxl', '=s' ); + $add_option->( 'weld-fat-comma', 'wfc', '!' ); $add_option->( 'space-backslash-quote', 'sbq', '=i' ); $add_option->( 'stack-closing-block-brace', 'scbb', '!' ); $add_option->( 'stack-closing-hash-brace', 'schb', '!' ); @@ -2639,6 +3311,7 @@ sub generate_options { $add_option->( 'keep-interior-semicolons', 'kis', '!' ); $add_option->( 'one-line-block-semicolons', 'olbs', '=i' ); $add_option->( 'one-line-block-nesting', 'olbn', '=i' ); + $add_option->( 'one-line-block-exclusion-list', 'olbxl', '=s' ); $add_option->( 'break-before-hash-brace', 'bbhb', '=i' ); $add_option->( 'break-before-hash-brace-and-indent', 'bbhbi', '=i' ); $add_option->( 'break-before-square-bracket', 'bbsb', '=i' ); @@ -2717,19 +3390,22 @@ sub generate_options { ######################################## $category = 13; # Debugging ######################################## - $add_option->( 'DIAGNOSTICS', 'I', '!' ) if (DEVEL_MODE); - $add_option->( 'DEBUG', 'D', '!' ); - $add_option->( 'dump-cuddled-block-list', 'dcbl', '!' ); - $add_option->( 'dump-defaults', 'ddf', '!' ); - $add_option->( 'dump-long-names', 'dln', '!' ); - $add_option->( 'dump-options', 'dop', '!' ); - $add_option->( 'dump-profile', 'dpro', '!' ); - $add_option->( 'dump-short-names', 'dsn', '!' ); - $add_option->( 'dump-token-types', 'dtt', '!' ); - $add_option->( 'dump-want-left-space', 'dwls', '!' ); - $add_option->( 'dump-want-right-space', 'dwrs', '!' ); - $add_option->( 'fuzzy-line-length', 'fll', '!' ); - $add_option->( 'help', 'h', EMPTY_STRING ); + $add_option->( 'DIAGNOSTICS', 'I', '!' ) if (DEVEL_MODE); + $add_option->( 'DEBUG', 'D', '!' ); + $add_option->( 'dump-block-summary', 'dbs', '!' ); + $add_option->( 'dump-block-minimum-lines', 'dbl', '=i' ); + $add_option->( 'dump-block-types', 'dbt', '=s' ); + $add_option->( 'dump-cuddled-block-list', 'dcbl', '!' ); + $add_option->( 'dump-defaults', 'ddf', '!' ); + $add_option->( 'dump-long-names', 'dln', '!' ); + $add_option->( 'dump-options', 'dop', '!' ); + $add_option->( 'dump-profile', 'dpro', '!' ); + $add_option->( 'dump-short-names', 'dsn', '!' ); + $add_option->( 'dump-token-types', 'dtt', '!' ); + $add_option->( 'dump-want-left-space', 'dwls', '!' ); + $add_option->( 'dump-want-right-space', 'dwrs', '!' ); + $add_option->( 'fuzzy-line-length', 'fll', '!' ); + $add_option->( 'help', 'h', EMPTY_STRING ); $add_option->( 'short-concatenation-item-length', 'scl', '=i' ); $add_option->( 'show-options', 'opt', '!' ); $add_option->( 'timestamp', 'ts', '!' ); @@ -2764,9 +3440,9 @@ sub generate_options { } } - #--------------------------------------------------------------- + #--------------------------------------- # Assign valid ranges to certain options - #--------------------------------------------------------------- + #--------------------------------------- # In the future, these may be used to make preliminary checks # hash keys are long names # If key or value is undefined: @@ -2789,6 +3465,7 @@ sub generate_options { 'square-bracket-tightness' => [ 0, 2 ], 'block-brace-vertical-tightness' => [ 0, 2 ], + 'brace-follower-vertical-tightness' => [ 0, 2 ], 'brace-vertical-tightness' => [ 0, 2 ], 'brace-vertical-tightness-closing' => [ 0, 2 ], 'paren-vertical-tightness' => [ 0, 2 ], @@ -2816,11 +3493,11 @@ sub generate_options { # Note: we could actually allow negative ci if someone really wants it: # $option_range{'continuation-indentation'} = [ undef, undef ]; - #--------------------------------------------------------------- + #------------------------------------------------------------------ # DEFAULTS: Assign default values to the above options here, except # for 'outfile' and 'help'. # These settings should approximate the perlstyle(1) suggestions. - #--------------------------------------------------------------- + #------------------------------------------------------------------ my @defaults = qw( add-newlines add-terminal-newline @@ -2840,6 +3517,7 @@ sub generate_options { block-brace-tightness=0 block-brace-vertical-tightness=0 + brace-follower-vertical-tightness=1 brace-tightness=1 brace-vertical-tightness-closing=0 brace-vertical-tightness=0 @@ -2869,6 +3547,8 @@ sub generate_options { cuddled-break-option=1 delete-old-newlines delete-semicolons + dump-block-minimum-lines=20 + dump-block-types=sub extended-syntax encode-output-strings function-paren-vertical-alignment @@ -2914,6 +3594,7 @@ sub generate_options { noweld-nested-containers recombine nouse-unicode-gcstring + use-feature=class valign-code valign-block-comments valign-side-comments @@ -2928,6 +3609,7 @@ sub generate_options { timestamp trim-qw format=tidy + backup-method=copy backup-file-extension=bak code-skipping format-skipping @@ -2938,12 +3620,10 @@ sub generate_options { html-entities ); - push @defaults, "perl-syntax-check-flags=-c -T"; - - #--------------------------------------------------------------- + #----------------------------------------------------------------------- # Define abbreviations which will be expanded into the above primitives. # These may be defined recursively. - #--------------------------------------------------------------- + #----------------------------------------------------------------------- %expansion = ( %expansion, 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)], @@ -3218,9 +3898,11 @@ sub _process_command_line { # breaking old versions of Perl without these routines. # Previous configuration is reset at the exit of this routine. my $glc; - eval { $glc = Getopt::Long::Configure() }; - unless ($EVAL_ERROR) { - eval { Getopt::Long::ConfigDefaults() }; + if ( eval { $glc = Getopt::Long::Configure(); 1 } ) { + my $ok = eval { Getopt::Long::ConfigDefaults(); 1 }; + if ( !$ok && DEVEL_MODE ) { + Fault("Failed call to Getopt::Long::ConfigDefaults: $EVAL_ERROR\n"); + } } else { $glc = undef } @@ -3229,9 +3911,9 @@ sub _process_command_line { $roption_category, $roption_range ) = generate_options(); - #--------------------------------------------------------------- + #-------------------------------------------------------------- # set the defaults by passing the above list through GetOptions - #--------------------------------------------------------------- + #-------------------------------------------------------------- my %Opts = (); { local @ARGV = (); @@ -3247,17 +3929,16 @@ sub _process_command_line { } } - my $word; my @raw_options = (); my $config_file = EMPTY_STRING; my $saw_ignore_profile = 0; my $saw_dump_profile = 0; - #--------------------------------------------------------------- + #-------------------------------------------------------------- # Take a first look at the command-line parameters. Do as many # immediate dumps as possible, which can avoid confusion if the # perltidyrc file has an error. - #--------------------------------------------------------------- + #-------------------------------------------------------------- foreach my $i (@ARGV) { $i =~ s/^--/-/; @@ -3330,9 +4011,9 @@ sub _process_command_line { Exit(1); } - #--------------------------------------------------------------- + #---------------------------------------- # read any .perltidyrc configuration file - #--------------------------------------------------------------- + #---------------------------------------- unless ($saw_ignore_profile) { # resolve possible conflict between $perltidyrc_stream passed @@ -3434,6 +4115,7 @@ EOM dump-token-types dump-want-left-space dump-want-right-space + dump-block-summary help stylesheet version @@ -3450,9 +4132,9 @@ EOM } } - #--------------------------------------------------------------- + #---------------------------------------- # now process the command line parameters - #--------------------------------------------------------------- + #---------------------------------------- expand_command_abbreviations( $rexpansion, \@raw_options, $config_file ); local $SIG{'__WARN__'} = sub { Warn( $_[0] ) }; @@ -3461,7 +4143,12 @@ EOM } # reset Getopt::Long configuration back to its previous value - eval { Getopt::Long::Configure($glc) } if defined $glc; + if ( defined($glc) ) { + my $ok = eval { Getopt::Long::Configure($glc); 1 }; + if ( !$ok && DEVEL_MODE ) { + Fault("Could not reset Getopt::Long configuration: $EVAL_ERROR\n"); + } + } return ( \%Opts, $config_file, \@raw_options, $roption_string, $rexpansion, $roption_category, $roption_range ); @@ -3527,13 +4214,63 @@ sub make_grep_alias_string { return; } ## end sub make_grep_alias_string +sub cleanup_word_list { + my ( $rOpts, $option_name, $rforced_words ) = @_; + + # Clean up the list of words in a user option to simplify use by + # later routines (delete repeats, replace commas with single space, + # remove non-words) + + # Given: + # $rOpts - the global option hash + # $option_name - hash key of this option + # $rforced_words - ref to list of any words to be added + + # Returns: + # \%seen - hash of the final list of words + + my %seen; + my @input_list; + + my $input_string = $rOpts->{$option_name}; + if ( defined($input_string) && length($input_string) ) { + $input_string =~ s/,/ /g; # allow commas + $input_string =~ s/^\s+//; + $input_string =~ s/\s+$//; + @input_list = split /\s+/, $input_string; + } + + if ($rforced_words) { + push @input_list, @{$rforced_words}; + } + + my @filtered_word_list; + foreach my $word (@input_list) { + if ($word) { + + # look for obviously bad words + if ( $word =~ /^\d/ || $word !~ /^\w[\w\d]*$/ ) { + Warn("unexpected '$option_name' word '$word' - ignoring\n"); + } + if ( !$seen{$word} ) { + $seen{$word}++; + push @filtered_word_list, $word; + } + } + } + $rOpts->{$option_name} = join SPACE, @filtered_word_list; + return \%seen; +} ## end sub cleanup_word_list + sub check_options { - my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_; + my ( $self, $is_Windows, $Windows_type, $rpending_complaint ) = @_; - #--------------------------------------------------------------- + my $rOpts = $self->[_rOpts_]; + + #------------------------------------------------------------ # check and handle any interactions among the basic options.. - #--------------------------------------------------------------- + #------------------------------------------------------------ # Since perltidy only encodes in utf8, problems can occur if we let it # decode anything else. See discussions for issue git #83. @@ -3576,21 +4313,6 @@ EOM # compatibility but is ignored if set. $rOpts->{'check-syntax'} = 0; - # check iteration count and quietly fix if necessary: - # - iterations option only applies to code beautification mode - # - the convergence check should stop most runs on iteration 2, and - # virtually all on iteration 3. But we'll allow up to 6. - if ( $rOpts->{'format'} ne 'tidy' ) { - $rOpts->{'iterations'} = 1; - } - elsif ( defined( $rOpts->{'iterations'} ) ) { - if ( $rOpts->{'iterations'} <= 0 ) { $rOpts->{'iterations'} = 1 } - elsif ( $rOpts->{'iterations'} > 6 ) { $rOpts->{'iterations'} = 6 } - } - else { - $rOpts->{'iterations'} = 1; - } - my $check_blank_count = sub { my ( $key, $abbrev ) = @_; if ( $rOpts->{$key} ) { @@ -3691,30 +4413,30 @@ EOM $rOpts->{'default-tabsize'} = 8; } + # Check and clean up any use-feature list + my $saw_use_feature_class; + if ( $rOpts->{'use-feature'} ) { + my $rseen = cleanup_word_list( $rOpts, 'use-feature' ); + $saw_use_feature_class = $rseen->{'class'}; + } + # Check and clean up any sub-alias-list - if ( $rOpts->{'sub-alias-list'} ) { - my $sub_alias_string = $rOpts->{'sub-alias-list'}; - $sub_alias_string =~ s/,/ /g; # allow commas - $sub_alias_string =~ s/^\s+//; - $sub_alias_string =~ s/\s+$//; - my @sub_alias_list = split /\s+/, $sub_alias_string; - my @filtered_word_list = ('sub'); - my %seen; - - # include 'sub' for later convenience - $seen{sub}++; - foreach my $word (@sub_alias_list) { - if ($word) { - if ( $word !~ /^\w[\w\d]*$/ ) { - Warn("unexpected sub alias '$word' - ignoring\n"); - } - if ( !$seen{$word} ) { - $seen{$word}++; - push @filtered_word_list, $word; - } - } - } - $rOpts->{'sub-alias-list'} = join SPACE, @filtered_word_list; + if ( + defined( $rOpts->{'sub-alias-list'} ) + && length( $rOpts->{'sub-alias-list'} ) + + || $saw_use_feature_class + ) + { + my @forced_words; + + # include 'sub' for convenience if this option is used + push @forced_words, 'sub'; + + # use-feature=class requires method as a sub alias + push @forced_words, 'method' if ($saw_use_feature_class); + + cleanup_word_list( $rOpts, 'sub-alias-list', \@forced_words ); } make_grep_alias_string($rOpts); @@ -3729,6 +4451,11 @@ EOM } } + # Large values of -scl can cause convergence problems, issue c167 + if ( $rOpts->{'short-concatenation-item-length'} > 12 ) { + $rOpts->{'short-concatenation-item-length'} = 12; + } + # The freeze-whitespace option is currently a derived option which has its # own key $rOpts->{'freeze-whitespace'} = !$rOpts->{'add-whitespace'} @@ -3742,16 +4469,58 @@ EOM # Define $tabsize, the number of spaces per tab for use in # guessing the indentation of source lines with leading tabs. - # Assume same as for this run if tabs are used , otherwise assume + # Assume same as for this run if tabs are used, otherwise assume # a default value, typically 8 - my $tabsize = + $self->[_tabsize_] = $rOpts->{'entab-leading-whitespace'} ? $rOpts->{'entab-leading-whitespace'} : $rOpts->{'tabs'} ? $rOpts->{'indent-columns'} : $rOpts->{'default-tabsize'}; - return $tabsize; + + # Define the default line ending, before any -ple option is applied + $self->[_line_separator_default_] = get_line_separator_default($rOpts); + + return; } ## end sub check_options +sub get_line_separator_default { + + my ( $rOpts, $input_file ) = @_; + + # Get the line separator that will apply unless overriden by a + # --preserve-line-endings flag for a specific file + + my $line_separator_default = "\n"; + + my $ole = $rOpts->{'output-line-ending'}; + if ($ole) { + my %endings = ( + dos => "\015\012", + win => "\015\012", + mac => "\015", + unix => "\012", + ); + + $line_separator_default = $endings{ lc $ole }; + + if ( !$line_separator_default ) { + my $str = join SPACE, keys %endings; + Die(<{'preserve-line-endings'} ) { + Warn("Ignoring -ple; conflicts with -ole\n"); + $rOpts->{'preserve-line-endings'} = undef; + } + } + + return $line_separator_default; + +} ## end sub get_line_separator_default + sub find_file_upwards { my ( $search_dir, $search_file ) = @_; @@ -3843,7 +4612,7 @@ sub expand_command_abbreviations { # update parameter list @ARGV to the new one @ARGV = @new_argv; - last unless ( $abbrev_count > 0 ); + last if ( !$abbrev_count ); # make sure we are not in an infinite loop if ( $pass_count == $max_passes ) { @@ -3955,7 +4724,13 @@ sub Win_OS_Type { # Use the standard API call to determine the version my ( $undef, $major, $minor, $build, $id ); - eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() }; + my $ok = eval { + ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion(); + 1; + }; + if ( !$ok && DEVEL_MODE ) { + Fault("Could not cal Win32::GetOSVersion(): $EVAL_ERROR\n"); + } # # NAME ID MAJOR MINOR @@ -3970,13 +4745,13 @@ sub Win_OS_Type { 1 => { 0 => "95", 10 => "98", - 90 => "Me" + 90 => "Me", }, 2 => { 0 => "2000", # or NT 4, see below 1 => "XP/.Net", 2 => "Win2003", - 51 => "NT3.51" + 51 => "NT3.51", } }->{$id}->{$minor}; @@ -3999,14 +4774,6 @@ EOS return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os; } ## end sub Win_OS_Type -sub is_unix { - return - ( $OSNAME !~ /win32|dos/i ) - && ( $OSNAME ne 'VMS' ) - && ( $OSNAME ne 'OS2' ) - && ( $OSNAME ne 'MacOS' ); -} - sub look_for_Windows { # determine Windows sub-type and location of @@ -4171,10 +4938,6 @@ sub Win_Config_Locs { # Directory, and All Users Directory. All Users will be empty on a # 9x/Me box. Contributed by: Yves Orton. - # Original coding: - # my $rpending_complaint = shift; - # my $os = (@_) ? shift : Win_OS_Type(); - my ( $rpending_complaint, $os ) = @_; if ( !$os ) { $os = Win_OS_Type(); } @@ -4206,11 +4969,14 @@ sub Win_Config_Locs { sub dump_config_file { my ( $fh, $config_file, $rconfig_file_chatter ) = @_; - print STDOUT "$$rconfig_file_chatter"; + print STDOUT "${$rconfig_file_chatter}"; if ($fh) { print STDOUT "# Dump of file: '$config_file'\n"; while ( my $line = $fh->getline() ) { print STDOUT $line } - eval { $fh->close() }; + my $ok = eval { $fh->close(); 1 }; + if ( !$ok && DEVEL_MODE ) { + Fault("Could not close file handle(): $EVAL_ERROR\n"); + } } else { print STDOUT "# ...no config file found\n"; @@ -4246,7 +5012,6 @@ sub read_config_file { # See rules in perltidy's perldoc page # Section: Other Controls - Creating a new abbreviation if ( $line =~ /^((\w+)\s*\{)(.*)?$/ ) { - my $oldname = $name; ( $name, $body ) = ( $2, $3 ); # Cannot start new abbreviation unless old abbreviation is complete @@ -4316,7 +5081,10 @@ EOM $death_message = "Didn't see a '}' to match the '{' at line $opening_brace_line in config file '$config_file'\n"; } - eval { $fh->close() }; + my $ok = eval { $fh->close(); 1 }; + if ( !$ok && DEVEL_MODE ) { + Fault("Could not close file handle(): $EVAL_ERROR\n"); + } return ( \@config_list, $death_message ); } ## end sub read_config_file @@ -4466,7 +5234,7 @@ sub dump_long_names { my @names = @_; print STDOUT < does not take an argument # =s takes a mandatory string @@ -4477,7 +5245,7 @@ sub dump_long_names { # i.e., -foo and -nofoo are allowed # a double dash signals the end of the options list # -#--------------------------------------------------------------- +#-------------------------------------------------- EOM foreach my $name ( sort @names ) { print STDOUT "$name\n" } @@ -4489,7 +5257,7 @@ sub dump_defaults { print STDOUT "Default command line options:\n"; foreach my $line ( sort @defaults ) { print STDOUT "$line\n" } return; -} +} ## end sub dump_defaults sub readable_options { @@ -4709,7 +5477,7 @@ Outdenting -okwl=s specify alternative keywords for -okw command Other controls - -mft=n maximum fields per table; default n=40 + -mft=n maximum fields per table; default n=0 (no limit) -x do not format lines before hash-bang line (i.e., for VMS) -asc allows perltidy to add a ';' when missing (default) -dsm allows perltidy to delete an unnecessary ';' (default) @@ -4752,20 +5520,4 @@ EOF return; } ## end sub usage -sub process_this_file { - - my ( $tokenizer, $formatter ) = @_; - - while ( my $line = $tokenizer->get_line() ) { - $formatter->write_line($line); - } - my $severe_error = $tokenizer->report_tokenization_errors(); - - # user-defined formatters are possible, and may not have a - # sub 'finish_formatting', so we have to check - $formatter->finish_formatting($severe_error) - if $formatter->can('finish_formatting'); - - return; -} ## end sub process_this_file 1;