From: Steve Hancock Date: Wed, 27 Sep 2023 23:29:06 +0000 (-0700) Subject: simplify code for trimming leading whitespace X-Git-Tag: 20230912.02~5 X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=a49daa4c05565b9bc3e6c4ebf3dcd756946e0efc;p=perltidy.git simplify code for trimming leading whitespace --- diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm index 5fb6e78a..06775b82 100644 --- a/lib/Perl/Tidy/Tokenizer.pm +++ b/lib/Perl/Tidy/Tokenizer.pm @@ -27,10 +27,9 @@ our $VERSION = '20230912.01'; use Carp; -use constant USE_FAST_TRIM => 1; -use constant DEVEL_MODE => 0; -use constant EMPTY_STRING => q{}; -use constant SPACE => q{ }; +use constant DEVEL_MODE => 0; +use constant EMPTY_STRING => q{}; +use constant SPACE => q{ }; # Decimal values of some ascii characters for quick checks use constant ORD_TAB => 9; @@ -217,7 +216,7 @@ BEGIN { _rOpts_ => $i++, _rinput_lines_ => $i++, _input_line_index_next_ => $i++, - _rleading_space_char_count_ => $i++, + _rtrimmed_input_lines_ => $i++, }; } ## end BEGIN @@ -638,45 +637,32 @@ EOM $source_string = join( EMPTY_STRING, @{$rinput_lines} ); } - # Optional optimization. It is much faster to find leading whitespace on - # the whole input file than line-by-line. If we define an array @spaces - # with the count of leading space characters for each line, they will be - # used. If @spaces is an empty array, spaces will be found line-by-line in - # sub 'tokenize_this_line'. - my @spaces; - if (USE_FAST_TRIM) { + # Get trimmed lines. It is much faster to strip leading whitespace from + # the whole input file at once than line-by-line. - # we remove all whitespace from left, but stop at a newline - $source_string =~ s/^ [^\S\n]+ //gxm; - my @trimmed_lines = split /^/, $source_string; + # Remove all whitespace from left, but stop at a newline, + my @trimmed_lines; + $source_string =~ s/^ [^\S\n]+ //gxm; + @trimmed_lines = split /^/, $source_string; - # The change in line length gives the number of space characters - if ( @trimmed_lines == @{$rinput_lines} ) { - my $i = -1; - foreach my $line (@trimmed_lines) { - push @spaces, length( $rinput_lines->[ ++$i ] ) - length($line); - } + # then remove the newlines. + for (@trimmed_lines) { chomp } - # Be sure there are no negative spaces (shouldn't happen) - my $min_space = List::Util::min(@spaces); - if ( defined($min_space) && $min_space < 0 ) { + # Safety check - be sure the number of lines has not changed + if ( @trimmed_lines != @{$rinput_lines} ) { - # shouldn't happen - safely continue with undefined spaces - DEVEL_MODE - && $self->Fault( - "Expecting min spaces >=0 but is $min_space\n"); - @spaces = (); - } - } - else { - # Shouldn't happen - safely continue with undefined spaces - DEVEL_MODE && $self->Fault("line counts differ\n"); - } + # Shouldn't happen - die in DEVEL_MODE and fix + DEVEL_MODE + && $self->Fault("trimmed/untrimmed line counts differ\n"); + + # But we can safely continue with undefined trimmed lines. They will + # be detected and fixed later. + @trimmed_lines = (); } - $self->[_rinput_lines_] = $rinput_lines; - $self->[_rleading_space_char_count_] = \@spaces; - $self->[_input_line_index_next_] = 0; + $self->[_rinput_lines_] = $rinput_lines; + $self->[_rtrimmed_input_lines_] = \@trimmed_lines; + $self->[_input_line_index_next_] = 0; return; } ## end sub make_source_array @@ -1045,13 +1031,12 @@ sub get_line { # get the next line from the input array my $input_line; - my $leading_space_char_count; + my $trimmed_input_line; my $line_index = $self->[_input_line_index_next_]; my $rinput_lines = $self->[_rinput_lines_]; if ( $line_index < @{$rinput_lines} ) { - $leading_space_char_count = - $self->[_rleading_space_char_count_]->[$line_index]; - $input_line = $rinput_lines->[ $line_index++ ]; + $trimmed_input_line = $self->[_rtrimmed_input_lines_]->[$line_index]; + $input_line = $rinput_lines->[ $line_index++ ]; $self->[_input_line_index_next_] = $line_index; } @@ -1074,10 +1059,10 @@ sub get_line { if ( $input_line =~ s/([\r\035\032])+$// ) { $input_line_separator = $1 . $input_line_separator; - # This could make the old leading space count incorrect, so the - # safe thing to do is to make it undef. This will cause the slow - # method to be used to find the leading space. - $leading_space_char_count = undef; + # This could make the trimmed input line incorrect, so the + # safe thing to do is to make it undef to force it to be + # recomputed later. + $trimmed_input_line = undef; } } @@ -1394,7 +1379,8 @@ sub get_line { # _in_skipped_ # _in_pod_ # _in_quote_ - $self->tokenize_this_line( $line_of_tokens, $leading_space_char_count ); + + $self->tokenize_this_line( $line_of_tokens, $trimmed_input_line ); # Now finish defining the return structure and return it $line_of_tokens->{_ending_in_quote} = $self->[_in_quote_]; @@ -5095,10 +5081,6 @@ EOM # can use your editor to search for the string "NEW_TOKENS" to find the # appropriate sections to change): # - # *. Try to talk somebody else into doing it! If not, .. - # - # *. Make a backup of your current version in case things don't work out! - # # *. Think of a new, unused character for the token type, and add to # the array @valid_token_types in the BEGIN section of this package. # For example, I used 'v' for v-strings. @@ -5156,13 +5138,13 @@ EOM # Given: # $line_of_tokens = ref to hash of values being filled for this line - # $leading_space_char_count - # = number of leading space characters on this line, or - # = undef if not availailable + # $trimmed_input_line + # = the input line without leading whitespace, and chomped, OR + # = undef if not available # Returns: # nothing - my ( $self, $line_of_tokens, $leading_space_char_count ) = @_; + my ( $self, $line_of_tokens, $trimmed_input_line ) = @_; my $untrimmed_input_line = $line_of_tokens->{_line_text}; # Extract line number for use in error messages @@ -5187,99 +5169,34 @@ EOM } } - $input_line = $untrimmed_input_line; - chomp $input_line; - - # Reinitialize the multi-line quote flag + # Use untrimmed line if we are continuing in a type 'Q' quote if ( $in_quote && $quote_type eq 'Q' ) { $line_of_tokens->{_starting_in_quote} = 1; + $input_line = $untrimmed_input_line; + chomp $input_line; } - # Trim start of this line unless we are continuing a quoted line. + # Trim start of this line if we are not continuing a quoted line. # Do not trim end because we might end in a quote (test: deken4.pl) # Perl::Tidy::Formatter will delete needless trailing blanks else { $line_of_tokens->{_starting_in_quote} = 0; - if ( !length($input_line) ) { + # Use the pre-computed trimmed line if it is defined + $input_line = $trimmed_input_line; - # line is empty + # but fix if $trimmed_input_line is not defined + if ( !defined($input_line) ) { + $input_line = $untrimmed_input_line; + $input_line =~ s/^\s+//; + chomp $input_line; } - else { - - # Trim the leading spaces.. - - #---------------------------------------------- - # Option 1: Use saved leading spaces if defined - #---------------------------------------------- - my $spaces = $leading_space_char_count; - if ( defined($spaces) ) { - - if ( $spaces >= length($input_line) ) { - - # line has all blank characters - $input_line = EMPTY_STRING; - $spaces = 0; - } - } - - #---------------------------------------------------------- - # Option 2: Otherwise, find leading whitespace with a regex - #---------------------------------------------------------- - else { - # otherwise use slow method - if ( $input_line =~ m/\S/g ) { - - # line has non-space with possible leading spaces - $spaces = pos($input_line) - 1; - } - else { - - # line has all blank characters - $input_line = EMPTY_STRING; - $spaces = 0; - } - } - - # Any leading whitespace? - if ( $spaces > 0 ) { - - # Verify that the leading whitespace is all whitespace - if (DEVEL_MODE) { - - # A change must have been made to the line text after - # $spaces was calculated - my $leading_space = substr( $input_line, 0, $spaces ); - if ( $leading_space =~ /\S/ ) { - $self->Fault(<[_save_logfile_] ) { - my $guess = $self->guess_old_indentation_level( - $untrimmed_input_line); - $line_of_tokens->{_guessed_indentation_level} = $guess; - } - } - elsif ($spaces) { - - # Negative space count - should never happen - DEVEL_MODE && $self->Fault(<Fault(<[_save_logfile_] && length($input_line) ) { + my $guess = + $self->guess_old_indentation_level($untrimmed_input_line); + $line_of_tokens->{_guessed_indentation_level} = $guess; } }