From: Steve Hancock
Date: Fri, 6 Nov 2020 04:11:37 +0000 (-0800)
Subject: speedup Tokenizer
X-Git-Tag: 20201202~56
X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=71cecc75f1a4079f6dd20d68a9d6a322d1095260;p=perltidy.git

speedup Tokenizer
---

diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm
index ef46d5d8..163e085f 100644
--- a/lib/Perl/Tidy/Tokenizer.pm
+++ b/lib/Perl/Tidy/Tokenizer.pm
@@ -98,6 +98,7 @@ use vars qw{
   %is_sub
   %is_package
   %is_comma_question_colon
+  %other_line_endings
 };
 
 # possible values of operator_expected()
@@ -652,9 +653,12 @@ sub get_line {
     my $input_line_separator = "";
     if ( chomp($input_line) ) { $input_line_separator = $/ }
 
-    # TODO: what other characters should be included here?
-    if ( $input_line =~ s/((\r|\035|\032)+)$// ) {
-        $input_line_separator = $2 . $input_line_separator;
+    # The first test here very significantly speeds things up, but be sure to
+    # keep the regex and hash %other_line_endings the same.
+    if ( $other_line_endings{ substr( $input_line, -1 ) } ) {
+        if ( $input_line =~ s/((\r|\035|\032)+)$// ) {
+            $input_line_separator = $2 . $input_line_separator;
+        }
     }
 
     # for backwards compatibility we keep the line text terminated with
@@ -8504,6 +8508,12 @@ BEGIN {
     push @q, ',';
     @is_comma_question_colon{@q} = (1) x scalar(@q);
 
+    # Hash of other possible line endings which may occur.
+    # Keep these coordinated with the regex where this is used.
+    # Note: chr(015)="\r".
+    @q = ( chr(015), chr(035), chr(032) );
+    @other_line_endings{@q} = (1) x scalar(@q);
+
     # These keywords are handled specially in the tokenizer code:
     my @special_keywords = qw(
       do