- }
- }
- return ( $is_pattern, $msg );
-}
-
-sub find_here_doc {
-
- # find the target of a here document, if any
- # input parameters:
- # $i - token index of the second < of <<
- # ($i must be less than the last token index if this is called)
- # output parameters:
- # $found_target = 0 didn't find target; =1 found target
- # HERE_TARGET - the target string (may be empty string)
- # $i - unchanged if not here doc,
- # or index of the last token of the here target
- my ( $expecting, $i, $rtokens, $rtoken_map ) = @_;
- my $ibeg = $i;
- my $found_target = 0;
- my $here_doc_target = '';
- my $here_quote_character = '';
- my ( $next_nonblank_token, $i_next_nonblank, $next_token );
- $next_token = $$rtokens[ $i + 1 ];
-
- # perl allows a backslash before the target string (heredoc.t)
- my $backslash = 0;
- if ( $next_token eq '\\' ) {
- $backslash = 1;
- $next_token = $$rtokens[ $i + 2 ];
- }
-
- ( $next_nonblank_token, $i_next_nonblank ) =
- find_next_nonblank_token_on_this_line( $i, $rtokens );
-
- if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
-
- my $in_quote = 1;
- my $quote_depth = 0;
- my $quote_pos = 0;
-
- ( $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth ) =
- follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
- $here_quote_character, $quote_pos, $quote_depth );
-
- if ($in_quote) { # didn't find end of quote, so no target found
- $i = $ibeg;
- }
- else { # found ending quote
- my $j;
- $found_target = 1;
-
- my $tokj;
- for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
- $tokj = $$rtokens[$j];
-
- # we have to remove any backslash before the quote character
- # so that the here-doc-target exactly matches this string
- next
- if ( $tokj eq "\\"
- && $j < $i - 1
- && $$rtokens[ $j + 1 ] eq $here_quote_character );
- $here_doc_target .= $tokj;
- }
- }
- }
-
- elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
- $found_target = 1;
- write_logfile_entry(
- "found blank here-target after <<; suggest using \"\"\n");
- $i = $ibeg;
- }
- elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after <<
-
- my $here_doc_expected;
- if ( $expecting == UNKNOWN ) {
- $here_doc_expected = guess_if_here_doc($next_token);
- }
- else {
- $here_doc_expected = 1;
- }
-
- if ($here_doc_expected) {
- $found_target = 1;
- $here_doc_target = $next_token;
- $i = $ibeg + 1;
- }
-
- }
- else {
-
- if ( $expecting == TERM ) {
- $found_target = 1;
- write_logfile_entry("Note: bare here-doc operator <<\n");
- }
- else {
- $i = $ibeg;
- }
- }
-
- # patch to neglect any prepended backslash
- if ( $found_target && $backslash ) { $i++ }
-
- return ( $found_target, $here_doc_target, $here_quote_character, $i );
-}
-
-# try to resolve here-doc vs. shift by looking ahead for
-# non-code or the end token (currently only looks for end token)
-# returns 1 if it is probably a here doc, 0 if not
-sub guess_if_here_doc {
-
- # This is how many lines we will search for a target as part of the
- # guessing strategy. It is a constant because there is probably
- # little reason to change it.
- use constant HERE_DOC_WINDOW => 40;
-
- my $next_token = shift;
- my $here_doc_expected = 0;
- my $line;
- my $k = 0;
- my $msg = "checking <<";
-
- while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
- {
- chomp $line;
-
- if ( $line =~ /^$next_token$/ ) {
- $msg .= " -- found target $next_token ahead $k lines\n";
- $here_doc_expected = 1; # got it
- last;
- }
- last if ( $k >= HERE_DOC_WINDOW );
- }
-
- unless ($here_doc_expected) {
-
- if ( !defined($line) ) {
- $here_doc_expected = -1; # hit eof without seeing target
- $msg .= " -- must be shift; target $next_token not in file\n";
-
- }
- else { # still unsure..taking a wild guess
-
- if ( !$is_constant{$current_package}{$next_token} ) {
- $here_doc_expected = 1;
- $msg .=
- " -- guessing it's a here-doc ($next_token not a constant)\n";
- }
- else {
- $msg .=
- " -- guessing it's a shift ($next_token is a constant)\n";
- }
- }
- }
- write_logfile_entry($msg);
- return $here_doc_expected;
-}
-
-sub do_quote {
-
- # follow (or continue following) quoted string or pattern
- # $in_quote return code:
- # 0 - ok, found end
- # 1 - still must find end of quote whose target is $quote_character
- # 2 - still looking for end of first of two quotes
- my ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth, $rtokens,
- $rtoken_map )
- = @_;
-
- if ( $in_quote == 2 ) { # two quotes/patterns to follow
- my $ibeg = $i;
- ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
- follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
- $quote_pos, $quote_depth );
-
- if ( $in_quote == 1 ) {
- if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
- $quote_character = '';
- }
- }
-
- if ( $in_quote == 1 ) { # one (more) quote to follow
- my $ibeg = $i;
- ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
- follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
- $quote_pos, $quote_depth );
- }
- return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth );
-}
-
-sub scan_number_do {
-
- # scan a number in any of the formats that Perl accepts
- # Underbars (_) are allowed in decimal numbers.
- # input parameters -
- # $input_line - the string to scan
- # $i - pre_token index to start scanning
- # $rtoken_map - reference to the pre_token map giving starting
- # character position in $input_line of token $i
- # output parameters -
- # $i - last pre_token index of the number just scanned
- # number - the number (characters); or undef if not a number
-
- my ( $input_line, $i, $rtoken_map, $input_type ) = @_;
- my $pos_beg = $$rtoken_map[$i];
- my $pos;
- my $i_begin = $i;
- my $number = undef;
- my $type = $input_type;
-
- my $first_char = substr( $input_line, $pos_beg, 1 );
-
- # Look for bad starting characters; Shouldn't happen..
- if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
- warning("Program bug - scan_number given character $first_char\n");
- report_definite_bug();
- return ( $i, $type, $number );
- }
-
- # handle v-string without leading 'v' character ('Two Dot' rule)
- # (vstring.t)
- # TODO: v-strings may contain underscores
- pos($input_line) = $pos_beg;
- if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
- $pos = pos($input_line);
- my $numc = $pos - $pos_beg;
- $number = substr( $input_line, $pos_beg, $numc );
- $type = 'v';
- unless ($saw_v_string) { report_v_string($number) }
- }
-
- # handle octal, hex, binary
- if ( !defined($number) ) {
- pos($input_line) = $pos_beg;
- if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g )
- {
- $pos = pos($input_line);
- my $numc = $pos - $pos_beg;
- $number = substr( $input_line, $pos_beg, $numc );
- $type = 'n';
- }
- }
-
- # handle decimal
- if ( !defined($number) ) {
- pos($input_line) = $pos_beg;
-
- if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
- $pos = pos($input_line);
-
- # watch out for things like 0..40 which would give 0. by this;
- if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' )
- && ( substr( $input_line, $pos, 1 ) eq '.' ) )
- {
- $pos--;
- }
- my $numc = $pos - $pos_beg;
- $number = substr( $input_line, $pos_beg, $numc );
- $type = 'n';
- }
- }
-
- # filter out non-numbers like e + - . e2 .e3 +e6
- # the rule: at least one digit, and any 'e' must be preceded by a digit
- if (
- $number !~ /\d/ # no digits
- || ( $number =~ /^(.*)[eE]/
- && $1 !~ /\d/ ) # or no digits before the 'e'
- )
- {
- $number = undef;
- $type = $input_type;
- return ( $i, $type, $number );
- }
-
- # Found a number; now we must convert back from character position
- # to pre_token index. An error here implies user syntax error.
- # An example would be an invalid octal number like '009'.
- my $error;
- ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
- if ($error) { warning("Possibly invalid number\n") }
-
- return ( $i, $type, $number );
-}
-
-sub scan_bare_identifier_do {
-
- # this routine is called to scan a token starting with an alphanumeric
- # variable or package separator, :: or '.
-
- my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map ) = @_;
- my $i_begin = $i;
- my $package = undef;
-
- my $i_beg = $i;
-
- # we have to back up one pretoken at a :: since each : is one pretoken
- if ( $tok eq '::' ) { $i_beg-- }
- if ( $tok eq '->' ) { $i_beg-- }
- my $pos_beg = $$rtoken_map[$i_beg];
- pos($input_line) = $pos_beg;
-
- # Examples:
- # A::B::C
- # A::
- # ::A
- # A'B
- if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {
-
- my $pos = pos($input_line);
- my $numc = $pos - $pos_beg;
- $tok = substr( $input_line, $pos_beg, $numc );
-
- # type 'w' includes anything without leading type info
- # ($,%,@,*) including something like abc::def::ghi
- $type = 'w';
-
- my $sub_name = "";
- if ( defined($2) ) { $sub_name = $2; }
- if ( defined($1) ) {
- $package = $1;