+ # TODO: add future error checks to be sure we have a valid
+ # sub name. For example, 'sub &doit' is wrong. Also, be sure
+ # a name is given if and only if a non-anonymous sub is
+ # appropriate.
+ # USES GLOBAL VARS: $current_package, $last_nonblank_token,
+ # $in_attribute_list, %saw_function_definition,
+ # $statement_type
+
+ my (
+ $input_line, $i, $i_beg,
+ $tok, $type, $rtokens,
+ $rtoken_map, $id_scan_state, $max_token_index
+ ) = @_;
+ $id_scan_state = ""; # normally we get everything in one call
+ my $subname = undef;
+ my $package = undef;
+ my $proto = undef;
+ my $attrs = undef;
+ my $match;
+
+ my $pos_beg = $$rtoken_map[$i_beg];
+ pos($input_line) = $pos_beg;
+
+ # sub NAME PROTO ATTRS
+ if (
+ $input_line =~ m/\G\s*
+ ((?:\w*(?:'|::))*) # package - something that ends in :: or '
+ (\w+) # NAME - required
+ (\s*\([^){]*\))? # PROTO - something in parens
+ (\s*:)? # ATTRS - leading : of attribute list
+ /gcx
+ )
+ {
+ $match = 1;
+ $subname = $2;
+ $proto = $3;
+ $attrs = $4;
+
+ $package = ( defined($1) && $1 ) ? $1 : $current_package;
+ $package =~ s/\'/::/g;
+ if ( $package =~ /^\:/ ) { $package = 'main' . $package }
+ $package =~ s/::$//;
+ my $pos = pos($input_line);
+ my $numc = $pos - $pos_beg;
+ $tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
+ $type = 'i';
+ }
+
+ # Look for prototype/attributes not preceded on this line by subname;
+ # This might be an anonymous sub with attributes,
+ # or a prototype on a separate line from its sub name
+ elsif (
+ $input_line =~ m/\G(\s*\([^){]*\))? # PROTO
+ (\s*:)? # ATTRS leading ':'
+ /gcx
+ && ( $1 || $2 )
+ )
+ {
+ $match = 1;
+ $proto = $1;
+ $attrs = $2;
+
+ # Handle prototype on separate line from subname
+ if ($subname_saved) {
+ $package = $package_saved;
+ $subname = $subname_saved;
+ $tok = $last_nonblank_token;
+ }
+ $type = 'i';
+ }
+
+ if ($match) {
+
+ # ATTRS: if there are attributes, back up and let the ':' be
+ # found later by the scanner.
+ my $pos = pos($input_line);
+ if ($attrs) {
+ $pos -= length($attrs);
+ }
+
+ my $next_nonblank_token = $tok;
+
+ # catch case of line with leading ATTR ':' after anonymous sub
+ if ( $pos == $pos_beg && $tok eq ':' ) {
+ $type = 'A';
+ $in_attribute_list = 1;
+ }
+
+ # We must convert back from character position
+ # to pre_token index.
+ else {
+
+ # I don't think an error flag can occur here ..but ?
+ my $error;
+ ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
+ $max_token_index );
+ if ($error) { warning("Possibly invalid sub\n") }
+
+ # check for multiple definitions of a sub
+ ( $next_nonblank_token, my $i_next ) =
+ find_next_nonblank_token_on_this_line( $i, $rtokens,
+ $max_token_index );
+ }
+
+ if ( $next_nonblank_token =~ /^(\s*|#)$/ )
+ { # skip blank or side comment
+ my ( $rpre_tokens, $rpre_types ) =
+ peek_ahead_for_n_nonblank_pre_tokens(1);
+ if ( defined($rpre_tokens) && @$rpre_tokens ) {
+ $next_nonblank_token = $rpre_tokens->[0];
+ }
+ else {
+ $next_nonblank_token = '}';
+ }
+ }
+ $package_saved = "";
+ $subname_saved = "";
+ if ( $next_nonblank_token eq '{' ) {
+ if ($subname) {
+
+ # Check for multiple definitions of a sub, but
+ # it is ok to have multiple sub BEGIN, etc,
+ # so we do not complain if name is all caps
+ if ( $saw_function_definition{$package}{$subname}
+ && $subname !~ /^[A-Z]+$/ )
+ {
+ my $lno = $saw_function_definition{$package}{$subname};
+ warning(
+"already saw definition of 'sub $subname' in package '$package' at line $lno\n"
+ );
+ }
+ $saw_function_definition{$package}{$subname} =
+ $tokenizer_self->{_last_line_number};
+ }
+ }
+ elsif ( $next_nonblank_token eq ';' ) {
+ }
+ elsif ( $next_nonblank_token eq '}' ) {
+ }
+
+ # ATTRS - if an attribute list follows, remember the name
+ # of the sub so the next opening brace can be labeled.
+ # Setting 'statement_type' causes any ':'s to introduce
+ # attributes.
+ elsif ( $next_nonblank_token eq ':' ) {
+ $statement_type = $tok;
+ }
+
+ # see if PROTO follows on another line:
+ elsif ( $next_nonblank_token eq '(' ) {
+ if ( $attrs || $proto ) {
+ warning(
+"unexpected '(' after definition or declaration of sub '$subname'\n"
+ );
+ }
+ else {
+ $id_scan_state = 'sub'; # we must come back to get proto
+ $statement_type = $tok;
+ $package_saved = $package;
+ $subname_saved = $subname;
+ }
+ }
+ elsif ($next_nonblank_token) { # EOF technically ok
+ warning(
+"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
+ );
+ }
+ check_prototype( $proto, $package, $subname );
+ }
+
+ # no match but line not blank
+ else {
+ }
+ return ( $i, $tok, $type, $id_scan_state );
+ }
+}
+
+#########i###############################################################
+# Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
+#########################################################################
+
+sub find_next_nonblank_token {
+ my ( $i, $rtokens, $max_token_index ) = @_;
+
+ if ( $i >= $max_token_index ) {
+ if ( !peeked_ahead() ) {
+ peeked_ahead(1);
+ $rtokens =
+ peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
+ }
+ }
+ my $next_nonblank_token = $$rtokens[ ++$i ];
+
+ if ( $next_nonblank_token =~ /^\s*$/ ) {
+ $next_nonblank_token = $$rtokens[ ++$i ];
+ }
+ return ( $next_nonblank_token, $i );
+}
+
+sub numerator_expected {
+
+ # this is a filter for a possible numerator, in support of guessing
+ # for the / pattern delimiter token.
+ # returns -
+ # 1 - yes
+ # 0 - can't tell
+ # -1 - no
+ # Note: I am using the convention that variables ending in
+ # _expected have these 3 possible values.
+ my ( $i, $rtokens, $max_token_index ) = @_;
+ my $next_token = $$rtokens[ $i + 1 ];
+ if ( $next_token eq '=' ) { $i++; } # handle /=
+ my ( $next_nonblank_token, $i_next ) =
+ find_next_nonblank_token( $i, $rtokens, $max_token_index );
+
+ if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
+ 1;
+ }
+ else {
+
+ if ( $next_nonblank_token =~ /^\s*$/ ) {
+ 0;
+ }
+ else {
+ -1;
+ }
+ }
+}
+
+sub pattern_expected {
+
+ # This is the start of a filter for a possible pattern.
+ # It looks at the token after a possbible pattern and tries to
+ # determine if that token could end a pattern.
+ # returns -
+ # 1 - yes
+ # 0 - can't tell
+ # -1 - no
+ my ( $i, $rtokens, $max_token_index ) = @_;
+ my $next_token = $$rtokens[ $i + 1 ];
+ if ( $next_token =~ /^[msixpodualgc]/ ) { $i++; } # skip possible modifier
+ my ( $next_nonblank_token, $i_next ) =
+ find_next_nonblank_token( $i, $rtokens, $max_token_index );
+
+ # list of tokens which may follow a pattern
+ # (can probably be expanded)
+ if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
+ {
+ 1;
+ }
+ else {
+
+ if ( $next_nonblank_token =~ /^\s*$/ ) {
+ 0;
+ }
+ else {
+ -1;
+ }
+ }
+}
+
+sub find_next_nonblank_token_on_this_line {
+ my ( $i, $rtokens, $max_token_index ) = @_;
+ my $next_nonblank_token;
+
+ if ( $i < $max_token_index ) {
+ $next_nonblank_token = $$rtokens[ ++$i ];
+
+ if ( $next_nonblank_token =~ /^\s*$/ ) {
+
+ if ( $i < $max_token_index ) {
+ $next_nonblank_token = $$rtokens[ ++$i ];
+ }
+ }
+ }
+ else {
+ $next_nonblank_token = "";
+ }
+ return ( $next_nonblank_token, $i );
+}
+
+sub find_angle_operator_termination {
+
+ # We are looking at a '<' and want to know if it is an angle operator.
+ # We are to return:
+ # $i = pretoken index of ending '>' if found, current $i otherwise
+ # $type = 'Q' if found, '>' otherwise
+ my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_;
+ my $i = $i_beg;
+ my $type = '<';
+ pos($input_line) = 1 + $$rtoken_map[$i];
+
+ my $filter;
+
+ # we just have to find the next '>' if a term is expected
+ if ( $expecting == TERM ) { $filter = '[\>]' }
+
+ # we have to guess if we don't know what is expected
+ elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
+
+ # shouldn't happen - we shouldn't be here if operator is expected
+ else { warning("Program Bug in find_angle_operator_termination\n") }
+
+ # To illustrate what we might be looking at, in case we are
+ # guessing, here are some examples of valid angle operators
+ # (or file globs):
+ # <tmp_imp/*>
+ # <FH>
+ # <$fh>
+ # <*.c *.h>
+ # <_>
+ # <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
+ # <${PREFIX}*img*.$IMAGE_TYPE>
+ # <img*.$IMAGE_TYPE>
+ # <Timg*.$IMAGE_TYPE>
+ # <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
+ #
+ # Here are some examples of lines which do not have angle operators:
+ # return undef unless $self->[2]++ < $#{$self->[1]};
+ # < 2 || @$t >
+ #
+ # the following line from dlister.pl caused trouble:
+ # print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
+ #
+ # If the '<' starts an angle operator, it must end on this line and
+ # it must not have certain characters like ';' and '=' in it. I use
+ # this to limit the testing. This filter should be improved if
+ # possible.
+
+ if ( $input_line =~ /($filter)/g ) {
+
+ if ( $1 eq '>' ) {
+
+ # We MAY have found an angle operator termination if we get
+ # here, but we need to do more to be sure we haven't been
+ # fooled.
+ my $pos = pos($input_line);
+
+ my $pos_beg = $$rtoken_map[$i];
+ my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
+
+ # Reject if the closing '>' follows a '-' as in:
+ # if ( VERSION < 5.009 && $op-> name eq 'aassign' ) { }
+ if ( $expecting eq UNKNOWN ) {
+ my $check = substr( $input_line, $pos - 2, 1 );
+ if ( $check eq '-' ) {
+ return ( $i, $type );
+ }
+ }
+
+ ######################################debug#####
+ #write_diagnostics( "ANGLE? :$str\n");
+ #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
+ ######################################debug#####
+ $type = 'Q';
+ my $error;
+ ( $i, $error ) =
+ inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
+
+ # It may be possible that a quote ends midway in a pretoken.
+ # If this happens, it may be necessary to split the pretoken.
+ if ($error) {
+ warning(
+ "Possible tokinization error..please check this line\n");
+ report_possible_bug();
+ }
+
+ # Now let's see where we stand....
+ # OK if math op not possible
+ if ( $expecting == TERM ) {
+ }
+
+ # OK if there are no more than 2 pre-tokens inside
+ # (not possible to write 2 token math between < and >)
+ # This catches most common cases
+ elsif ( $i <= $i_beg + 3 ) {
+ write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
+ }
+
+ # Not sure..
+ else {
+
+ # Let's try a Brace Test: any braces inside must balance
+ my $br = 0;
+ while ( $str =~ /\{/g ) { $br++ }
+ while ( $str =~ /\}/g ) { $br-- }
+ my $sb = 0;
+ while ( $str =~ /\[/g ) { $sb++ }
+ while ( $str =~ /\]/g ) { $sb-- }
+ my $pr = 0;
+ while ( $str =~ /\(/g ) { $pr++ }
+ while ( $str =~ /\)/g ) { $pr-- }
+
+ # if braces do not balance - not angle operator
+ if ( $br || $sb || $pr ) {
+ $i = $i_beg;
+ $type = '<';
+ write_diagnostics(
+ "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
+ }
+
+ # we should keep doing more checks here...to be continued
+ # Tentatively accepting this as a valid angle operator.
+ # There are lots more things that can be checked.
+ else {
+ write_diagnostics(
+ "ANGLE-Guessing yes: $str expecting=$expecting\n");
+ write_logfile_entry("Guessing angle operator here: $str\n");
+ }
+ }
+ }
+
+ # didn't find ending >
+ else {
+ if ( $expecting == TERM ) {
+ warning("No ending > for angle operator\n");
+ }
+ }
+ }
+ return ( $i, $type );
+}
+
+sub scan_number_do {
+
+ # scan a number in any of the formats that Perl accepts
+ # Underbars (_) are allowed in decimal numbers.
+ # input parameters -
+ # $input_line - the string to scan
+ # $i - pre_token index to start scanning
+ # $rtoken_map - reference to the pre_token map giving starting
+ # character position in $input_line of token $i
+ # output parameters -
+ # $i - last pre_token index of the number just scanned
+ # number - the number (characters); or undef if not a number
+
+ my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_;
+ my $pos_beg = $$rtoken_map[$i];
+ my $pos;
+ my $i_begin = $i;
+ my $number = undef;
+ my $type = $input_type;
+
+ my $first_char = substr( $input_line, $pos_beg, 1 );
+
+ # Look for bad starting characters; Shouldn't happen..
+ if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
+ warning("Program bug - scan_number given character $first_char\n");
+ report_definite_bug();
+ return ( $i, $type, $number );
+ }
+
+ # handle v-string without leading 'v' character ('Two Dot' rule)
+ # (vstring.t)
+ # TODO: v-strings may contain underscores
+ pos($input_line) = $pos_beg;
+ if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
+ $pos = pos($input_line);
+ my $numc = $pos - $pos_beg;
+ $number = substr( $input_line, $pos_beg, $numc );
+ $type = 'v';
+ report_v_string($number);
+ }
+
+ # handle octal, hex, binary
+ if ( !defined($number) ) {
+ pos($input_line) = $pos_beg;
+ if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g )
+ {
+ $pos = pos($input_line);
+ my $numc = $pos - $pos_beg;
+ $number = substr( $input_line, $pos_beg, $numc );
+ $type = 'n';
+ }
+ }
+
+ # handle decimal
+ if ( !defined($number) ) {
+ pos($input_line) = $pos_beg;
+
+ if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
+ $pos = pos($input_line);
+
+ # watch out for things like 0..40 which would give 0. by this;
+ if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' )
+ && ( substr( $input_line, $pos, 1 ) eq '.' ) )
+ {
+ $pos--;
+ }
+ my $numc = $pos - $pos_beg;
+ $number = substr( $input_line, $pos_beg, $numc );
+ $type = 'n';
+ }
+ }
+
+ # filter out non-numbers like e + - . e2 .e3 +e6
+ # the rule: at least one digit, and any 'e' must be preceded by a digit
+ if (
+ $number !~ /\d/ # no digits
+ || ( $number =~ /^(.*)[eE]/
+ && $1 !~ /\d/ ) # or no digits before the 'e'
+ )
+ {
+ $number = undef;
+ $type = $input_type;
+ return ( $i, $type, $number );
+ }
+
+ # Found a number; now we must convert back from character position
+ # to pre_token index. An error here implies user syntax error.
+ # An example would be an invalid octal number like '009'.
+ my $error;
+ ( $i, $error ) =
+ inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
+ if ($error) { warning("Possibly invalid number\n") }
+
+ return ( $i, $type, $number );
+}
+
+sub inverse_pretoken_map {
+
+ # Starting with the current pre_token index $i, scan forward until
+ # finding the index of the next pre_token whose position is $pos.
+ my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
+ my $error = 0;
+
+ while ( ++$i <= $max_token_index ) {
+
+ if ( $pos <= $$rtoken_map[$i] ) {
+
+ # Let the calling routine handle errors in which we do not
+ # land on a pre-token boundary. It can happen by running
+ # perltidy on some non-perl scripts, for example.
+ if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
+ $i--;
+ last;
+ }
+ }
+ return ( $i, $error );
+}
+
+sub find_here_doc {
+
+ # find the target of a here document, if any
+ # input parameters:
+ # $i - token index of the second < of <<
+ # ($i must be less than the last token index if this is called)
+ # output parameters:
+ # $found_target = 0 didn't find target; =1 found target
+ # HERE_TARGET - the target string (may be empty string)
+ # $i - unchanged if not here doc,
+ # or index of the last token of the here target
+ # $saw_error - flag noting unbalanced quote on here target
+ my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
+ my $ibeg = $i;
+ my $found_target = 0;
+ my $here_doc_target = '';
+ my $here_quote_character = '';
+ my $saw_error = 0;
+ my ( $next_nonblank_token, $i_next_nonblank, $next_token );
+ $next_token = $$rtokens[ $i + 1 ];
+
+ # perl allows a backslash before the target string (heredoc.t)
+ my $backslash = 0;
+ if ( $next_token eq '\\' ) {
+ $backslash = 1;
+ $next_token = $$rtokens[ $i + 2 ];
+ }
+
+ ( $next_nonblank_token, $i_next_nonblank ) =
+ find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
+
+ if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
+
+ my $in_quote = 1;
+ my $quote_depth = 0;
+ my $quote_pos = 0;
+ my $quoted_string;
+
+ (
+ $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
+ $quoted_string
+ )
+ = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
+ $here_quote_character, $quote_pos, $quote_depth, $max_token_index );
+
+ if ($in_quote) { # didn't find end of quote, so no target found
+ $i = $ibeg;
+ if ( $expecting == TERM ) {
+ warning(
+"Did not find here-doc string terminator ($here_quote_character) before end of line \n"
+ );
+ $saw_error = 1;
+ }
+ }
+ else { # found ending quote
+ my $j;
+ $found_target = 1;
+
+ my $tokj;
+ for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
+ $tokj = $$rtokens[$j];
+
+ # we have to remove any backslash before the quote character
+ # so that the here-doc-target exactly matches this string
+ next
+ if ( $tokj eq "\\"
+ && $j < $i - 1
+ && $$rtokens[ $j + 1 ] eq $here_quote_character );
+ $here_doc_target .= $tokj;
+ }
+ }
+ }
+
+ elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
+ $found_target = 1;
+ write_logfile_entry(
+ "found blank here-target after <<; suggest using \"\"\n");
+ $i = $ibeg;
+ }
+ elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after <<
+
+ my $here_doc_expected;
+ if ( $expecting == UNKNOWN ) {
+ $here_doc_expected = guess_if_here_doc($next_token);
+ }
+ else {
+ $here_doc_expected = 1;
+ }
+
+ if ($here_doc_expected) {
+ $found_target = 1;
+ $here_doc_target = $next_token;
+ $i = $ibeg + 1;
+ }
+
+ }
+ else {
+
+ if ( $expecting == TERM ) {
+ $found_target = 1;
+ write_logfile_entry("Note: bare here-doc operator <<\n");
+ }
+ else {
+ $i = $ibeg;
+ }
+ }
+
+ # patch to neglect any prepended backslash
+ if ( $found_target && $backslash ) { $i++ }
+
+ return ( $found_target, $here_doc_target, $here_quote_character, $i,
+ $saw_error );
+}
+
+sub do_quote {
+
+ # follow (or continue following) quoted string(s)
+ # $in_quote return code:
+ # 0 - ok, found end
+ # 1 - still must find end of quote whose target is $quote_character
+ # 2 - still looking for end of first of two quotes
+ #
+ # Returns updated strings:
+ # $quoted_string_1 = quoted string seen while in_quote=1
+ # $quoted_string_2 = quoted string seen while in_quote=2
+ my (
+ $i, $in_quote, $quote_character,
+ $quote_pos, $quote_depth, $quoted_string_1,
+ $quoted_string_2, $rtokens, $rtoken_map,
+ $max_token_index
+ ) = @_;
+
+ my $in_quote_starting = $in_quote;
+
+ my $quoted_string;
+ if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow
+ my $ibeg = $i;
+ (
+ $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
+ $quoted_string
+ )
+ = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
+ $quote_pos, $quote_depth, $max_token_index );
+ $quoted_string_2 .= $quoted_string;
+ if ( $in_quote == 1 ) {
+ if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
+ $quote_character = '';
+ }
+ else {
+ $quoted_string_2 .= "\n";
+ }
+ }
+
+ if ( $in_quote == 1 ) { # one (more) quote to follow
+ my $ibeg = $i;
+ (
+ $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
+ $quoted_string
+ )
+ = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
+ $quote_pos, $quote_depth, $max_token_index );
+ $quoted_string_1 .= $quoted_string;
+ if ( $in_quote == 1 ) {
+ $quoted_string_1 .= "\n";
+ }
+ }
+ return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
+ $quoted_string_1, $quoted_string_2 );
+}
+
+sub follow_quoted_string {
+
+ # scan for a specific token, skipping escaped characters
+ # if the quote character is blank, use the first non-blank character
+ # input parameters:
+ # $rtokens = reference to the array of tokens
+ # $i = the token index of the first character to search
+ # $in_quote = number of quoted strings being followed
+ # $beginning_tok = the starting quote character
+ # $quote_pos = index to check next for alphanumeric delimiter
+ # output parameters:
+ # $i = the token index of the ending quote character
+ # $in_quote = decremented if found end, unchanged if not
+ # $beginning_tok = the starting quote character
+ # $quote_pos = index to check next for alphanumeric delimiter
+ # $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
+ # $quoted_string = the text of the quote (without quotation tokens)
+ my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth,
+ $max_token_index )
+ = @_;
+ my ( $tok, $end_tok );
+ my $i = $i_beg - 1;
+ my $quoted_string = "";
+
+ TOKENIZER_DEBUG_FLAG_QUOTE && do {
+ print
+"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
+ };
+
+ # get the corresponding end token
+ if ( $beginning_tok !~ /^\s*$/ ) {
+ $end_tok = matching_end_token($beginning_tok);
+ }
+
+ # a blank token means we must find and use the first non-blank one
+ else {
+ my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
+
+ while ( $i < $max_token_index ) {
+ $tok = $$rtokens[ ++$i ];
+
+ if ( $tok !~ /^\s*$/ ) {
+
+ if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
+ $i = $max_token_index;
+ }
+ else {
+
+ if ( length($tok) > 1 ) {
+ if ( $quote_pos <= 0 ) { $quote_pos = 1 }
+ $beginning_tok = substr( $tok, $quote_pos - 1, 1 );
+ }
+ else {
+ $beginning_tok = $tok;
+ $quote_pos = 0;
+ }
+ $end_tok = matching_end_token($beginning_tok);
+ $quote_depth = 1;
+ last;
+ }
+ }
+ else {
+ $allow_quote_comments = 1;
+ }
+ }
+ }
+
+ # There are two different loops which search for the ending quote
+ # character. In the rare case of an alphanumeric quote delimiter, we
+ # have to look through alphanumeric tokens character-by-character, since
+ # the pre-tokenization process combines multiple alphanumeric
+ # characters, whereas for a non-alphanumeric delimiter, only tokens of
+ # length 1 can match.
+
+ ###################################################################
+ # Case 1 (rare): loop for case of alphanumeric quote delimiter..
+ # "quote_pos" is the position the current word to begin searching
+ ###################################################################
+ if ( $beginning_tok =~ /\w/ ) {
+
+ # Note this because it is not recommended practice except
+ # for obfuscated perl contests
+ if ( $in_quote == 1 ) {
+ write_logfile_entry(
+ "Note: alphanumeric quote delimiter ($beginning_tok) \n");
+ }
+
+ while ( $i < $max_token_index ) {
+
+ if ( $quote_pos == 0 || ( $i < 0 ) ) {
+ $tok = $$rtokens[ ++$i ];
+
+ if ( $tok eq '\\' ) {
+
+ # retain backslash unless it hides the end token
+ $quoted_string .= $tok
+ unless $$rtokens[ $i + 1 ] eq $end_tok;
+ $quote_pos++;
+ last if ( $i >= $max_token_index );
+ $tok = $$rtokens[ ++$i ];
+ }
+ }
+ my $old_pos = $quote_pos;
+
+ unless ( defined($tok) && defined($end_tok) && defined($quote_pos) )
+ {
+
+ }
+ $quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
+
+ if ( $quote_pos > 0 ) {
+
+ $quoted_string .=
+ substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
+
+ $quote_depth--;
+
+ if ( $quote_depth == 0 ) {
+ $in_quote--;
+ last;
+ }
+ }
+ else {
+ $quoted_string .= substr( $tok, $old_pos );
+ }
+ }
+ }
+
+ ########################################################################
+ # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
+ ########################################################################
+ else {
+
+ while ( $i < $max_token_index ) {
+ $tok = $$rtokens[ ++$i ];
+
+ if ( $tok eq $end_tok ) {
+ $quote_depth--;
+
+ if ( $quote_depth == 0 ) {
+ $in_quote--;
+ last;
+ }
+ }
+ elsif ( $tok eq $beginning_tok ) {
+ $quote_depth++;
+ }
+ elsif ( $tok eq '\\' ) {
+
+ # retain backslash unless it hides the beginning or end token
+ $tok = $$rtokens[ ++$i ];
+ $quoted_string .= '\\'
+ unless ( $tok eq $end_tok || $tok eq $beginning_tok );
+ }
+ $quoted_string .= $tok;
+ }
+ }
+ if ( $i > $max_token_index ) { $i = $max_token_index }
+ return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth,
+ $quoted_string );
+}
+
+sub indicate_error {
+ my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
+ interrupt_logfile();
+ warning($msg);
+ write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
+ resume_logfile();
+}
+
+sub write_error_indicator_pair {
+ my ( $line_number, $input_line, $pos, $carrat ) = @_;
+ my ( $offset, $numbered_line, $underline ) =
+ make_numbered_line( $line_number, $input_line, $pos );
+ $underline = write_on_underline( $underline, $pos - $offset, $carrat );
+ warning( $numbered_line . "\n" );
+ $underline =~ s/\s*$//;
+ warning( $underline . "\n" );
+}
+
+sub make_numbered_line {
+
+ # Given an input line, its line number, and a character position of
+ # interest, create a string not longer than 80 characters of the form
+ # $lineno: sub_string
+ # such that the sub_string of $str contains the position of interest
+ #
+ # Here is an example of what we want, in this case we add trailing
+ # '...' because the line is long.
+ #
+ # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
+ #
+ # Here is another example, this time in which we used leading '...'
+ # because of excessive length:
+ #
+ # 2: ... er of the World Wide Web Consortium's
+ #
+ # input parameters are:
+ # $lineno = line number
+ # $str = the text of the line
+ # $pos = position of interest (the error) : 0 = first character
+ #
+ # We return :
+ # - $offset = an offset which corrects the position in case we only
+ # display part of a line, such that $pos-$offset is the effective
+ # position from the start of the displayed line.
+ # - $numbered_line = the numbered line as above,
+ # - $underline = a blank 'underline' which is all spaces with the same
+ # number of characters as the numbered line.
+
+ my ( $lineno, $str, $pos ) = @_;
+ my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
+ my $excess = length($str) - $offset - 68;
+ my $numc = ( $excess > 0 ) ? 68 : undef;
+
+ if ( defined($numc) ) {
+ if ( $offset == 0 ) {
+ $str = substr( $str, $offset, $numc - 4 ) . " ...";
+ }
+ else {
+ $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
+ }
+ }