# scan state, id_scan_state. It updates id_scan_state based upon
# current id_scan_state and token, and returns an updated
# id_scan_state and the next index after the identifier.
+
# USES GLOBAL VARIABLES: $context, $last_nonblank_token,
# $last_nonblank_type
my $identifier_begin = $identifier;
my $tok = $tok_begin;
my $message = "";
+ my $tok_is_blank; # a flag to speed things up
my $in_prototype_or_signature = $container_type =~ /^sub\b/;
# these flags will be used to help figure out the type:
- my $saw_alpha = ( $tok =~ /^\w/ );
+ ##my $saw_alpha = ( $tok =~ /^\w/ ); # This was slow
+ my $saw_alpha;
my $saw_type;
# allow old package separator (') except in 'use' statement
my $allow_tick = ( $last_nonblank_token ne 'use' );
+ #########################################################
# get started by defining a type and a state if necessary
- unless ($id_scan_state) {
+ #########################################################
+
+ if ( !$id_scan_state ) {
$context = UNKNOWN_CONTEXT;
# fixup for digraph
}
elsif ( $tok =~ /^\w/ ) {
$id_scan_state = ':';
+ $saw_alpha = 1;
}
elsif ( $tok eq '->' ) {
$id_scan_state = '$';
}
else {
$i--;
- $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
+ $saw_alpha = ( $tok =~ /^\w/ );
+ $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
}
- # now loop to gather the identifier
+ ###############################
+ # loop to gather the identifier
+ ###############################
+
my $i_save = $i;
while ( $i < $max_token_index ) {
- $i_save = $i unless ( $tok =~ /^\s*$/ );
- $tok = $rtokens->[ ++$i ];
+ ##$i_save = $i unless ( $tok =~ /^\s*$/ ); # This was a slow statement
+ if ($tok_is_blank) { $tok_is_blank = undef }
+ else { $i_save = $i }
+ $tok = $rtokens->[ ++$i ];
+
+ # patch to make digraph :: if necessary
if ( ( $tok eq ':' ) && ( $rtokens->[ $i + 1 ] eq ':' ) ) {
$tok = '::';
$i++;
}
- if ( $id_scan_state eq '$' ) { # starting variable name
+ ########################
+ # Starting variable name
+ ########################
+
+ if ( $id_scan_state eq '$' ) {
if ( $tok eq '$' ) {
last;
}
}
-
- # POSTDEFREF ->@ ->% ->& ->*
- elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) {
- $identifier .= $tok;
- }
elsif ( $tok =~ /^\w/ ) { # alphanumeric ..
$saw_alpha = 1;
$id_scan_state = ':'; # now need ::
$identifier .= $tok;
}
+ elsif ( $tok eq '::' ) {
+ $id_scan_state = 'A';
+ $identifier .= $tok;
+ }
+
+ # POSTDEFREF ->@ ->% ->& ->*
+ elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) {
+ $identifier .= $tok;
+ }
elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
$saw_alpha = 1;
$id_scan_state = ':'; # now need ::
# howdy::123::bubba();
#
}
- elsif ( $tok eq '::' ) {
- $id_scan_state = 'A';
- $identifier .= $tok;
- }
# $# and POSTDEFREF ->$#
elsif ( ( $tok eq '#' ) && ( $identifier =~ /\$$/ ) ) { # $#array
# space ok after leading $ % * & @
elsif ( $tok =~ /^\s*$/ ) {
+ $tok_is_blank = 1;
+
if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
if ( length($identifier) > 1 ) {
last;
}
}
- elsif ( $id_scan_state eq '&' ) { # starting sub call?
- if ( $tok =~ /^[\$\w]/ ) { # alphanumeric ..
- $id_scan_state = ':'; # now need ::
- $saw_alpha = 1;
- $identifier .= $tok;
- }
- elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
- $id_scan_state = ':'; # now need ::
- $saw_alpha = 1;
- $identifier .= $tok;
- }
- elsif ( $tok =~ /^\s*$/ ) { # allow space
- }
- elsif ( $tok eq '::' ) { # leading ::
- $id_scan_state = 'A'; # accept alpha next
- $identifier .= $tok;
- }
- elsif ( $tok eq '{' ) {
- if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
- $i = $i_save;
- $id_scan_state = '';
- last;
- }
- else {
+ ###################################
+ # looking for alphanumeric after ::
+ ###################################
- # punctuation variable?
- # testfile: cunningham4.pl
- #
- # We have to be careful here. If we are in an unknown state,
- # we will reject the punctuation variable. In the following
- # example the '&' is a binary operator but we are in an unknown
- # state because there is no sigil on 'Prima', so we don't
- # know what it is. But it is a bad guess that
- # '&~' is a function variable.
- # $self->{text}->{colorMap}->[
- # Prima::PodView::COLOR_CODE_FOREGROUND
- # & ~tb::COLOR_INDEX ] =
- # $sec->{ColorCode}
- if ( $identifier eq '&' && $expecting ) {
- $identifier .= $tok;
- }
- else {
- $identifier = '';
- $i = $i_save;
- $type = '&';
- }
- $id_scan_state = '';
- last;
- }
- }
- elsif ( $id_scan_state eq 'A' ) { # looking for alpha (after ::)
+ elsif ( $id_scan_state eq 'A' ) {
+
+ $tok_is_blank = $tok =~ /^\s*$/;
- if ( $tok =~ /^\w/ ) { # found it
+ if ( $tok =~ /^\w/ ) { # found it
$identifier .= $tok;
- $id_scan_state = ':'; # now need ::
+ $id_scan_state = ':'; # now need ::
$saw_alpha = 1;
}
elsif ( $tok eq "'" && $allow_tick ) {
$identifier .= $tok;
- $id_scan_state = ':'; # now need ::
+ $id_scan_state = ':'; # now need ::
$saw_alpha = 1;
}
- elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
+ ##elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
+ elsif ( $tok_is_blank && $identifier =~ /^sub / ) {
$id_scan_state = '(';
$identifier .= $tok;
}
- elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
+ ##elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
+ elsif ( $tok eq '(' && $identifier =~ /^sub / ) {
$id_scan_state = ')';
$identifier .= $tok;
}
last;
}
}
+
+ ###################################
+ # looking for :: after alphanumeric
+ ###################################
+
elsif ( $id_scan_state eq ':' ) { # looking for :: after alpha
+ $tok_is_blank = $tok =~ /^\s*$/;
+
if ( $tok eq '::' ) { # got it
$identifier .= $tok;
$id_scan_state = 'A'; # now require alpha
$identifier .= $tok;
}
}
- elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
+ ##elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
+ elsif ( $tok_is_blank && $identifier =~ /^sub / ) {
$id_scan_state = '(';
$identifier .= $tok;
}
- elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
+ ##elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
+ elsif ( $tok eq '(' && $identifier =~ /^sub / ) {
$id_scan_state = ')';
$identifier .= $tok;
}
last;
}
}
- elsif ( $id_scan_state eq '(' ) { # looking for ( of prototype
- if ( $tok eq '(' ) { # got it
+ ##############################
+ # looking for '(' of prototype
+ ##############################
+
+ elsif ( $id_scan_state eq '(' ) {
+
+ if ( $tok eq '(' ) { # got it
$identifier .= $tok;
- $id_scan_state = ')'; # now find the end of it
+ $id_scan_state = ')'; # now find the end of it
}
- elsif ( $tok =~ /^\s*$/ ) { # blank - keep going
+ elsif ( $tok =~ /^\s*$/ ) { # blank - keep going
$identifier .= $tok;
+ $tok_is_blank = 1;
}
else {
- $id_scan_state = ''; # that's all - no prototype
+ $id_scan_state = ''; # that's all - no prototype
$i = $i_save;
last;
}
}
- elsif ( $id_scan_state eq ')' ) { # looking for ) to end
- if ( $tok eq ')' ) { # got it
+ ##############################
+ # looking for ')' of prototype
+ ##############################
+
+ elsif ( $id_scan_state eq ')' ) {
+
+ $tok_is_blank = $tok =~ /^\s*$/;
+
+ if ( $tok eq ')' ) { # got it
$identifier .= $tok;
- $id_scan_state = ''; # all done
+ $id_scan_state = ''; # all done
last;
}
elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
$identifier .= $tok;
}
}
- else { # can get here due to error in initialization
+
+ ###################
+ # Starting sub call
+ ###################
+
+ elsif ( $id_scan_state eq '&' ) {
+
+ if ( $tok =~ /^[\$\w]/ ) { # alphanumeric ..
+ $id_scan_state = ':'; # now need ::
+ $saw_alpha = 1;
+ $identifier .= $tok;
+ }
+ elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
+ $id_scan_state = ':'; # now need ::
+ $saw_alpha = 1;
+ $identifier .= $tok;
+ }
+ elsif ( $tok =~ /^\s*$/ ) { # allow space
+ $tok_is_blank = 1;
+ }
+ elsif ( $tok eq '::' ) { # leading ::
+ $id_scan_state = 'A'; # accept alpha next
+ $identifier .= $tok;
+ }
+ elsif ( $tok eq '{' ) {
+ if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
+ $i = $i_save;
+ $id_scan_state = '';
+ last;
+ }
+ else {
+
+ # punctuation variable?
+ # testfile: cunningham4.pl
+ #
+ # We have to be careful here. If we are in an unknown state,
+ # we will reject the punctuation variable. In the following
+ # example the '&' is a binary operator but we are in an unknown
+ # state because there is no sigil on 'Prima', so we don't
+ # know what it is. But it is a bad guess that
+ # '&~' is a function variable.
+ # $self->{text}->{colorMap}->[
+ # Prima::PodView::COLOR_CODE_FOREGROUND
+ # & ~tb::COLOR_INDEX ] =
+ # $sec->{ColorCode}
+ if ( $identifier eq '&' && $expecting ) {
+ $identifier .= $tok;
+ }
+ else {
+ $identifier = '';
+ $i = $i_save;
+ $type = '&';
+ }
+ $id_scan_state = '';
+ last;
+ }
+ }
+
+ ######################
+ # unknown state - quit
+ ######################
+
+ else { # can get here due to error in initialization
$id_scan_state = '';
$i = $i_save;
last;
}
- }
+ } ## end of main loop
if ( $id_scan_state eq ')' ) {
warning("Hit end of line while seeking ) to end prototype\n");
$id_scan_state = '';
}
- # The deprecated variable $# does not combine with anything on the next line
+ # Patch: the deprecated variable $# does not combine with anything on the
+ # next line.
if ( $identifier eq '$#' ) { $id_scan_state = '' }
if ( $i < 0 ) { $i = 0 }
- unless ($type) {
+ # Be sure a token type is defined
+ if ( !$type ) {
if ($saw_type) {
} # this can happen on a restart
}
+ # See if we formed an identifier...
if ($identifier) {
$tok = $identifier;
if ($message) { write_logfile_entry($message) }
}
+
+ # did not find an identifier, back up
else {
$tok = $tok_begin;
$i = $i_begin;