#-------
# Blanks
#-------
- # For blanks, the ci should not be important,
- # but to match existing code a rule for blanks seems to be:
- # A blank after closing token has same ci as previous token,
- # Otherwise a blank has same ci as next token;
if ( $type eq 'b' ) {
$ci_next = $ci_this;
- if ( $is_closing_type{$last_type} ) {
- $ci_this = $ci_last;
- }
+
+ # We should never be using the ci of a blank token, but for
+ # reference, here is the rule and code to match the old ci coding:
+
+ # A blank after closing token has same ci as previous token,
+ # Otherwise a blank has same ci as next token;
+ # if ( $is_closing_type{$last_type} ) {
+ # $ci_this = $ci_last;
+ # }
+
$rtoken_K->[_CI_LEVEL_] = $ci_this;
# 'next' to avoid saving last_ values for blanks and commas
? $K_closing_ternary->{$seqno}
: $K_closing_container->{$seqno};
- ##my $Kn = $self->K_next_nonblank($KK);
+ # $Kn = $self->K_next_nonblank($KK);
my $Kn;
if ( $KK < $Klimit ) {
$Kn = $KK + 1;
}
}
- ##my $Kcn = $self->K_next_code($Kc);
+ # $Kcn = $self->K_next_code($Kc);
my $Kcn;
if ( $Kc && $Kc < $Klimit ) {
$Kcn = $Kc + 1;
# now prepare the final list of tokens and types
#-----------------------------------------------
if ( $self->[_calculate_ci_] ) {
- $self->tokenizer_wrapup_line_with_ci($line_of_tokens);
+ $self->OLD_tokenizer_wrapup_line($line_of_tokens);
}
else {
- $self->tokenizer_wrapup_line_no_ci($line_of_tokens);
+ $self->tokenizer_wrapup_line($line_of_tokens);
}
return;
return;
} ## end sub tokenizer_main_loop
- sub tokenizer_wrapup_line_with_ci {
+ sub OLD_tokenizer_wrapup_line {
my ( $self, $line_of_tokens ) = @_;
#---------------------------------------------------------
$line_of_tokens->{_rci_levels} = \@ci_string;
return;
- } ## end sub tokenizer_wrapup_line_with_ci
+ } ## end sub OLD_tokenizer_wrapup_line
- sub tokenizer_wrapup_line_no_ci {
+ sub tokenizer_wrapup_line {
my ( $self, $line_of_tokens ) = @_;
#---------------------------------------------------------
$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string;
+ # Remember starting nesting block string
+ my $nesting_block_string_0 = $nesting_block_string;
+
#-----------------
# Loop over tokens
#-----------------
#--------------------------------
if ( !$routput_type_sequence->[$i] ) {
- # 1.1 blanks and comments
- if ( $type_i eq 'b' || $type_i eq '#' ) {
-
- }
-
- # 1.2 types ';' and 't'
+ # 1.1 types ';' and 't'
# - output anonymous 'sub' as keyword (type 'k')
# - output __END__, __DATA__, and format as type 'k' instead
# of ';' to make html colors correct, etc.
- elsif ( $is_semicolon_or_t{$type_i} ) {
+ if ( $is_semicolon_or_t{$type_i} ) {
my $tok_i = $rtokens->[$i];
if ( $is_END_DATA_format_sub{$tok_i} ) {
$type_i = 'k';
}
}
- # 1.3 Check for an invalid token type..
+ # 1.2 Check for an invalid token type..
# This can happen by running perltidy on non-scripts although
# it could also be bug introduced by programming change. Perl
# silently accepts a 032 (^Z) and takes it as the end
# break BEFORE '?' in a nested ternary
$level_i = $level_in_tokenizer;
$nesting_block_string .= "$nesting_block_flag";
+
}
}
else {
EOM
}
+ # The starting nesting block string, which is used in any .LOG
+ # output, should include the first token of the line
+ if ( !@levels ) {
+ $nesting_block_string_0 = $nesting_block_string;
+ }
+
# Store values for a sequenced token
push( @levels, $level_i );
push( @block_type, $routput_block_type->[$i] );
}
- #-------------------------------------
- # 3. Form and store the PREVIOUS token
- #-------------------------------------
- if ( defined($rtoken_map_im) ) {
- my $numc =
- $rtoken_map->[$i] - $rtoken_map_im; # how many characters
+ }
- if ( $numc > 0 ) {
- push( @tokens,
- substr( $input_line, $rtoken_map_im, $numc ) );
- }
- else {
+ # End loop to over tokens
- # Should not happen unless @{$rtoken_map} is corrupted
- DEVEL_MODE
- && $self->Fault(
- "number of characters is '$numc' but should be >0\n");
- }
- }
+ $line_of_tokens->{_nesting_blocks_0} = $nesting_block_string_0;
- # or grab some values for the leading token (needed for log output)
- else {
- $line_of_tokens->{_nesting_blocks_0} = $nesting_block_string;
- }
+ #--------------------------
+ # Form and store the tokens
+ #--------------------------
+ if (@levels) {
- $rtoken_map_im = $rtoken_map->[$i];
- }
+ my $im = shift @{$routput_token_list};
+ my $offset = $rtoken_map->[$im];
+ foreach my $i ( @{$routput_token_list} ) {
+ my $numc = $rtoken_map->[$i] - $offset;
+ push( @tokens, substr( $input_line, $offset, $numc ) );
- #------------------------
- # End loop to over tokens
- #------------------------
+ if ( DEVEL_MODE && $numc <= 0 ) {
- # Form and store the final token of this line
- if ( defined($rtoken_map_im) ) {
- my $numc = length($input_line) - $rtoken_map_im;
- if ( $numc > 0 ) {
- push( @tokens, substr( $input_line, $rtoken_map_im, $numc ) );
+ # Should not happen unless @{$rtoken_map} is corrupted
+ $self->Fault(
+ "number of characters is '$numc' but should be >0\n");
+ }
+ $offset = $rtoken_map->[$i];
}
- else {
- # Should not happen unless @{$rtoken_map} is corrupted
- DEVEL_MODE
- && $self->Fault(
+ # Form and store the final token of this line
+ my $numc = length($input_line) - $offset;
+ push( @tokens, substr( $input_line, $offset, $numc ) );
+
+ if ( DEVEL_MODE && $numc <= 0 ) {
+ $self->Fault(
"Number of Characters is '$numc' but should be >0\n");
}
}
$line_of_tokens->{_rci_levels} = \@ci_levels;
return;
- } ## end sub tokenizer_wrapup_line_no_ci
+ } ## end sub tokenizer_wrapup_line
} ## end tokenize_this_line