# Static hashes initialized in a BEGIN block
%is_assignment,
+ %is_non_list_type,
%is_if_unless_and_or_last_next_redo_return,
%is_if_elsif_else_unless_while_until_for_foreach,
%is_if_unless_while_until_for_foreach,
);
@is_assignment{@q} = (1) x scalar(@q);
+ # a hash needed by break_lists for efficiency:
+ push @q, qw{ ; < > ~ f };
+ @is_non_list_type{@q} = (1) x scalar(@q);
+
@q = qw(is if unless and or err last next redo return);
@is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
my ($self) = @_;
+ #-----------------------------------------------------------------
+ # Define a 'bond strength' for each token pair in an output batch.
+ # See comments above for definition of bond strength.
+ #-----------------------------------------------------------------
+
my $rbond_strength_to_go = [];
my $rLL = $self->[_rLL_];
sub write_line_inner_loop {
my ( $self, $line_of_tokens_old, $line_of_tokens ) = @_;
- # Copy the tokens for this line to their new storage location
+ #---------------------------------------------------------------------
+ # Copy the tokens on one line received from the tokenizer to their new
+ # storage locations.
+ #---------------------------------------------------------------------
+
+ # Input parameters:
+ # $line_of_tokens_old = line received from tokenizer
+ # $line_of_tokens = line of tokens being formed for formatter
my $rtokens = $line_of_tokens_old->{_rtokens};
my $jmax = @{$rtokens} - 1;
# A line which is entirely a quote or pattern must go out
# verbatim. Note: the \n is contained in $input_line.
if ( $jmax <= 0 ) {
- if ( ( $input_line =~ "\t" ) ) {
+ if ( $self->[_save_logfile_] && $input_line =~ /\t/ ) {
my $input_line_number = $line_of_tokens->{_line_number};
$self->note_embedded_tab($input_line_number);
}
my $self = shift;
- # return parameters
+ #--------------------------------------------------------------------------
+ # This routine is called once per file to do as much formatting as possible
+ # before new line breaks are set.
+ #--------------------------------------------------------------------------
+
+ # Return parameters:
+ # Set $severe_error=true if processing must terminate immediately
my ( $severe_error, $rqw_lines );
+ # We change any spaces in --indent-only mode
if ( $rOpts->{'indent-only'} ) {
return ( $severe_error, $rqw_lines );
}
- # This routine is called once per file to do as much formatting as possible
- # before new line breaks are set.
-
- # Set $severe_error=true if processing must terminate immediately
-
# This routine makes all necessary and possible changes to the tokenization
# after the initial tokenization of the file. This is a tedious routine,
# but basically it consists of inserting and deleting whitespace between
# The level and ci_level of newly created spaces should be the
# same as the previous token. Otherwise blinking states can
# be created if the -lp mode is used. See similar coding in
- # sub 'store_token_and_space'. Fixes cases b1109 b1110.
+ # sub 'store_space_and_token'. Fixes cases b1109 b1110.
$rcopy->[_LEVEL_] =
$rLL_new->[-1]->[_LEVEL_];
$rcopy->[_CI_LEVEL_] =
my ( $self, $Kfirst, $Klast, $input_line_number ) = @_;
- #-------------------------------------------------------
- # Loop to copy all tokens on this line, with any changes
- #-------------------------------------------------------
+ #-----------------------------------------------------------------
+ # Loop to copy all tokens on one line, making any spacing changes,
+ # while also collecting information needed by later subs.
+ #-----------------------------------------------------------------
my $type_sequence;
my $rtoken_vars;
foreach my $KK ( $Kfirst .. $Klast ) {
# this)
$token =~ s/\s*$//;
$rtoken_vars->[_TOKEN_] = $token;
- $self->note_embedded_tab($input_line_number)
- if ( $token =~ "\t" );
- $self->store_token_and_space( $rtoken_vars,
- $rwhitespace_flags->[$KK] == WS_YES );
+ if ( $self->[_save_logfile_] && $token =~ /\t/ ) {
+ $self->note_embedded_tab($input_line_number);
+ }
+ if ( $rwhitespace_flags->[$KK] == WS_YES ) {
+ $self->store_space_and_token($rtoken_vars);
+ }
+ else {
+ $self->store_token($rtoken_vars);
+ }
next;
} ## end if ( $type eq 'q' )
# check a quote for problems
elsif ( $type eq 'Q' ) {
- $self->check_Q( $KK, $Kfirst, $input_line_number );
+ $self->check_Q( $KK, $Kfirst, $input_line_number )
+ if ( $self->[_save_logfile_] );
}
# Store this token with possible previous blank
if ( $rwhitespace_flags->[$KK] == WS_YES ) {
- $self->store_token_and_space( $rtoken_vars, 1 );
+ $self->store_space_and_token($rtoken_vars);
}
else {
$self->store_token($rtoken_vars);
} ## end sub set_permanently_broken
sub store_token {
+
my ( $self, $item ) = @_;
+ #------------------------------------------
+ # Store one token during respace operations
+ #------------------------------------------
+
+ # Input parameter:
+ # $item = ref to a token
+
# This will be the index of this item in the new array
my $KK_new = @{$rLL_new};
- #------------------------------------------------------------------
- # NOTE: called once per token so coding efficiency is critical here
- #------------------------------------------------------------------
+ # NOTE: this sub is called once per token so coding efficiency is critical.
# The next multiple assignment statements are significantly faster than
# doing them one-by-one.
return;
} ## end sub store_token
-sub store_token_and_space {
- my ( $self, $item, $want_space ) = @_;
+sub store_space_and_token {
+ my ( $self, $item ) = @_;
# store a token with preceding space if requested and needed
# First store the space
- if ( $want_space
- && @{$rLL_new}
+ if ( @{$rLL_new}
&& $rLL_new->[-1]->[_TYPE_] ne 'b'
&& $rOpts_add_whitespace )
{
# then the token
$self->store_token($item);
return;
-} ## end sub store_token_and_space
+} ## end sub store_space_and_token
sub add_phantom_semicolon {
sub check_Q {
- # Check that a quote looks okay
+ # Check that a quote looks okay, and report possible problems
+ # to the logfile.
+
my ( $self, $KK, $Kfirst, $line_number ) = @_;
my $token = $rLL->[$KK]->[_TOKEN_];
- $self->note_embedded_tab($line_number) if ( $token =~ "\t" );
+ if ( $token =~ /\t/ ) {
+ $self->note_embedded_tab($line_number);
+ }
# The remainder of this routine looks for something like
# '$var = s/xxx/yyy/;'
my $next_type = $rLL->[$Kn]->[_TYPE_];
next
unless ( ( $next_type eq 'q' || $next_type eq 'Q' )
- && $next_token =~ /^q/ );
+ && substr( $next_token, 0, 1 ) eq 'q' );
# The token before the closing container must also be a quote
my $Kouter_closing = $K_closing_container->{$outer_seqno};
my ($self) = @_;
my $radjusted_levels = $self->[_radjusted_levels_];
return unless defined($radjusted_levels) && @{$radjusted_levels};
- my $min = min( @{$radjusted_levels} ); # fast check for min
+ my $min = min( @{$radjusted_levels} ); # fast check for min
if ( $min < 0 ) {
# slow loop, but rarely needed
my ( $self, $Ktoken_vars, $rtoken_vars ) = @_;
- # Add one token to the next batch.
+ #-------------------------------------------------------
+ # Token storage utility for sub process_line_of_CODE.
+ # Add one token to the next batch of '_to_go' variables.
+ #-------------------------------------------------------
+
+ # Input parameters:
# $Ktoken_vars = the index K in the global token array
# $rtoken_vars = $rLL->[$Ktoken_vars] = the corresponding token values
# unless they are temporarily being overridden
- #------------------------------------------------------------------
# NOTE: called once per token so coding efficiency is critical here
- #------------------------------------------------------------------
my (
$summed_lengths_to_go[$max_index_to_go] + $length;
# Initializations for first token of new batch
- if ( $max_index_to_go == 0 ) {
+ if ( !$max_index_to_go ) {
# Reset flag '$starting_in_quote' for a new batch. It must be set
# to the value of '$in_continued_quote', but here for efficiency we
my ( $self, $has_side_comment ) = @_;
+ #--------------------------------------------------------------------
+ # Loop to move all tokens from an input line to a newly forming batch
+ #--------------------------------------------------------------------
+
# We do not want a leading blank if the previous batch just got output
if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
my ($self) = @_;
+ #-----------------------------------------------------------------
+ # This sub directs the formatting of one complete batch of tokens.
+ # The tokens of the batch are in the '_to_go' arrays.
+ #-----------------------------------------------------------------
+
my $this_batch = $self->[_this_batch_];
$batch_count++;
# Shortcut for block comments
# Note that this shortcut does not work for -lp yet
#--------------------------------------------------
- elsif ( $max_index_to_go == 0 && $types_to_go[0] eq '#' ) {
+ elsif ( !$max_index_to_go && $types_to_go[0] eq '#' ) {
my $ibeg = 0;
$this_batch->[_ri_first_] = [$ibeg];
$this_batch->[_ri_last_] = [$ibeg];
my $called_pad_array_to_go;
# set all forced breakpoints for good list formatting
- my $is_long_line = $max_index_to_go > 0
- && $self->excess_line_length( $imin, $max_index_to_go ) > 0;
-
- my $old_line_count_in_batch = 1;
+ my $is_long_line;
+ my $multiple_old_lines_in_batch;
if ( $max_index_to_go > 0 ) {
+ $is_long_line =
+ $self->excess_line_length( $imin, $max_index_to_go ) > 0;
+
my $Kbeg = $K_to_go[0];
my $Kend = $K_to_go[$max_index_to_go];
- $old_line_count_in_batch +=
+ $multiple_old_lines_in_batch =
$rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_];
}
my $rbond_strength_bias = [];
if (
$is_long_line
- || $old_line_count_in_batch > 1
+ || $multiple_old_lines_in_batch
# must always call break_lists() with unbalanced batches because
# it is maintaining some stacks
$self->break_all_chain_tokens( $ri_first, $ri_last );
- $self->break_equals( $ri_first, $ri_last );
+ $self->break_equals( $ri_first, $ri_last )
+ if @{$ri_first} >= 3;
# now we do a correction step to clean this up a bit
# (The only time we would not do this is for debugging)
# maximum line length.
#-----------------------------------------------------------
+ my ( $self, $saw_good_break, $rcolon_list, $rbond_strength_bias ) = @_;
+
+ # Input parameters:
+ # $saw_good_break - a flag set by break_lists
+ # $rcolon_list - ref to a list of all the ? and : tokens in the batch,
+ # in order.
+ # $rbond_strength_bias - small bond strength bias values set by break_lists
+
# Output: returns references to the arrays:
# @i_first
# @i_last
# may be updated to be =1 for any index $i after which there must be
# a break. This signals later routines not to undo the breakpoint.
- my ( $self, $saw_good_break, $rcolon_list, $rbond_strength_bias ) = @_;
-
- # @{$rcolon_list} is a list of all the ? and : tokens in the batch, in
- # order.
-
# Method:
# This routine is called if a statement is longer than the maximum line
# length, or if a preliminary scanning located desirable break points.
# which, if possible, does not exceed the maximum line length.
#-----------------------------------------------------------------
+ my (
+ $self, #
+
+ $i_begin,
+ $i_last_break,
+ $imax,
+ $last_break_strength,
+ $line_count,
+ $rbond_strength_to_go,
+ $saw_good_break,
+
+ ) = @_;
+
# Given:
# $i_begin = first index of range
# $i_last_break = index of previous break
# $leading_alignment_type = special token type after break
# $Msg = string of debug info
- my (
- $self, #
-
- $i_begin,
- $i_last_break,
- $imax,
- $last_break_strength,
- $line_count,
- $rbond_strength_to_go,
- $saw_good_break,
-
- ) = @_;
-
my $Msg = EMPTY_STRING;
my $strength = NO_BREAK;
my $i_test = $i_begin - 1;
$last_dot_index[$depth] = $i;
}
- # Turn off alignment if we are sure that this is not a list
+ # Turn off comma alignment if we are sure that this is not a list
# environment. To be safe, we will do this if we see certain
- # non-list tokens, such as ';', and also the environment is
- # not a list. Note that '=' could be in any of the = operators
- # (lextest.t). We can't just use the reported environment
- # because it can be incorrect in some cases.
- elsif ( ( $type =~ /^[\;\<\>\~f]$/ || $is_assignment{$type} )
+ # non-list tokens, such as ';', '=', and also the environment is
+ # not a list.
+ ## $type =~ /^[\;\<\>\~f]$/ || $is_assignment{$type}
+ elsif ( $is_non_list_type{$type}
&& !$self->is_in_list_by_i($i) )
{
$dont_align[$depth] = 1;
sub set_lp_indentation {
+ my ($self) = @_;
+
#------------------------------------------------------------------
# Define the leading whitespace for all tokens in the current batch
# when the -lp formatting is selected.
#------------------------------------------------------------------
- my ($self) = @_;
-
return unless ($rOpts_line_up_parentheses);
return unless ( defined($max_index_to_go) && $max_index_to_go >= 0 );
# have been defined. Here we prepare the lines for passing to the vertical
# aligner. We do the following tasks:
# - mark certain vertical alignment tokens, such as '=', in each line
- # - make minor indentation adjustments
+ # - make final indentation adjustments
# - do logical padding: insert extra blank spaces to help display certain
# logical constructions
+ # - send the line to the vertical aligner
my $this_batch = $self->[_this_batch_];
my $ri_first = $this_batch->[_ri_first_];
# --------------------------------------
# get the final indentation of this line
# --------------------------------------
- my ( $indentation, $lev, $level_end, $i_terminal, $is_outdented_line )
- = $self->get_final_indentation( $ibeg, $iend, $rfields,
- $rpatterns, $ri_first, $ri_last,
- $rindentation_list, $ljump, $starting_in_quote,
- $is_static_block_comment );
+ my (
+
+ $indentation,
+ $lev,
+ $level_end,
+ $i_terminal,
+ $is_outdented_line,
+
+ ) = $self->get_final_indentation(
+
+ $ibeg,
+ $iend,
+ $rfields,
+ $rpatterns,
+ $ri_first,
+ $ri_last,
+ $rindentation_list,
+ $ljump,
+ $starting_in_quote,
+ $is_static_block_comment,
+
+ );
# --------------------------------
# define flag 'outdent_long_lines'
sub set_vertical_alignment_markers {
- # This routine takes the first step toward vertical alignment of the
- # lines of output text. It looks for certain tokens which can serve as
- # vertical alignment markers (such as an '=').
- #
+ my ( $self, $ri_first, $ri_last ) = @_;
+
+ #----------------------------------------------------------------------
+ # This routine looks at output lines for certain tokens which can serve
+ # as vertical alignment markers (such as an '=').
+ #----------------------------------------------------------------------
+
+ # Input parameters:
+ # $ri_first = ref to list of starting line indexes in _to_go arrays
+ # $ri_last = ref to list of ending line indexes in _to_go arrays
+
# Method: We look at each token $i in this output batch and set
# $ralignment_type_to_go->[$i] equal to those tokens at which we would
# accept vertical alignment.
- my ( $self, $ri_first, $ri_last ) = @_;
-
my $ralignment_type_to_go;
my $ralignment_counts = [];
my $ralignment_hash_by_line = [];
sub make_alignment_patterns {
- # Here we do some important preliminary work for the
- # vertical aligner. We create four arrays for one
- # output line. These arrays contain strings that can
- # be tested by the vertical aligner to see if
- # consecutive lines can be aligned vertically.
+ my ( $self, $ibeg, $iend, $ralignment_type_to_go, $alignment_count,
+ $ralignment_hash )
+ = @_;
+
+ #------------------------------------------------------------------
+ # This sub creates arrays of vertical alignment info for one output
+ # line.
+ #------------------------------------------------------------------
+
+ # Input parameters:
+ # $ibeg, $iend - index range of this line in the _to_go arrays
+ # $ralignment_type_to_go - alignment type of tokens, like '=', if any
+ # $alignment_count - number of alignment tokens in the line
+ # $ralignment_hash - this contains all of the alignments for this
+ # line. It is not yet used but is available for future coding in
+ # case there is a need to do a preliminary scan of alignment tokens.
+
+ # The arrays which are created contain strings that can be tested by
+ # the vertical aligner to see if consecutive lines can be aligned
+ # vertically.
#
# The four arrays are indexed on the vertical
# alignment fields and are:
# allowed, even when the alignment tokens match.
# @field_lengths - the display width of each field
- my ( $self, $ibeg, $iend, $ralignment_type_to_go, $alignment_count,
- $ralignment_hash )
- = @_;
-
- # The var $ralignment_hash contains all of the alignments for this
- # line. It is not yet used but is available for future coding in case
- # there is a need to do a preliminary scan of the alignment tokens.
if (DEVEL_MODE) {
my $new_count = 0;
if ( defined($ralignment_hash) ) {
sub get_final_indentation {
- #--------------------------------------------------------------------
- # This routine sets the final indentation of a line in the Formatter.
- #--------------------------------------------------------------------
+ my (
+ $self, #
+
+ $ibeg,
+ $iend,
+ $rfields,
+ $rpatterns,
+ $ri_first,
+ $ri_last,
+ $rindentation_list,
+ $level_jump,
+ $starting_in_quote,
+ $is_static_block_comment
+
+ ) = @_;
+
+ #--------------------------------------------------------------
+ # This routine makes any necessary adjustments to get the final
+ # indentation of a line in the Formatter.
+ #--------------------------------------------------------------
# It starts with the basic indentation which has been defined for the
# leading token, and then takes into account any options that the user
# undo_ci, which was processed earlier, so care has to be taken to
# keep them coordinated.
- my (
- $self, $ibeg,
- $iend, $rfields,
- $rpatterns, $ri_first,
- $ri_last, $rindentation_list,
- $level_jump, $starting_in_quote,
- $is_static_block_comment
- ) = @_;
-
# Find the last code token of this line
my $i_terminal = $iend;
my $terminal_type = $types_to_go[$iend];
# This can be tedious so we let a sub do it
(
- $adjust_indentation, $default_adjust_indentation,
- $opening_indentation, $opening_offset,
- $is_leading, $opening_exists
- )
- = $self->get_closing_token_indentation(
+ $adjust_indentation,
+ $default_adjust_indentation,
+ $opening_indentation,
+ $opening_offset,
+ $is_leading,
+ $opening_exists
+
+ ) = $self->get_closing_token_indentation(
$ibeg,
$iend,
$is_semicolon_terminated,
$seqno_qw_closing,
- );
+ );
}
#--------------------------------------------------------
}
}
- return ( $indentation, $lev, $level_end, $i_terminal,
- $is_outdented_line );
+ return (
+
+ $indentation,
+ $lev,
+ $level_end,
+ $i_terminal,
+ $is_outdented_line,
+
+ );
} ## end sub get_final_indentation
sub get_closing_token_indentation {
# token - i.e. one of these: ) ] } :
my (
- $self,
+ $self, #
$ibeg,
$iend,
= $self->get_opening_indentation( $ibeg_weld_fix, $ri_first,
$ri_last, $rindentation_list, $seqno_qw_closing );
- my $terminal_is_in_list = $self->is_in_list_by_i($i_terminal);
-
# First set the default behavior:
if (
# require LIST environment; otherwise, we may outdent too much -
# this can happen in calls without parentheses (overload.t);
- && $terminal_is_in_list
+ && $self->is_in_list_by_i($i_terminal)
)
{
$adjust_indentation = 1;
# but right now we do not have that information. For now
# we see if we are in a list, and this works well.
# See test files 'sub*.t' for good test cases.
- if ( $terminal_is_in_list
- && !$rOpts_indent_closing_brace
+ if ( !$rOpts_indent_closing_brace
&& $block_type_beg
- && $block_type_beg =~ /$ASUB_PATTERN/ )
+ && $self->[_ris_asub_block_]->{$seqno_beg}
+ && $self->is_in_list_by_i($i_terminal) )
{
(
$opening_indentation, $opening_offset,
if ($is_leading) { $adjust_indentation = 2; }
}
- return ( $adjust_indentation, $default_adjust_indentation,
- $opening_indentation, $opening_offset,
- $is_leading, $opening_exists );
+ return (
+
+ $adjust_indentation,
+ $default_adjust_indentation,
+ $opening_indentation,
+ $opening_offset,
+ $is_leading,
+ $opening_exists,
+
+ );
}
} ## end closure get_final_indentation
return $tokenizer_self->[_last_line_number_];
}
+sub write_logfile_numbered_msg {
+ my ($msg) = @_;
+
+ # write input line number + message to logfile
+ my $input_line_number = get_input_line_number();
+ write_logfile_entry("Line $input_line_number: $msg");
+ return;
+}
+
# returns the next tokenized line
sub get_line {
my $input_line_number = ++$tokenizer_self->[_last_line_number_];
- my $write_logfile_entry = sub {
- my ($msg) = @_;
- write_logfile_entry("Line $input_line_number: $msg");
- return;
- };
-
# Find and remove what characters terminate this line, including any
# control r
my $input_line_separator = EMPTY_STRING;
# for backwards compatibility we keep the line text terminated with
# a newline character
$input_line .= "\n";
- $tokenizer_self->[_line_of_text_] = $input_line; # update
+ $tokenizer_self->[_line_of_text_] = $input_line;
# create a data structure describing this line which will be
# returned to the caller.
_square_bracket_depth => $square_bracket_depth,
_paren_depth => $paren_depth,
_quote_character => EMPTY_STRING,
+## Skip these needless initializations for efficiency:
## _rtoken_type => undef,
## _rtokens => undef,
## _rlevels => undef,
if ( $candidate_target eq $here_doc_target ) {
$tokenizer_self->[_nearly_matched_here_target_at_] = undef;
$line_of_tokens->{_line_type} = 'HERE_END';
- $write_logfile_entry->("Exiting HERE document $here_doc_target\n");
+ write_logfile_numbered_msg(
+ "Exiting HERE document $here_doc_target\n");
my $rhere_target_list = $tokenizer_self->[_rhere_target_list_];
if ( @{$rhere_target_list} ) { # there can be multiple here targets
$tokenizer_self->[_here_doc_target_] = $here_doc_target;
$tokenizer_self->[_here_quote_character_] =
$here_quote_character;
- $write_logfile_entry->(
+ write_logfile_numbered_msg(
"Entering HERE document $here_doc_target\n");
$tokenizer_self->[_nearly_matched_here_target_at_] = undef;
$tokenizer_self->[_started_looking_for_here_target_at_] =
# This is the end when count reaches 0
if ( !$tokenizer_self->[_in_format_] ) {
- $write_logfile_entry->("Exiting format section\n");
+ write_logfile_numbered_msg("Exiting format section\n");
$line_of_tokens->{_line_type} = 'FORMAT_END';
}
}
$line_of_tokens->{_line_type} = 'POD';
if ( $input_line =~ /^=cut/ ) {
$line_of_tokens->{_line_type} = 'POD_END';
- $write_logfile_entry->("Exiting POD section\n");
+ write_logfile_numbered_msg("Exiting POD section\n");
$tokenizer_self->[_in_pod_] = 0;
}
if ( $input_line =~ /^\#\!.*perl\b/ && !$tokenizer_self->[_in_end_] ) {
$line_of_tokens->{_line_type} = 'SKIP';
if ( $input_line =~ /$code_skipping_pattern_end/ ) {
$line_of_tokens->{_line_type} = 'SKIP_END';
- $write_logfile_entry->("Exiting code-skipping section\n");
+ write_logfile_numbered_msg("Exiting code-skipping section\n");
$tokenizer_self->[_in_skipped_] = 0;
}
return $line_of_tokens;
# end of a pod section
if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) {
$line_of_tokens->{_line_type} = 'POD_START';
- $write_logfile_entry->("Entering POD section\n");
+ write_logfile_numbered_msg("Entering POD section\n");
$tokenizer_self->[_in_pod_] = 1;
return $line_of_tokens;
}
# end of a pod section
if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) {
$line_of_tokens->{_line_type} = 'POD_START';
- $write_logfile_entry->("Entering POD section\n");
+ write_logfile_numbered_msg("Entering POD section\n");
$tokenizer_self->[_in_pod_] = 1;
return $line_of_tokens;
}
warning(
"=cut starts a pod section .. this can fool pod utilities.\n"
) unless (DEVEL_MODE);
- $write_logfile_entry->("Entering POD section\n");
+ write_logfile_numbered_msg("Entering POD section\n");
}
}
else {
$line_of_tokens->{_line_type} = 'POD_START';
- $write_logfile_entry->("Entering POD section\n");
+ write_logfile_numbered_msg("Entering POD section\n");
}
return $line_of_tokens;
if ( $tokenizer_self->[_in_skipped_] ) {
$line_of_tokens->{_line_type} = 'SKIP';
- $write_logfile_entry->("Entering code-skipping section\n");
+ write_logfile_numbered_msg("Entering code-skipping section\n");
return $line_of_tokens;
}
$tokenizer_self->[_in_here_doc_] = 1;
$tokenizer_self->[_here_doc_target_] = $here_doc_target;
$tokenizer_self->[_here_quote_character_] = $here_quote_character;
- $write_logfile_entry->("Entering HERE document $here_doc_target\n");
+ write_logfile_numbered_msg("Entering HERE document $here_doc_target\n");
$tokenizer_self->[_started_looking_for_here_target_at_] =
$input_line_number;
}
# which are not tokenized (and cannot be read with <DATA> either!).
if ( $tokenizer_self->[_in_data_] ) {
$line_of_tokens->{_line_type} = 'DATA_START';
- $write_logfile_entry->("Starting __DATA__ section\n");
+ write_logfile_numbered_msg("Starting __DATA__ section\n");
$tokenizer_self->[_saw_data_] = 1;
# keep parsing after __DATA__ if use SelfLoader was seen
if ( $tokenizer_self->[_saw_selfloader_] ) {
$tokenizer_self->[_in_data_] = 0;
- $write_logfile_entry->(
+ write_logfile_numbered_msg(
"SelfLoader seen, continuing; -nlsl deactivates\n");
}
elsif ( $tokenizer_self->[_in_end_] ) {
$line_of_tokens->{_line_type} = 'END_START';
- $write_logfile_entry->("Starting __END__ section\n");
+ write_logfile_numbered_msg("Starting __END__ section\n");
$tokenizer_self->[_saw_end_] = 1;
# keep parsing after __END__ if use AutoLoader was seen
if ( $tokenizer_self->[_saw_autoloader_] ) {
$tokenizer_self->[_in_end_] = 0;
- $write_logfile_entry->(
+ write_logfile_numbered_msg(
"AutoLoader seen, continuing; -nlal deactivates\n");
}
return $line_of_tokens;
# Note: if keyword 'format' occurs in this line code, it is still CODE
# (keyword 'format' need not start a line)
if ( $tokenizer_self->[_in_format_] ) {
- $write_logfile_entry->("Entering format section\n");
+ write_logfile_numbered_msg("Entering format section\n");
}
if ( $tokenizer_self->[_in_quote_]
/^\s*$/ )
{
$tokenizer_self->[_line_start_quote_] = $input_line_number;
- $write_logfile_entry->(
+ write_logfile_numbered_msg(
"Start multi-line quote or pattern ending in $quote_target\n");
}
}
&& !$tokenizer_self->[_in_quote_] )
{
$tokenizer_self->[_line_start_quote_] = -1;
- $write_logfile_entry->("End of multi-line quote or pattern\n");
+ write_logfile_numbered_msg("End of multi-line quote or pattern\n");
}
# we are returning a line of CODE
} ## end sub tokenize_this_line
sub tokenizer_main_loop {
+
my ($is_END_or_DATA) = @_;
+ #---------------------------------
+ # Break one input line into tokens
+ #---------------------------------
+
+ # Input parameter:
+ # $is_END_or_DATA is true for a __END__ or __DATA__ line
+
# start by breaking the line into pre-tokens
my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
( $rtokens, $rtoken_map, $rtoken_type ) =
$i = -1;
$i_tok = -1;
- # ------------------------------------------------------------
+ #-----------------------------
# begin main tokenization loop
- # ------------------------------------------------------------
+ #-----------------------------
# we are looking at each pre-token of one line and combining them
# into tokens
sub tokenizer_wrapup_line {
my ($line_of_tokens) = @_;
- # We have broken the current line into tokens. Now we have to wrap up
- # the result for shipping. Most of the remaining work involves
- # defining the two indentation parameters that the formatter needs
- # (structural indentation level and continuation indentation).
+ #---------------------------------------------------------
+ # Package a line of tokens for shipping back to the caller
+ #---------------------------------------------------------
+
+ # Most of the remaining work involves defining the two indentation
+ # parameters that the formatter needs for each token:
+ # - $level = structural indentation level and
+ # - $ci_level = continuation indentation level
# The method for setting the indentation level is straightforward.
# But the method used to define the continuation indentation is
sub pre_tokenize {
+ my ( $str, $max_tokens_wanted ) = @_;
+
+ # Input parameter:
+ # $max_tokens_wanted > 0 to stop on reaching this many tokens.
+ # = 0 means get all tokens
+
# Break a string, $str, into a sequence of preliminary tokens. We
# are interested in these types of tokens:
# words (type='w'), example: 'max_tokens_wanted'
# An advantage of doing this pre-tokenization step is that it keeps almost
# all of the regex work highly localized. A disadvantage is that in some
# very rare instances we will have to go back and split a pre-token.
- my ( $str, $max_tokens_wanted ) = @_;
- # we return references to these 3 arrays:
+ # Return parameters:
my @tokens = (); # array of the tokens themselves
my @token_map = (0); # string position of start of each token
my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
sub valign_input {
- # Place one line in the current vertical group.
+ #---------------------------------------------------------------------
+ # This is the front door of the vertical aligner. On each call
+ # we receive one line of specially marked text for vertical alignment.
+ # We compare the line with the current group, and either:
+ # - the line joins the current group if alignments match, or
+ # - the current group is flushed and a new group is started otherwise
+ #---------------------------------------------------------------------
#
# The key input parameters describing each line are:
# $level = indentation level of this line
# Revert to the starting state if does not fit
if ( $pad > $padding_available ) {
- ################################################
+ #----------------------------------------------
# Line does not fit -- revert to starting state
- ################################################
+ #----------------------------------------------
foreach my $alignment (@alignments) {
$alignment->restore_column();
}
$padding_available -= $pad;
}
- ######################################
+ #-------------------------------------
# The line fits, the match is accepted
- ######################################
+ #-------------------------------------
return 1;
}
"APPEND0: _flush_group_lines called from $a $b $c lines=$nlines, type=$group_type \n";
};
- ############################################
+ #-------------------------------------------
# Section 1: Handle a group of COMMENT lines
- ############################################
+ #-------------------------------------------
if ( $group_type eq 'COMMENT' ) {
$self->_flush_comment_lines();
return;
}
- #########################################################################
+ #------------------------------------------------------------------------
# Section 2: Handle line(s) of CODE. Most of the actual work of vertical
# aligning happens here in the following steps:
- #########################################################################
+ #------------------------------------------------------------------------
# STEP 1: Remove most unmatched tokens. They block good alignments.
my ( $max_lev_diff, $saw_side_comment ) =
my $ng_max = @{$rgroups} - 1;
return unless ( $ng_max > 0 );
- ############################################################################
+ #---------------------------------------------------------------------
# Step 1: Loop over groups to find all common leading alignment tokens
- ############################################################################
+ #---------------------------------------------------------------------
my $line;
my $rtokens;
}
return unless @icommon;
- ###########################################################
+ #----------------------------------------------------------
# Step 2: Reorder and consolidate the list into a task list
- ###########################################################
+ #----------------------------------------------------------
# We have to work first from lowest token index to highest, then by group,
# sort our list first on token index then group number
push @todo, [ $i, $ng_beg, $ng_end, $raw_tok, $lev ];
}
- ###############################
+ #------------------------------
# Step 3: Execute the task list
- ###############################
+ #------------------------------
do_left_to_right_sweep( $rlines, $rgroups, \@todo, \%max_move, $short_pad,
$group_level );
return;
# find number of leading common tokens
- #################################
+ #---------------------------------
# No match to hanging side comment
- #################################
+ #---------------------------------
if ( $line->{'is_hanging_side_comment'} ) {
# Should not get here; HSC's have been filtered out
$imax_align = -1;
}
- ##############################
+ #-----------------------------
# Handle comma-separated lists
- ##############################
+ #-----------------------------
elsif ( $list_type && $list_type eq $list_type_m ) {
# do not align lists across a ci jump with new list method
$imax_align = $i_nomatch - 1;
}
- ##################
+ #-----------------
# Handle non-lists
- ##################
+ #-----------------
else {
my $i_nomatch = $imax_min + 1;
foreach my $i ( 0 .. $imax_min ) {
use constant EXPLAIN_PRUNE => 0;
- ####################################################################
+ #-------------------------------------------------------------------
# Prune Tree Step 1. Start by scanning the lines and collecting info
- ####################################################################
+ #-------------------------------------------------------------------
# Note that the caller had this info but we have to redo this now because
# alignment tokens may have been deleted.
# the patterns and levels of the next line being tested at each depth
my ( @token_patterns_next, @levels_next, @token_indexes_next );
- #########################################################
+ #-----------------------------------------------------------
# define a recursive worker subroutine for tree construction
- #########################################################
+ #-----------------------------------------------------------
# This is a recursive routine which is called if a match condition changes
# at any depth when a new line is encountered. It ends the match node
return;
}; ## end sub end_node
- ######################################################
+ #-----------------------------------------------------
# Prune Tree Step 2. Loop to form the tree of matches.
- ######################################################
+ #-----------------------------------------------------
foreach my $jp ( 0 .. $jmax ) {
# working with two adjacent line indexes, 'm'=minus, 'p'=plus
}
} ## end loop to form tree of matches
- ##########################################################
+ #---------------------------------------------------------
# Prune Tree Step 3. Make links from parent to child nodes
- ##########################################################
+ #---------------------------------------------------------
# It seemed cleaner to do this as a separate step rather than during tree
# construction. The children nodes have links up to the parent node which
}
};
- #######################################################
+ #------------------------------------------------------
# Prune Tree Step 4. Make a list of nodes to be deleted
- #######################################################
+ #------------------------------------------------------
# list of lines with tokens to be deleted:
# [$jbeg, $jend, $level_keep]
@todo_list = @todo_next;
} ## end loop to mark nodes to delete
- #############################################################
+ #------------------------------------------------------------
# Prune Tree Step 5. Loop to delete selected alignment tokens
- #############################################################
+ #------------------------------------------------------------
foreach my $item (@delete_list) {
my ( $jbeg, $jend, $level_keep ) = @{$item};
foreach my $jj ( $jbeg .. $jend ) {
? $extra_indentation_spaces_wanted
: $avail;
- #########################################################
+ #--------------------------------------------------------
# Note: min spaces can be negative; for example with -gnu
# f(
# do { 1; !!(my $x = bless []); }
# );
- #########################################################
+ #--------------------------------------------------------
# The following rule is needed to match older formatting:
# For multiple groups, we will keep spaces non-negative.
# For a single group, we will allow a negative space.
sub valign_output_step_A {
- ###############################################################
+ #------------------------------------------------------------
# This is Step A in writing vertically aligned lines.
# The line is prepared according to the alignments which have
# been found. Then it is shipped to the next step.
- ###############################################################
+ #------------------------------------------------------------
my ( $self, $rinput_hash ) = @_;
sub valign_output_step_B {
- ###############################################################
+ #---------------------------------------------------------
# This is Step B in writing vertically aligned lines.
# Vertical tightness is applied according to preset flags.
# In particular this routine handles stacking of opening
# and closing tokens.
- ###############################################################
+ #---------------------------------------------------------
my ( $self, $rinput ) = @_;
sub valign_output_step_C {
- ###############################################################
+ #-----------------------------------------------------------------------
# This is Step C in writing vertically aligned lines.
# Lines are either stored in a buffer or passed along to the next step.
# The reason for storing lines is that we may later want to reduce their
# indentation when -sot and -sct are both used.
- ###############################################################
+ #-----------------------------------------------------------------------
my (
$self,
$seqno_string,
# Start storing lines when we see a line with multiple stacked
# opening tokens.
# patch for RT #94354, requested by Colin Williams
- if ( $seqno_string =~ /^\d+(\:+\d+)+$/
+ if ( index( $seqno_string, ':' ) >= 0
+ && $seqno_string =~ /^\d+(\:+\d+)+$/
&& $args_to_D[0] !~ /^[\}\)\]\:\?]/ )
{
sub valign_output_step_D {
- ###############################################################
+ #----------------------------------------------------------------
# This is Step D in writing vertically aligned lines.
# It is the end of the vertical alignment pipeline.
# Write one vertically aligned line of code to the output object.
- ###############################################################
+ #----------------------------------------------------------------
my ( $self, $line, $leading_space_count, $level, $Kend ) = @_;