$pkg reports VERSION='$VERSION'.
==============================================================================
EOM
-
- # We shouldn't get here, but this return is to keep Perl-Critic from
- # complaining.
- return;
+ croak "unexpected return from sub Die";
} ## end sub Fault
sub Fault_Warn {
$rLL = $self->[_rLL_] unless ( defined($rLL) );
return unless @{$rLL};
- my $type = $rLL->[0]->[_TYPE_];
- if ( $type ne 'b' && $type ne '#' ) { return 0 }
- return $self->K_next_code(0);
+ my $KK = 0;
+ my $type = $rLL->[$KK]->[_TYPE_];
+ if ( $type ne 'b' && $type ne '#' ) { return $KK }
+ return $self->K_next_code($KK);
} ## end sub K_first_code
sub K_last_code {
sub set_whitespace_flags {
+ my $self = shift;
+
# This routine is called once per file to set whitespace flags for that
# file. This routine examines each pair of nonblank tokens and sets a flag
- # indicating if white space is needed.
+ # indicating if they should be separated by white space.
#
# $rwhitespace_flags->[$j] is a flag indicating whether a white space
# BEFORE token $j is needed, with the following values:
# WS_YES = 1 want a space BEFORE token $j
#
- my $self = shift;
-
my $j_tight_closing_paren = -1;
my $rLL = $self->[_rLL_];
my $K_closing_container = $self->[_K_closing_container_];
$closing_container_inside_ws{$sequence_number} = $ws_flag;
}
}
+ else {
+ DEVEL_MODE
+ && Fault("unexpected token='$word' and seqno='$sequence_number'\n");
+ }
return;
} ## end sub set_container_ws_by_keyword
sub is_essential_whitespace {
+ my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
+
# Essential whitespace means whitespace which cannot be safely deleted
# without risking the introduction of a syntax error.
# to use nytprof to profile with both old and revised coding using the
# -mangle option and check differences.
- my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
-
# This is potentially a very slow routine but the following quick
# filters typically catch and handle over 90% of the calls.
# Added for c140 to make 'w ->' and 'i ->' behave the same
$binary_bond_strength{'w'}{'->'} = 1.45 * STRONG;
- # Note that the following alternative strength would make the break at the
- # '->' rather than opening the '('. Both have advantages and disadvantages.
- # $binary_bond_strength{'i'}{'->'} = 0.5*STRONG + 0.5 * NOMINAL; #
+ # Note that the following alternative strength would make the break at
+ # the '->' rather than opening the '('. Both have advantages and
+ # disadvantages.
+ # $binary_bond_strength{'i'}{'->'} = 0.5*STRONG + 0.5 * NOMINAL; #
$binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
$binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
my ($self) = @_;
- #-----------------------------------------------------------------
# Define a 'bond strength' for each token pair in an output batch.
# See comments above for definition of bond strength.
- #-----------------------------------------------------------------
my $rbond_strength_to_go = [];
sub prepare_cuddled_block_types {
- # the cuddled-else style, if used, is controlled by a hash that
- # we construct here
-
- # Include keywords here which should not be cuddled
+ # Construct a hash needed by the cuddled-else style
my $cuddled_string = EMPTY_STRING;
if ( $rOpts->{'cuddled-else'} ) {
if ($cuddled_block_list) {
$cuddled_string .= SPACE . $cuddled_block_list;
}
-
}
# If we have a cuddled string of the form
sub write_line {
+ my ( $self, $line_of_tokens_input ) = @_;
+
# This routine receives lines one-by-one from the tokenizer and stores
# them in a format suitable for further processing. After the last
# line has been sent, the tokenizer will call sub 'finish_formatting'
# to do the actual formatting.
- my ( $self, $line_of_tokens_old ) = @_;
+ # Given:
+ # $line_of_tokens_input = hash ref of one line from the tokenizer
my $rLL = $self->[_rLL_];
my $line_of_tokens = {};
# copy common hash key values
- @{$line_of_tokens}{@common_keys} = @{$line_of_tokens_old}{@common_keys};
+ @{$line_of_tokens}{@common_keys} =
+ @{$line_of_tokens_input}{@common_keys};
- my $line_type = $line_of_tokens_old->{_line_type};
+ my $line_type = $line_of_tokens_input->{_line_type};
my $tee_output;
my $Klimit = $self->[_Klimit_];
# Handle line of code
else {
- my $rtokens = $line_of_tokens_old->{_rtokens};
+ my $rtokens = $line_of_tokens_input->{_rtokens};
my $jmax = @{$rtokens} - 1;
if ( $jmax >= 0 ) {
#----------------------------
# get the tokens on this line
#----------------------------
- $self->write_line_inner_loop( $line_of_tokens_old,
+ $self->write_line_inner_loop( $line_of_tokens_input,
$line_of_tokens );
# update Klimit for added tokens
if ($tee_output) {
my $fh_tee = $self->[_fh_tee_];
- my $line_text = $line_of_tokens_old->{_line_text};
+ my $line_text = $line_of_tokens_input->{_line_text};
$fh_tee->print($line_text) if ($fh_tee);
}
# We must use the old line because the qw logic may change this flag
- $last_ending_in_quote = $line_of_tokens_old->{_ending_in_quote};
+ $last_ending_in_quote = $line_of_tokens_input->{_ending_in_quote};
return;
} ## end sub write_line
sub write_line_inner_loop {
my ( $self, $line_of_tokens_old, $line_of_tokens ) = @_;
- #---------------------------------------------------------------------
# Copy the tokens on one line received from the tokenizer to their new
# storage locations.
- #---------------------------------------------------------------------
# Input parameters:
# $line_of_tokens_old = line received from tokenizer
# The file has been tokenized and is ready to be formatted.
# All of the relevant data is stored in $self, ready to go.
+ # Given:
+ # $severe_error = true if a severe error was encountered
+
# Returns:
# true if input file was copied verbatim due to errors
# false otherwise
sub find_level_info {
+ my ($self) = @_;
+
# Find level ranges and total variations of all code blocks in this file.
# Returns:
# ref to hash with block info, with seqno as key (see below)
- my ($self) = @_;
-
# The array _rSS_ has the complete container tree for this file.
my $rSS = $self->[_rSS_];
}
# safety check - shouldn't happen
- return unless ( $type eq 'P' );
+ if ( $type ne 'P' ) {
+ DEVEL_MODE && Fault("Expecting type 'P' but found '$type'");
+ return;
+ }
my $level = $item->[_LEVEL_];
return unless ( $level == 0 );
my ( $self, $rline_type_count, $rkeyword_count ) = @_;
# Guess if we are formatting a complete script
+ # Given:
+ # $rline_type_count = hash ref of count of line types
+ # $rkeyword_count = hash ref of count of keywords
# Return: true or false
# Goal: help decide if we should skip certain warning checks when
} ## end sub initialize_warn_hash
sub make_excluded_name_hash {
- my ($option_name) = @_;
+ my ($option_name) = @_;
+
+ # Convert a list of words into a hash ref for an input option
+ # Given:
+ # $option_name = the name of an input option
+ # example: 'warn-variable-exclusion-list'
my $rexcluded_name_hash = {};
my $excluded_names = $rOpts->{$option_name};
if ($excluded_names) {
# Given:
# $wvt_in_args = true if the -wvt parameter was on the command line
# $num_files = number of files on the command line
+ # $line_range_clipped = true if only part of a file is being formatted
my @all_opts = qw(r s p u c);
$rwarn_variable_types =
sub filter_excluded_names {
+ my ( $rwarnings, $rexcluded_name_hash ) = @_;
+
+ # Remove warnings for variable names excluded by user request
+ # for an operation like --warn-variable-types
+
# Given:
# $rwarnigns = ref to list of warning info hashes
# $rexcluded_name_hash = ref to hash with excluded names
# Return updated $rwarnings with excluded names removed
- my ( $rwarnings, $rexcluded_name_hash ) = @_;
if ( @{$rwarnings} && $rexcluded_name_hash ) {
# Check for exact matches
sub delete_side_comments {
my ( $self, $rix_side_comments ) = @_;
- # Given a list of indexes of lines with side comments, handle any
- # requested side comment deletions.
+ # Handle any requested side comment deletions.
+ # Given:
+ # $rix_side_comments = ref to list of indexes of lines with side comments
my $rLL = $self->[_rLL_];
my $rlines = $self->[_rlines_];
my $self = shift;
- #--------------------------------------------------------------------------
# This routine is called once per file to do as much formatting as possible
# before new line breaks are set.
- #--------------------------------------------------------------------------
- # Return parameters:
- # Set $severe_error=true if processing must terminate immediately
+ # Returns:
+ # $severe_error = true if processing must terminate immediately
+ # $rqw_lines = ref to list of lines with qw quotes (for -qwaf)
my ( $severe_error, $rqw_lines );
# We do not change any spaces in --indent-only mode
my ( $self, $Kfirst, $Klast, $input_line_number ) = @_;
- #-----------------------------------------------------------------
# Loop to copy all tokens on one line, making any spacing changes,
# while also collecting information needed by later subs.
- #-----------------------------------------------------------------
+
+ # Given:
+ # $Kfirst = index of first token on this line
+ # $Klast = index of last token on this line
+ # $input_line_number = number of this line in input stream
+
my $type;
foreach my $KK ( $Kfirst .. $Klast ) {
my ( $self, $item ) = @_;
- #------------------------------------------
# Store one token during respace operations
- #------------------------------------------
- # Optional input parameter: '$item'
- # if defined => reference to a token to be stored
- # otherwise => make and store a blank space
+ # Given:
+ # $item =
+ # if defined => reference to a token to be stored
+ # if not defined => make and store a blank space
# NOTE: this sub is called once per token so coding efficiency is critical.
sub check_Q {
+ my ( $self, $KK, $Kfirst, $line_number ) = @_;
+
# Check that a quote looks okay, and report possible problems
# to the logfile.
+ # Given:
+ # $KK = index of the quote token
+ # $Kfirst = index of first token on the line
+ # $line_number = number of the line in the input stream
- my ( $self, $KK, $Kfirst, $line_number ) = @_;
my $token = $rLL->[$KK]->[_TOKEN_];
if ( $token =~ /\t/ ) {
$self->note_embedded_tab($line_number);
sub package_info_maker {
+ my ( $self, $rK_package_list ) = @_;
+
# Create a hash of values which can be used to find the package of any
# token. This sub must be called after rLL has been updated because it
# calls parent_seqno_by_K.
- my ( $self, $rK_package_list ) = @_;
# Given:
# @{$rK_package_list} = a simple list of token index K of each 'package'
sub count_list_elements {
my ( $self, $rarg_list ) = @_;
- # Given:
+ # Given call arg hash containing:
# $seqno_list = sequence number of a paren of list to be counted, or
# $K_list_start = starting index of list (for 'return' lists)
# $shift_count_min = starting min arg count items to include
# shouldn't happen:
if ( !defined($K_sub) || $K_sub >= $K_opening_block ) {
if ( !defined($K_sub) ) { $K_sub = 'undef' }
- Fault("Bad K_sub=$K_sub, opening=$K_opening_block\n");
+ DEVEL_MODE && Fault("Bad K_sub=$K_sub, opening=$K_opening_block\n");
return;
}
my ( $self, $rpackage_lookup_list, $rprelim_call_info ) = @_;
+ # Given:
+ # $rpackage_lookup_list = list with info for finding containing package
+ # $rprelim_call_info = hash ref with first try at call info
+
# Returns two hash references:
# \%sub_info_by_seqno,
# \%sub_seqno_by_key,
my ( $self, $rpackage_lookup_list ) = @_;
+ # Given:
+ # $rpackage_lookup_list = list with info for finding containing package
+
# Update the hash of info about the call parameters with arg counts
# and package. It contains the sequence number of each paren and
# type of call, and we must add the arg count and package.
sub sort_warnings {
+ my ($rwarnings) = @_;
+
# Given:
# $rwarnigns = ref to list of warning info hashes
# Return updated $rwarnings
# - Sorted by line number
- my ($rwarnings) = @_;
if ( @{$rwarnings} ) {
# sort by line number
sub stringify_line_range {
my ($rcalls) = @_;
+
+ # Given:
+ # $rcalls = ref to list of call info
+ # Return:
+ # $string = single line of text with just the line range
+
my $string = EMPTY_STRING;
if ( $rcalls && @{$rcalls} ) {
my @sorted =
sub keep_old_line_breaks {
+ my ($self) = @_;
+
# Called once per file to find and mark any old line breaks which
# should be kept. We will be translating the input hashes into
# token indexes.
# = 2 make a soft break (keep building current batch)
# best for something like leading ->
- my ($self) = @_;
-
my $rLL = $self->[_rLL_];
my $rKrange_code_without_comments =
$self->[_rKrange_code_without_comments_];
sub weld_containers {
+ my ($self) = @_;
+
# Called once per file to do any welding operations requested by --weld*
# flags.
- my ($self) = @_;
# This count is used to eliminate needless calls for weld checks elsewhere
$total_weld_count = 0;
} ## end sub weld_containers
sub weld_cuddled_blocks {
+
my ($self) = @_;
# Called once per file to handle cuddled formatting
sub match_paren_control_flag {
+ my ( $self, $seqno, $flag, $rLL ) = @_;
+
+ # Input parameters:
+ # $seqno = sequence number of the container (should be paren)
+ # $flag = the flag which defines what matches
+ # $rLL = an optional alternate token list needed for respace operations
+
# Decide if this paren is excluded by user request:
# undef matches no parens
# '*' matches all parens
# 'F' matches if 'f' does not.
# 'w' matches if either 'k' or 'f' match.
# 'W' matches if 'w' does not.
- my ( $self, $seqno, $flag, $rLL ) = @_;
- # Input parameters:
- # $seqno = sequence number of the container (should be paren)
- # $flag = the flag which defines what matches
- # $rLL = an optional alternate token list needed for respace operations
$rLL = $self->[_rLL_] unless ( defined($rLL) );
return 0 unless ( defined($flag) );
sub is_excluded_weld {
- # decide if this weld is excluded by user request
my ( $self, $KK, $is_leading ) = @_;
+
+ # Decide if this weld is excluded by user request
+
+ # Given:
+ # $KK = index of this weld token
+ # $is_leading = true if this will the outer token of a weld
+
my $rLL = $self->[_rLL_];
my $rtoken_vars = $rLL->[$KK];
my $token = $rtoken_vars->[_TOKEN_];
sub setup_new_weld_measurements {
+ my ( $self, $Kouter_opening, $Kinner_opening ) = @_;
+
# Define quantities to check for excess line lengths when welded.
# Called by sub 'weld_nested_containers' and sub 'weld_nested_quotes'
- my ( $self, $Kouter_opening, $Kinner_opening ) = @_;
-
- # Given indexes of outer and inner opening containers to be welded:
- # $Kouter_opening, $Kinner_opening
+ # Given:
+ # ($Kouter_opening, $Kinner_opening) = indexes of outer and inner opening
+ # containers to be welded
# Returns these variables:
# $new_weld_ok = true (new weld ok) or false (do not start new weld)
} ## end sub setup_new_weld_measurements
sub excess_line_length_for_Krange {
+
my ( $self, $Kfirst, $Klast ) = @_;
# returns $excess_length =
} ## end sub excess_line_length_for_Krange
sub weld_nested_containers {
+
my ($self) = @_;
# Called once per file for option '--weld-nested-containers'
sub weld_nested_quotes {
+ my $self = shift;
+
# Called once per file for option '--weld-nested-containers'. This
# does welding on qw quotes.
- my $self = shift;
-
# See if quotes are excluded from welding
my $rflags = $weld_nested_exclusion_rules{'q'};
return if ( defined($rflags) && defined( $rflags->[1] ) );
my ( $self, $seqno ) = @_;
- # given a sequence number:
- # return true if it is welded either left or right
- # return false otherwise
+ # Given:
+ # $seqno = a sequence number:
+ # Return:
+ # true if it is welded either left or right
+ # false otherwise
return unless ( $total_weld_count && defined($seqno) );
my $KK_o = $self->[_K_opening_container_]->{$seqno};
return unless defined($KK_o);
sub mark_short_nested_blocks {
+ my $self = shift;
+
# This routine looks at the entire file and marks any short nested blocks
# which should not be broken. The results are stored in the hash
# $rshort_nested->{$type_sequence}
# The flag which is set here will be checked in two places:
# 'sub process_line_of_CODE' and 'sub starting_one_line_block'
- my $self = shift;
return if $rOpts->{'indent-only'};
my $rLL = $self->[_rLL_];
sub clip_adjusted_levels {
+ my ( $self, $min_starting_level ) = @_;
+
# Replace any negative adjusted levels with zero.
# Negative levels can only occur in files with brace errors.
- my ( $self, $min_starting_level ) = @_;
+ # Given:
+ # $min_starting_level = minimum (adjusted) level of the input stream
# Clip the original _LEVEL_ values to zero if necessary
my $rLL = $self->[_rLL_];
sub do_non_indenting_braces {
+ my ($self) = @_;
+
# Called once per file to handle the --non-indenting-braces parameter.
# Remove indentation within marked braces if requested
- my ($self) = @_;
# Any non-indenting braces have been found by sub find_non_indenting_braces
# and are defined by the following hash:
sub extended_ci {
+ my ($self) = @_;
+
# This routine implements the -xci (--extended-continuation-indentation)
# flag. We add CI to interior tokens of a container which itself has CI but
# only if a token does not already have CI.
# The operations to remove unwanted CI are done in sub 'undo_ci'.
- my ($self) = @_;
-
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
==============================================================================
EOM
- # We shouldn't get here, but this return is to keep Perl-Critic from
- # complaining.
- return;
+ croak "unexpected return from sub Die";
} ## end sub Fault
sub make_skipping_pattern {
my ( $rOpts, $opt_name, $default ) = @_;
+
+ # Make regex patterns for the format-skipping and code-skipping options
my $param = $rOpts->{$opt_name};
if ( !$param ) { $param = $default }
- $param =~ s/^\s+//; # allow leading spaces to be like format-skipping
+ $param =~ s/^\s+//;
if ( $param !~ /^#/ ) {
Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
}
sub check_options {
- # Check Tokenizer parameters
+ # Check and pre-process tokenizer parameters
my $rOpts = shift;
%is_sub = ();
my ( $self, $line_source_object ) = @_;
# Convert the source into an array of lines
+ # Given:
+ # $line_source_object = the input source stream
+ # Task:
+ # Convert the source to an array ref and store in $self
+
my $rinput_lines = [];
my $rsource = ref($line_source_object);
sub peek_ahead {
my ( $self, $buffer_index ) = @_;
- # look $buffer_index lines ahead of the current location without disturbing
- # the input
+ # look $buffer_index lines ahead of the current location in the input
+ # stream without disturbing the input
my $line;
my $rinput_lines = $self->[_rinput_lines_];
my $line_index = $buffer_index + $self->[_input_line_index_next_];
return ($level);
} ## end sub guess_old_indentation_level
-# This is a currently unused debug routine
sub dump_functions {
+ # This is an unused debug routine, save for future use
+
my $fh = *STDOUT;
foreach my $pkg ( keys %{$ris_user_function} ) {
$fh->print("\nnon-constant subs in package $pkg\n");
my ( $self, $numc ) = @_;
- # Split the leading $numc characters from the current token (at index=$i)
- # which is pre-type 'w' and insert the remainder back into the pretoken
- # stream with appropriate settings. Since we are splitting a pre-type 'w',
- # there are three cases, depending on if the remainder starts with a digit:
- # Case 1: remainder is type 'd', all digits
- # Case 2: remainder is type 'd' and type 'w': digits and other characters
- # Case 3: remainder is type 'w'
+ # This provides a way to work around the limitations of the
+ # pre-tokenization scheme upon which perltidy is based. It is rarely
+ # needed.
+
+ # Split the leading $numc characters from the current token (at
+ # index=$i) which is pre-type 'w' and insert the remainder back into
+ # the pretoken stream with appropriate settings. Since we are
+ # splitting a pre-type 'w', there are three cases, depending on if the
+ # remainder starts with a digit:
+ # Case 1: remainder is type 'd', all digits
+ # Case 2: remainder is type 'd' and type 'w': digits & other characters
+ # Case 3: remainder is type 'w'
# Examples, for $numc=1:
# $tok => $tok_0 $tok_1 $tok_2
sub peeked_ahead {
my $flag = shift;
- # get/set the closure flag '$peeked_ahead'
+ # get or set the closure flag '$peeked_ahead':
# - set $peeked_ahead to $flag if given, then
# - return current value
$peeked_ahead = defined($flag) ? $flag : $peeked_ahead;
sub scan_simple_identifier {
+ my $self = shift;
+
# This is a wrapper for sub scan_identifier. It does a fast preliminary
# scan for certain common identifiers:
# '$var', '@var', %var, *var, &var, '@{...}', '%{...}'
# |----$i_plus_1 [= a bareword ]
# ---$i_begin [= a sigil]
- my $self = shift;
-
my $i_begin = $i;
my $tok_begin = $tok;
my $i_plus_1 = $i + 1;
sub method_ok_here {
+ my $self = shift;
+
# Return:
# false if this is definitely an invalid method declaration
# true otherwise (even if not sure)
# return;
# };
- my $self = shift;
-
# from do_scan_sub:
my $i_beg = $i + 1;
my $pos_beg = $rtoken_map->[$i_beg];
sub class_ok_here {
+ my $self = shift;
+
# Return:
# false if this is definitely an invalid class declaration
# true otherwise (even if not sure)
#
# class ExtendsBasicAttributes is BasicAttributes{
- my $self = shift;
-
# TEST 1: class stmt can only go where a new statement can start
if ( !new_statement_ok() ) { return }
sub scan_number_fast {
+ my $self = shift;
+
# This is a wrapper for sub scan_number. It does a fast preliminary
# scan for a simple integer. It calls the original scan_number if it
# does not find one.
- my $self = shift;
my $i_begin = $i;
my $tok_begin = $tok;
my $number;
my ( $self, $thing ) = @_;
# Issue warning on error if expecting operator
- # Given: $thing = the unexpected token or issue
- # = undef to use current pre-token
+ # Given:
+ # $thing = the unexpected token or issue
+ # = undef to use current pre-token
if ( $expecting == OPERATOR ) {
if ( !defined($thing) ) { $thing = $tok }
my ( $self, $tok_kw, $next_nonblank_token, $i_next ) = @_;
- # Decide if 'sub :' can be the start of a sub attribute list.
- # We will decide based on if the colon is followed by a
- # bareword which is not a keyword.
- # Changed inext+1 to inext to fixed case b1190.
+ # Decide if a ':' can introduce an attribute. For example,
+ # something like 'sub :'
+
+ # Given:
+ # $tok_kw = a bareword token
+ # $next_nonblank_token = a following ':' being examined
+ # $i_next = the index of the following ':'
+
+ # We will decide based on if the colon is followed by a bareword
+ # which is not a keyword. Changed inext+1 to inext to fixed case
+ # b1190.
my $sub_attribute_ok_here;
if ( $is_sub{$tok_kw}
&& $expecting != OPERATOR
sub tokenize_this_line {
+ my ( $self, $line_of_tokens, $trimmed_input_line ) = @_;
+
# This routine tokenizes one line. The results are stored in
# the hash ref '$line_of_tokens'.
# Returns:
# nothing
- my ( $self, $line_of_tokens, $trimmed_input_line ) = @_;
my $untrimmed_input_line = $line_of_tokens->{_line_text};
# Extract line number for use in error messages
sub operator_expected {
+ my ( $self, $tok, $next_type, $blank_after_Z ) = @_;
+
# Returns a parameter indicating what types of tokens can occur next
# Call format:
# the 'operator_expected' value by a simple hash lookup. If there are
# exceptions, that is an indication that a new type is needed.
- my ( $self, $tok, $next_type, $blank_after_Z ) = @_;
-
#--------------------------------------------
# Section 1: Table lookup will get most cases
#--------------------------------------------
sub code_block_type {
+ my ( $self, $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
+
# Decide if this is a block of code, and its type.
# Must be called only when $type = $token = '{'
# The problem is to distinguish between the start of a block of code
# print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
- my ( $self, $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
if ( $last_nonblank_token eq '{'
&& $last_nonblank_type eq $last_nonblank_token )
{
sub increase_nesting_depth {
my ( $self, $aa, $pos ) = @_;
+ # Given:
+ # $aa = integer code of container type, 0-3
+ # $pos = position of character, for error message
+
# USES GLOBAL VARIABLES: $rcurrent_depth,
# $rcurrent_sequence_number, $rdepth_array,
# $rstarting_line_of_current_depth, $statement_type
sub is_balanced_closing_container {
+ my ($aa) = @_;
+
# Return true if a closing container can go here without error
# Return false if not
- my ($aa) = @_;
+ # Given:
+ # $aa = integer code of container type, 0-3
# cannot close if there was no opening
my $cd_aa = $rcurrent_depth->[$aa];
my ( $self, $aa, $pos ) = @_;
+ # Given:
+ # $aa = integer code of container type, 0-3
+ # $pos = position of character, for error message
+
# USES GLOBAL VARIABLES: $rcurrent_depth,
# $rcurrent_sequence_number, $rdepth_array, $rstarting_line_of_current_depth
# $statement_type
sub peek_ahead_for_n_nonblank_pre_tokens {
- # returns next n pretokens if they exist
- # returns undef's if hits eof without seeing any pretokens
- # USES GLOBAL VARIABLES: (none)
my ( $self, $max_pretokens ) = @_;
+
+ # Given:
+ # $max_pretokens = number of pretokens wanted
+ # Return:
+ # next $max_pretokens pretokens if they exist
+ # undef's if hits eof without seeing any pretokens
+
+ # USES GLOBAL VARIABLES: (none)
my $line;
my $i = 0;
my ( $rpre_tokens, $rmap, $rpre_types );
# look ahead for next non-blank, non-comment line of code
sub peek_ahead_for_nonblank_token {
- # USES GLOBAL VARIABLES: (none)
my ( $self, $rtokens, $max_token_index ) = @_;
+
+ # Given:
+ # $rtokens = ref to token array
+ # $max_token_index = index of last token in $rtokens
+ # Task:
+ # Update $rtokens with next nonblank token
+
+ # USES GLOBAL VARIABLES: (none)
my $line;
my $i = 0;
sub guess_if_pattern_or_conditional {
- # this routine is called when we have encountered a ? following an
+ my ( $self, $i, $rtokens, $rtoken_type, $rtoken_map_uu, $max_token_index )
+ = @_;
+
+ # This routine is called when we have encountered a ? following an
# unknown bareword, and we must decide if it starts a pattern or not
- # input parameters:
+ # Given:
# $i - token index of the ? starting possible pattern
- # output parameters:
+ # $rtokens ... = the token arrays
+ # Return:
# $is_pattern = 0 if probably not pattern, =1 if probably a pattern
# msg = a warning or diagnostic message
+
# USES GLOBAL VARIABLES: $last_nonblank_token
- my ( $self, $i, $rtokens, $rtoken_type, $rtoken_map_uu, $max_token_index )
- = @_;
my $is_pattern = 0;
my $msg = "guessing that ? after '$last_nonblank_token' starts a ";
sub guess_if_pattern_or_division {
+ my ( $self, $i, $rtokens, $rtoken_type, $rtoken_map, $max_token_index ) =
+ @_;
+
# This routine is called when we have encountered a / following an
# unknown bareword, and we must decide if it starts a pattern or is a
# division.
- # input parameters:
+ # Given:
# $i - token index of the / starting possible pattern
- # output parameters:
+ # $rtokens ... = the token arrays
+ # Return:
# $is_pattern = 0 if probably division, =1 if probably a pattern
# msg = a warning or diagnostic message
# USES GLOBAL VARIABLES: $last_nonblank_token
- my ( $self, $i, $rtokens, $rtoken_type, $rtoken_map, $max_token_index ) =
- @_;
my $msg = "guessing that / after '$last_nonblank_token' starts a ";
my $ibeg = $i;
my $is_pattern = 0;
return ( $is_pattern, $msg );
} ## end sub guess_if_pattern_or_division
-# try to resolve here-doc vs. shift by looking ahead for
-# non-code or the end token (currently only looks for end token)
-# returns 1 if it is probably a here doc, 0 if not
sub guess_if_here_doc {
my ( $self, $next_token ) = @_;
- # This is how many lines we will search for a target as part of the
- # guessing strategy. It is a constant because there is probably
- # little reason to change it.
+ # Try to resolve here-doc vs. shift by looking ahead for
+ # non-code or the end token (currently only looks for end token)
+
+ # Given:
+ # $next_token = the next token after '<<'
+
+ # Return:
+ # 1 if it is probably a here doc
+ # 0 if not
+
# USES GLOBAL VARIABLES: $current_package $ris_constant,
+
+ # This is how many lines we will search for a target as part of the
+ # guessing strategy. There is probably little reason to change it.
my $HERE_DOC_WINDOW = 40;
my $here_doc_expected = 0;
sub scan_bare_identifier_do {
- # this routine is called to scan a token starting with an alphanumeric
- # variable or package separator, :: or '.
- # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
- # $last_nonblank_type, $rparen_type, $paren_depth
-
my (
$self,
) = @_;
+ # This routine is called to scan a token starting with an alphanumeric
+ # variable or package separator, :: or '.
+
+ # Given:
+ # current scan state variables
+
+ # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
+ # $last_nonblank_type, $rparen_type, $paren_depth
+
my $package = undef;
my $i_beg = $i;
sub scan_id_do {
- # This is the new scanner and will eventually replace scan_identifier.
- # Only type 'sub' and 'package' are implemented.
- # Token types $ * % @ & -> are not yet implemented.
- #
- # Scan identifier following a type token.
- # The type of call depends on $id_scan_state: $id_scan_state = ''
- # for starting call, in which case $tok must be the token defining
- # the type.
- #
- # If the type token is the last nonblank token on the line, a value
- # of $id_scan_state = $tok is returned, indicating that further
- # calls must be made to get the identifier. If the type token is
- # not the last nonblank token on the line, the identifier is
- # scanned and handled and a value of '' is returned.
-
my (
$self,
) = @_;
+ # Scan identifier following a type token.
+ # Given:
+ # current scan state variables
+
+ # This is the new scanner and may eventually replace scan_identifier.
+ # Only type 'sub' and 'package' are implemented.
+ # Token types $ * % @ & -> are not yet implemented.
+ #
+ # The type of call depends on $id_scan_state: $id_scan_state = ''
+ # for starting call, in which case $tok must be the token defining
+ # the type.
+ #
+ # If the type token is the last nonblank token on the line, a value
+ # of $id_scan_state = $tok is returned, indicating that further
+ # calls must be made to get the identifier. If the type token is
+ # not the last nonblank token on the line, the identifier is
+ # scanned and handled and a value of '' is returned.
+
use constant DEBUG_NSCAN => 0;
my $type = EMPTY_STRING;
my $i_beg;
sub check_prototype {
my ( $proto, $package, $subname ) = @_;
+
+ # Classify a sub based on its prototype
return if ( !defined($package) );
return if ( !defined($subname) );
if ( defined($proto) ) {
sub do_scan_package {
- # do_scan_package parses a package name
- # it is called with $i_beg equal to the index of the first nonblank
+ my ( $self, $rcall_hash ) = @_;
+
+ my $input_line = $rcall_hash->{input_line};
+ my $i = $rcall_hash->{i};
+ my $i_beg = $rcall_hash->{i_beg};
+ my $tok = $rcall_hash->{tok};
+ my $type = $rcall_hash->{type};
+ my $rtokens = $rcall_hash->{rtokens};
+ my $rtoken_map = $rcall_hash->{rtoken_map};
+ my $max_token_index = $rcall_hash->{max_token_index};
+
+ # Parse a package name.
+ # This is called with $i_beg equal to the index of the first nonblank
# token following a 'package' token.
# USES GLOBAL VARIABLES: $current_package,
# character and at least three components.
# reference http://perldoc.perl.org/functions/package.html
- my ( $self, $rcall_hash ) = @_;
-
- my $input_line = $rcall_hash->{input_line};
- my $i = $rcall_hash->{i};
- my $i_beg = $rcall_hash->{i_beg};
- my $tok = $rcall_hash->{tok};
- my $type = $rcall_hash->{type};
- my $rtokens = $rcall_hash->{rtokens};
- my $rtoken_map = $rcall_hash->{rtoken_map};
- my $max_token_index = $rcall_hash->{max_token_index};
-
my $package = undef;
my $pos_beg = $rtoken_map->[$i_beg];
pos($input_line) = $pos_beg;
sub scan_complex_identifier {
+ (
+ my $self,
+
+ $i,
+ $id_scan_state,
+ $identifier,
+ $rtokens,
+ $max_token_index,
+ $expecting,
+ $container_type
+
+ ) = @_;
+
# This routine assembles tokens into identifiers. It maintains a
# scan state, id_scan_state. It updates id_scan_state based upon
# current id_scan_state and token, and returns an updated
# attempt to create multiple tokenizers can occur when multiple
# files are processed, causing an error.
- (
- my $self,
-
- $i,
- $id_scan_state,
- $identifier,
- $rtokens,
- $max_token_index,
- $expecting,
- $container_type
-
- ) = @_;
-
# return flag telling caller to split the pretoken
my $split_pretoken_flag;
sub do_scan_sub {
- # do_scan_sub parses a sub name and prototype.
+ my ( $self, $rcall_hash ) = @_;
+
+ my $input_line = $rcall_hash->{input_line};
+ my $i = $rcall_hash->{i};
+ my $i_beg = $rcall_hash->{i_beg};
+ my $tok = $rcall_hash->{tok};
+ my $type = $rcall_hash->{type};
+ my $rtokens = $rcall_hash->{rtokens};
+ my $rtoken_map = $rcall_hash->{rtoken_map};
+ my $id_scan_state = $rcall_hash->{id_scan_state};
+ my $max_token_index = $rcall_hash->{max_token_index};
+
+ # Parse a sub name and prototype.
# At present there are three basic CALL TYPES which are
# distinguished by the starting value of '$tok':
# $rsaw_function_definition,
# $statement_type
- my ( $self, $rcall_hash ) = @_;
-
- my $input_line = $rcall_hash->{input_line};
- my $i = $rcall_hash->{i};
- my $i_beg = $rcall_hash->{i_beg};
- my $tok = $rcall_hash->{tok};
- my $type = $rcall_hash->{type};
- my $rtokens = $rcall_hash->{rtokens};
- my $rtoken_map = $rcall_hash->{rtoken_map};
- my $id_scan_state = $rcall_hash->{id_scan_state};
- my $max_token_index = $rcall_hash->{max_token_index};
-
my $i_entry = $i;
# Determine the CALL TYPE
sub is_possible_numerator {
+ my ( $self, $i, $rtokens, $max_token_index ) = @_;
+
# Look at the next non-comment character and decide if it could be a
# numerator. Returns the following code:
# -1 - division not possible
# 3 - division very probable: number and one of ; ] } follow
# 4 - is division, not pattern: number and ) follow
- my ( $self, $i, $rtokens, $max_token_index ) = @_;
my $divide_possible_code = 0;
my $next_token = $rtokens->[ $i + 1 ];
sub pattern_expected {
+ my ( $self, $i, $rtokens, $max_token_index ) = @_;
+
# This a filter for a possible pattern.
# It looks at the token after a possible pattern and tries to
# determine if that token could end a pattern.
# 1 - yes
# 0 - can't tell
# -1 - no
- my ( $self, $i, $rtokens, $max_token_index ) = @_;
my $is_pattern = 0;
my $next_token = $rtokens->[ $i + 1 ];
sub find_angle_operator_termination {
+ my ( $self, $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index )
+ = @_;
+
# We are looking at a '<' and want to know if it is an angle operator.
- # We are to return:
+ # Return:
# $i = pretoken index of ending '>' if found, current $i otherwise
# $type = 'Q' if found, '>' otherwise
- my ( $self, $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index )
- = @_;
+
my $i = $i_beg;
my $type = '<';
pos($input_line) = 1 + $rtoken_map->[$i];
sub scan_number_do {
- # scan a number in any of the formats that Perl accepts
+ my ( $self, $input_line, $i, $rtoken_map, $input_type, $max_token_index ) =
+ @_;
+
+ # Scan a number in any of the formats that Perl accepts
# Underbars (_) are allowed in decimal numbers.
- # input parameters -
+ # Given:
# $input_line - the string to scan
# $i - pre_token index to start scanning
# $rtoken_map - reference to the pre_token map giving starting
# character position in $input_line of token $i
- # output parameters -
+ # Return:
# $i - last pre_token index of the number just scanned
+ # $type - the token type ('v' or 'n')
# number - the number (characters); or undef if not a number
- my ( $self, $input_line, $i, $rtoken_map, $input_type, $max_token_index ) =
- @_;
my $pos_beg = $rtoken_map->[$i];
my $pos;
##my $i_begin = $i;
sub find_here_doc {
- # find the target of a here document, if any
- # input parameters:
- # $i - token index of the second < of <<
- # ($i must be less than the last token index if this is called)
- # output parameters:
- # $found_target = 0 didn't find target; =1 found target
- # HERE_TARGET - the target string (may be empty string)
- # $i - unchanged if not here doc,
- # or index of the last token of the here target
- # $saw_error - flag noting unbalanced quote on here target
my (
$self,
) = @_;
+ # Find the target of a here document, if any
+ # Given:
+ # $i - token index of the second < of <<
+ # ($i must be less than the last token index if this is called)
+ # Return:
+ # $found_target = 0 didn't find target; =1 found target
+ # HERE_TARGET - the target string (may be empty string)
+ # $i - unchanged if not here doc,
+ # or index of the last token of the here target
+ # $saw_error - flag noting unbalanced quote on here target
my $ibeg = $i;
my $found_target = 0;
my $here_doc_target = EMPTY_STRING;
sub do_quote {
- # follow (or continue following) quoted string(s)
- # $in_quote return code:
- # 0 - ok, found end
- # 1 - still must find end of quote whose target is $quote_character
- # 2 - still looking for end of first of two quotes
- #
- # Returns updated strings:
- # $quoted_string_1 = quoted string seen while in_quote=1
- # $quoted_string_2 = quoted string seen while in_quote=2
my (
$self,
) = @_;
+ # Follow (or continue following) quoted string(s)
+ # $in_quote = return code:
+ # 0 - ok, found end
+ # 1 - still must find end of quote whose target is $quote_character
+ # 2 - still looking for end of first of two quotes
+ #
+ # Returns updated strings:
+ # $quoted_string_1 = quoted string seen while in_quote=1
+ # $quoted_string_2 = quoted string seen while in_quote=2
+
my $quoted_string;
if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow
my $ibeg = $i;
sub follow_quoted_string {
- # scan for a specific token, skipping escaped characters
- # if the quote character is blank, use the first non-blank character
- # input parameters:
- # $rtokens = reference to the array of tokens
- # $i = the token index of the first character to search
- # $in_quote = number of quoted strings being followed
- # $beginning_tok = the starting quote character
- # $quote_pos = index to check next for alphanumeric delimiter
- # output parameters:
- # $i = the token index of the ending quote character
- # $in_quote = decremented if found end, unchanged if not
- # $beginning_tok = the starting quote character
- # $quote_pos = index to check next for alphanumeric delimiter
- # $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
- # $quoted_string = the text of the quote (without quotation tokens)
my (
$self,
) = @_;
+ # Scan for a specific token, skipping escaped characters.
+ # If the quote character is blank, use the first non-blank character.
+ # Given:
+ # $rtokens = reference to the array of tokens
+ # $i = the token index of the first character to search
+ # $in_quote = number of quoted strings being followed
+ # $beginning_tok = the starting quote character
+ # $quote_pos = index to check next for alphanumeric delimiter
+ # Return:
+ # $i = the token index of the ending quote character
+ # $in_quote = decremented if found end, unchanged if not
+ # $beginning_tok = the starting quote character
+ # $quote_pos = index to check next for alphanumeric delimiter
+ # $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
+ # $quoted_string = the text of the quote (without quotation tokens)
my ( $tok, $end_tok );
my $i = $i_beg - 1;
my $quoted_string = EMPTY_STRING;
sub indicate_error {
my ( $self, $msg, $line_number, $input_line, $pos, $caret ) = @_;
+
+ # write input line and line with carat's showing where error was detected
$self->interrupt_logfile();
$self->warning($msg);
$self->write_error_indicator_pair( $line_number, $input_line, $pos,
sub make_numbered_line {
- # Given an input line, its line number, and a character position of
- # interest, create a string not longer than 80 characters of the form
+ my ( $lineno, $str, $pos ) = @_;
+
+ # Given:
+ # $lineno=line number
+ # $str = an input line
+ # $pos = character position of interest
+ # Create a string not longer than 80 characters of the form:
# $lineno: sub_string
- # such that the sub_string of $str contains the position of interest
+ # such that the sub_string of $str contains the position of interest
#
# Here is an example of what we want, in this case we add trailing
# '...' because the line is long.
# - $underline = a blank 'underline' which is all spaces with the same
# number of characters as the numbered line.
- my ( $lineno, $str, $pos ) = @_;
my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
my $excess = length($str) - $offset - 68;
my $numc = ( $excess > 0 ) ? 68 : undef;
sub write_on_underline {
+ my ( $underline, $pos, $pos_chr ) = @_;
+
# The "underline" is a string that shows where an error is; it starts
# out as a string of blanks with the same length as the numbered line of
# code above it, and we have to add marking to show where an error is.
# This is a trivial thing to do with substr, but there is some
# checking to do.
- my ( $underline, $pos, $pos_chr ) = @_;
-
# check for error..shouldn't happen
if ( $pos < 0 || $pos > length($underline) ) {
return $underline;
sub show_tokens {
- # this is an old debug routine
- # not called, but saved for reference
+ # This is an uncalled debug routine, saved for reference
my ( $rtokens, $rtoken_map ) = @_;
my $num = scalar( @{$rtokens} );
$pkg reports VERSION='$VERSION'.
==============================================================================
EOM
-
- # We shouldn't get here, but this return is to keep Perl-Critic from
- # complaining.
- return;
+ croak "unexpected return from sub Die";
} ## end sub Fault
my %valid_LINE_keys;
sub initialize_for_new_group {
my ($self) = @_;
+ # initialize for a new group of lines to be aligned vertically
+
$self->[_rgroup_lines_] = [];
$self->[_group_type_] = EMPTY_STRING;
$self->[_zero_count_] = 0;
sub join_hanging_comment {
+ my ( $new_line, $old_line ) = @_;
+
# Add dummy fields to a hanging side comment to make it look
# like the first line in its potential group. This simplifies
# the coding.
- my ( $new_line, $old_line ) = @_;
+
+ # Given:
+ # $new_line = ref to hash of the line to be possibly changed
+ # $old_line = ref to hash of the previous reference line
+ # Return:
+ # true if new line modified
+ # false otherwise
my $jmax = $new_line->{'jmax'};
my $line = shift;
+ # Given:
+ # $line = ref to hash of values for a line
+ # Task:
+ # Set 'list_type' property
+
# A list will be taken to be a line with a forced break in which all
# of the field separators are commas or comma-arrows (except for the
# trailing #)
# $prev_line = the line just before $new_line
# $group_line_count = number of lines in the current group
- # returns a flag and a value as follows:
+ # Returns: a flag and a value as follows:
# return (0, $imax_align) if the line does not match
# return (1, $imax_align) if the line matches but does not fit
# return (2, $imax_align) if the line matches and fits
use constant MATCH_NO_FIT => 1;
use constant MATCH_AND_FIT => 2;
+ # Return value '$return_value' describes the match with 3 possible values
my $return_value;
- # Returns '$imax_align' which is the index of the maximum matching token.
+ # Return value '$imax_align' is the index of the maximum matching token.
# It will be used in the subsequent left-to-right sweep to align as many
# tokens as possible for lines which partially match.
my $imax_align = -1;
# The new line has alignments identical to the current group. Now we have
# to fit the new line into the group without causing a field to exceed the
# line length limit.
- # return true if successful
- # return false if not successful
+
+ # Given:
+ # $new_line = ref to hash of the new line values
+ # $old_line = ref to hash of the previous line values
+ # Returns:
+ # true if the new line alignments fit the old line
+ # false otherwise
my $jmax = $new_line->{'jmax'};
my $leading_space_count = $new_line->{'leading_space_count'};
my ($new_line) = @_;
+ # Given:
+ # $new_line = ref to hash of a line starting a new group
+ # Task:
+ # setup alignment fields for this line
+
my $jmax = $new_line->{'jmax'};
my $rfield_lengths = $new_line->{'rfield_lengths'};
my $col = $new_line->{'leading_space_count'};
sub level_change {
+ my ( $self, $leading_space_count, $diff, $level ) = @_;
+
# compute decrease in level when we remove $diff spaces from the
# leading spaces
- my ( $self, $leading_space_count, $diff, $level ) = @_;
+
+ # Given:
+ # $leading_space_count = current leading line spaces
+ # $diff = number of spaces to remove
+ # $level = current indentation level
+ # Return:
+ # $level = updated level accounting for the loss of spaces
if ($rOpts_indent_columns) {
my $olev =
sub sweep_top_down {
my ( $self, $rlines, $group_level ) = @_;
+ # This is the first of two major sweeps to find alignments.
+ # The other is sweep_left_to_right.
+
+ # Given:
+ # $rlines = ref to hash of lines in this main alignment group
+ # $group_level = common indentation level of these lines
+ # Return:
+ # $rgroups = ref to hash of subgroups created
+
# Partition the set of lines into final alignment subgroups
# and store the alignments with the lines.
my ( $line_m, $line, $imax_min ) = @_;
+ # Decide if two adjacent, isolated lines should be aligned
+
# Given:
- # two isolated (list) lines
+ # $line_m, $line = two isolated (list) lines
# imax_min = number of common alignment tokens
# Return:
# $pad_max = maximum suggested pad distance
# = 0 if alignment not recommended
- # Note that this is only for two lines which do not have alignment tokens
- # in common with any other lines. It is intended for lists, but it might
- # also be used for two non-list lines with a common leading '='.
# Allow alignment if the difference in the two unpadded line lengths
# is not more than either line length. The idea is to avoid
# 'VARCHAR', DBI::SQL_VARCHAR, undef, "'", "'", undef, 0, 1,
# 1, 0, 0, 0, undef, 0, 0
# ];
+
+ # Note that this is only for two lines which do not have alignment tokens
+ # in common with any other lines. It is intended for lists, but it might
+ # also be used for two non-list lines with a common leading '='.
+
my $rfield_lengths = $line->{'rfield_lengths'};
my $rfield_lengths_m = $line_m->{'rfield_lengths'};
my ( $rlines, $rgroups, $group_level ) = @_;
+ # This is the second of two major sweeps to find alignments.
+ # The other is sweep_top_down.
+
+ # Given:
+ # $rlines = ref to hash of lines in this main alignment group
+ # $rgroups = ref to hash of subgroups
+ # $group_level = common indentation level of these lines
+ # Task:
+ # add leading alignments where possible
+
# So far we have divided the lines into groups having an equal number of
# identical alignments. Here we are going to look for common leading
# alignments between the different groups and align them when possible.
my ( $line_obj, $ridel ) = @_;
- # $line_obj is the line to be modified
- # $ridel is a ref to list of indexes to be deleted
+ # Given:
+ # $line_obj = the line to be modified
+ # $ridel = a ref to list of indexes to be deleted
- # remove an unused alignment token(s) to improve alignment chances
+ # remove unused alignment token(s) to improve alignment chances
return if ( !defined($line_obj) || !defined($ridel) || !@{$ridel} );
sub decode_alignment_token {
+ my ($tok) = @_;
+
# Unpack the values packed in an alignment token
+
+ # Given:
+ # $tok = an alignment token
+ # Returns:
+ # ( $raw_tok, $lev, $tag, $tok_count )
#
# Usage:
# my ( $raw_tok, $lev, $tag, $tok_count ) =
# $nport = $port = shift || $name;
# The first '=' may either be '=0' or '=0.1' [level 0, first equals]
# The second '=' will be '=0.2' [level 0, second equals]
- my ($tok) = @_;
if ( defined( $decoded_token{$tok} ) ) {
return @{ $decoded_token{$tok} };
sub delete_unmatched_tokens {
my ( $rlines, $group_level ) = @_;
- # This is a important first step in vertical alignment in which
- # we remove as many obviously un-needed alignment tokens as possible.
+ # Remove as many obviously un-needed alignment tokens as possible.
# This will prevent them from interfering with the final alignment.
- # Returns:
+ # Given:
+ # $rlines = ref to hash of all lines in this alignment group
+ # $group_level = their comment indentation level
+
+ # Return:
my $max_lev_diff = 0; # used to avoid a call to prune_tree
my $saw_side_comment = 0; # used to avoid a call for side comments
my $saw_signed_number = 0; # used to avoid a call for -vsn
my ( $group_level, $rnew_lines, $saw_side_comment ) = @_;
- #------------------------------------------------------------
- # Loop to create a hash of alignment token info for each line
- #------------------------------------------------------------
+ # Create a hash of alignment token info for each line
+ # This info will be used to find common alignments
+
+ # Given:
+ # $group_level = common indentation level
+ # $rnew_lines = ref to hash of line info
+ # $saw_side_comment = true if there is a side comment
+ # Return:
+ # $rline_hashes = ref to hash with new line vars
+ # \@equals_info = ref to array with info on any '=' tokens
+ # $saw_side_comment = updated side comment flag
+ # $max_lev_diff = maximum level change seen
+
+ #----------------
+ # Loop over lines
+ #----------------
my $rline_hashes = [];
my @equals_info;
my @line_info; # no longer used
my ( $rlines, $rnew_lines, $rsubgroups, $group_level ) = @_;
# Compare each pair of lines and save information about common matches
- # $rlines = list of lines including hanging side comments
- # $rnew_lines = list of lines without any hanging side comments
- # $rsubgroups = list of subgroups of the new lines
+
+ # Given:
+ # $rlines = list of lines including hanging side comments
+ # $rnew_lines = list of lines without any hanging side comments
+ # $rsubgroups = list of subgroups of the new lines
+ # Return:
+ # $saw_signed_number = true if a field has a signed number
+ # (needed for --valign-signed-numbers)
# TODO:
# Maybe change: imax_pair => pair_match_info = ref to array
my $pat_m = $rcall_hash->{pat_m};
my $pad = $rcall_hash->{pad};
- # helper routine for sub match_line_pairs to decide if patterns in two
- # lines match well enough..Given
+ # This is a helper routine for sub match_line_pairs to decide if patterns
+ # in two lines match well enough
+ # Given:
# $tok_m, $pat_m = token and pattern of first line
# $tok, $pat = token and pattern of second line
# $pad = 0 if no padding is needed, !=0 otherwise
- # return code:
+ # Return code:
# 0 = patterns match, continue
# 1 = no match
# 2 = no match, and lines do not match at all
sub fat_comma_to_comma {
my ($str) = @_;
- # We are changing '=>' to ',' and removing any trailing decimal count
- # because currently fat commas have a count and commas do not.
- # For example, we will change '=>2+{-3.2' into ',2+{-3'
+ # Given:
+ # $str = a decorated fat comma alignment token
+
+ # Change '=>' to ','
+ # and remove any trailing decimal count because currently fat commas have a
+ # count and commas do not.
+
+ # For example, change '=>2+{-3.2' into ',2+{-3'
if ( $str =~ /^=>([^\.]*)/ ) { $str = ',' . $1 }
return $str;
} ## end sub fat_comma_to_comma
sub get_line_token_info {
- # scan lines of tokens and return summary information about the range of
- # levels and patterns.
my ($rlines) = @_;
+ # Given:
+ # $rlines = ref to array of lines in this group
+
+ # Scan lines of tokens and return summary information about the range of
+ # levels and patterns.
+
# First scan to check monotonicity. Here is an example of several
# lines which are monotonic. The = is the lowest level, and
# the commas are all one level deeper. So this is not nonmonotonic.
sub prune_alignment_tree {
my ($rlines) = @_;
+
+ # Given:
+ # $rlines = ref to array of lines in this group
+
+ # Prune the tree of alignments to limit depth of alignments
+
my $jmax = @{$rlines} - 1;
return if ( $jmax <= 0 );
# a previous side comment should be forgotten. This involves
# checking several rules.
- # Return true to KEEP old comment location
- # Return false to FORGET old comment location
+ # Given:
+ # $line = ref to info hash for the line of interest
+ # $line_number = number of this line in the output stream
+ # $level = indentation level of this line
+ # $num5 = ..see comments below
+
+ # Return:
+ # true to KEEP old comment location
+ # false to FORGET old comment location
my $KEEP = 1;
my $FORGET = 0;
sub combine_fields {
+ my ( $line_0, $line_1, $imax_align ) = @_;
+
+ # Given:
+ # $line_0, $line_1 = two adjacent lines
+ # $imax_align = index of last alignment wanted
+
+ # Task:
# We have a group of two lines for which we do not want to align tokens
# between index $imax_align and the side comment. So we will delete fields
# between $imax_align and the side comment. Alignments have already
# been set so we have to adjust them.
- my ( $line_0, $line_1, $imax_align ) = @_;
-
if ( !defined($imax_align) ) { $imax_align = -1 }
# First delete the unwanted tokens
my ( $self, $rinput, $leading_string, $leading_string_length ) = @_;
+ # handle a cached line ..
+ # either append the current line to it or write it out
+
# The cached line will either be:
# - passed along to step_C, or
# - or combined with the current line