sub keyword_group_scan {
my $self = shift;
+ # Called once per file to process the --keyword-group-blanks-* parameters.
+
# Manipulate blank lines around keyword groups (kgb* flags)
# Scan all lines looking for runs of consecutive lines beginning with
# selected keywords. Example keywords are 'my', 'our', 'local', ... but
# end of loop over all lines
$end_group->();
return $rhash_of_desires;
-}
+
+} ## end sub keyword_group_scan
sub process_all_lines {
- # Loop over old lines to set new line break points
+ # Main loop over all lines of a file.
+ # Lines are processed according to type.
my $self = shift;
my $rlines = $self->[_rlines_];
}
}
return;
-}
+
+} ## end sub process_all_lines
{ ## begin closure check_line_hashes
+ # This code checks that no autovivification occurs in the 'line' hash
+
my %valid_line_hash;
BEGIN {
sub write_line {
- # We are caching tokenized lines as they arrive and converting them to the
- # format needed for the final formatting.
+ # This routine originally received lines of code and immediately processed
+ # them. That was efficient when memory was limited, but now it just saves
+ # the lines it receives. They get processed all together after the last
+ # line is received.
+
+ # As tokenized lines are received they are converted to the format needed
+ # for the final formatting.
my ( $self, $line_of_tokens_old ) = @_;
my $rLL = $self->[_rLL_];
my $Klimit = $self->[_Klimit_];
sub initialize_whitespace_hashes {
- # initialize these global hashes, which control the use of
- # whitespace around tokens:
+ # This is called once before formatting begins to initialize these global
+ # hashes, which control the use of whitespace around tokens:
#
# %binary_ws_rules
# %want_left_space
sub set_whitespace_flags {
- # This routine examines each pair of nonblank tokens and
- # sets a flag indicating if white space is needed.
+ # This routine is called once per file to set whitespace flags for that
+ # file. This routine examines each pair of nonblank tokens and sets a flag
+ # indicating if white space is needed.
#
- # $rwhitespace_flags->[$j] is a flag indicating whether a white space
- # BEFORE token $j is needed, with the following values:
+ # $rwhitespace_flags->[$j] is a flag indicating whether a white space
+ # BEFORE token $j is needed, with the following values:
#
- # WS_NO = -1 do not want a space before token $j
+ # WS_NO = -1 do not want a space BEFORE token $j
# WS_OPTIONAL= 0 optional space or $j is a whitespace
- # WS_YES = 1 want a space before token $j
+ # WS_YES = 1 want a space BEFORE token $j
#
my $self = shift;
my $self = shift;
return if $rOpts->{'indent-only'};
+ # This routine is called once per file to do as much formatting as possible
+ # before new line breaks are set.
+
# This routine makes all necessary and possible changes to the tokenization
# after the initial tokenization of the file. This is a tedious routine,
# but basically it consists of inserting and deleting whitespace between
{ ## begin closure scan_comments
+ # This routine is called once per file at the start of processing to
+ # make a pass through the lines, looking at lines of CODE and identifying
+ # special processing needs, such format skipping sections marked by
+ # special comments.
+
my $Last_line_had_side_comment;
my $In_format_skipping_section;
my $Saw_VERSION_in_this_file;
sub find_nested_pairs {
my $self = shift;
+ # This routine is called once per file to do preliminary work needed for
+ # the --weld-nested option. This information is also needed for adding
+ # semicolons.
+
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
}
sub get_old_line_index {
+
+ # return index of the original line that token K was on
my ( $self, $K ) = @_;
my $rLL = $self->[_rLL_];
return 0 unless defined($K);
}
sub get_old_line_count {
+
+ # return number of input lines separating two tokens
my ( $self, $Kbeg, $Kend ) = @_;
my $rLL = $self->[_rLL_];
return 0 unless defined($Kbeg);
sub weld_containers {
- # do any welding operations
+ # Called once per file to do any welding operations requested by --weld*
+ # flags.
my ($self) = @_;
return if ( $rOpts->{'indent-only'} );
sub weld_cuddled_blocks {
my ($self) = @_;
+ # Called once per file to handle cuddled formatting
+
my $rweld_len_right_closing = $self->[_rweld_len_right_closing_];
# This routine implements the -cb flag by finding the appropriate
sub weld_nested_containers {
my ($self) = @_;
+ # Called once per file for option '--weld-nested-containers'
+
my $rweld_len_left_closing = $self->[_rweld_len_left_closing_];
my $rweld_len_left_opening = $self->[_rweld_len_left_opening_];
my $rweld_len_right_closing = $self->[_rweld_len_right_closing_];
}
sub weld_nested_quotes {
+
+ # Called once per file for option '--weld-nested-containers'. This
+ # does welding on qw quotes.
+
my $self = shift;
my $rweld_len_left_closing = $self->[_rweld_len_left_closing_];
my ($self) = @_;
+ # Called once per file to do special indentation adjustments.
# These routines adjust levels either by changing _CI_LEVEL_ directly or
# by setting modified levels in the array $self->[_radjusted_levels_].
# They will create this array if they are active, and otherwise it will be
my ($self) = @_;
# Initialize _radjusted_levels if it has not yet been initialized.
+ # It is only needed when certain special adjustments are done.
my $radjusted_levels = $self->[_radjusted_levels_];
my $rLL = $self->[_rLL_];
my $Kmax = @{$rLL} - 1;
sub non_indenting_braces {
+ # Called once per file to handle the --non-indenting-braces parameter.
# Remove indentation within marked braces if requested
# NOTE: This must be the first routine to reference $radjusted_levels;
my ($self) = @_;
sub whitespace_cycle_adjustment {
my $self = shift;
- my $rLL = $self->[_rLL_];
+
+ # Called once per file to implement the --whitespace-cycle option
+ my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
my $radjusted_levels = $self->[_radjusted_levels_];
sub adjust_container_indentation {
- # adjust continuation indentation for certain tokens if requested
- # by these flags:
+ # Called once per file to implement the -bbhb* and related flags:
# -bbhbi=n
# -bbsbi=n
# -bbpi=n
+ # where:
+
# n=0 default indentation (usually one ci)
# n=1 outdent one ci
# n=2 indent one level (minus one ci)
sub bli_adjustment {
- # if -bli is set, adds one continuation indentation for certain braces
+ # Called once per file to implement the --brace-left-and-indent option.
+ # If -bli is set, adds one continuation indentation for certain braces
my $self = shift;
return unless ( $rOpts->{'brace-left-and-indent'} );
my $rLL = $self->[_rLL_];
{ ## begin closure set_leading_whitespace (for -lp indentation)
+ # These routines are called batch-by-batch to handle the -lp indentation
+ # option. The coding is rather complex, but is only for -lp.
+
my $gnu_position_predictor;
my $gnu_sequence_number;
my $line_start_index_to_go;
sub wrapup {
- # flush buffer and write any informative messages
+ # This is the last routine called when a file is formatted.
+ # Flush buffer and write any informative messages
my $self = shift;
$self->flush();
sub check_options {
- # This routine is called to check the Opts hash after it is defined
- # and to configure the control hashes to the selected run parameters.
+ # This routine is called to check the user-supplied run parameters
+ # and to configure the control hashes to them.
$rOpts = shift;
initialize_whitespace_hashes();
|| $typel eq 'J' && $typer eq 'J'
; # the value of this long logic sequence is the result we want
-##if ($typel eq 'j') {print STDERR "typel=$typel typer=$typer result='$result'\n"}
return $result;
}
} ## end closure is_essential_whitespace
}
sub copy_token_as_type {
+
+ # This provides a quick way to create a new token by
+ # slightly modifying an existing token.
my ( $rold_token, $type, $token ) = @_;
if ( $type eq 'b' ) {
$token = " " unless defined($token);
{ ## begin closure process_line_of_CODE
+ # The routines in this closure receive lines of code and combine them into
+ # 'batches' and send them along. A 'batch' is the unit of code which can be
+ # processed further as a unit. It has the property that it is the largest
+ # amount of code into which which perltidy is free to place one or more
+ # line breaks within it without violating any constraints.
+
+ # When a new batch is formed it is sent to sub 'grind_batch_of_code'.
+
# flags needed by the store routine
my $line_of_tokens;
my $no_internal_newlines;
{ ## begin closure grind_batch_of_CODE
- # Keep track of consecutive nonblank lines so that we can insert occasional
- # blanks
+ # The routines in this closure begin the processing of a 'batch' of code.
+
+ # A variable to keep track of consecutive nonblank lines so that we can
+ # insert occasional blanks
my @nonblank_lines_at_depth;
- # remember maximum size of previous batches; this is needed by the logical
- # padding routine
+ # A variable to remember maximum size of previous batches; this is needed
+ # by the logical padding routine
my $peak_batch_size;
my $batch_count;
{ ## begin closure accumulate_csc_text
- # Variables related to forming closing side comments.
+# These routines are called once per batch when the --closing-side-comments flag
+# has been set.
my %block_leading_text;
my %block_opening_line_number;
{ ## begin closure balance_csc_text
+ # Some additional routines for handling the --closing-side-comments option
+
my %matching_char;
BEGIN {
return ( $closing_side_comment, $cscw_block_comment );
}
-sub previous_nonblank_token {
+sub make_paren_name {
my ( $self, $i ) = @_;
+
+ # The token at index $i is a '('.
+ # Create an alignment name for it to avoid incorrect alignments.
+
+ # Start with the name of the previous nonblank token...
my $name = "";
my $im = $i - 1;
return "" if ( $im < 0 );
return "" if ( $im < 0 );
$name = $tokens_to_go[$im];
- # prepend any sub name to an isolated -> to avoid unwanted alignments
+ # Prepend any sub name to an isolated -> to avoid unwanted alignments
# [test case is test8/penco.pl]
if ( $name eq '->' ) {
$im--;
$name = $tokens_to_go[$im] . $name;
}
}
+
+ # Finally, remove any leading arrows
+ $name =~ s/^->//;
return $name;
}
my $name = $tok;
if ( $tok eq '(' ) {
- $name = $self->previous_nonblank_token($i);
- $name =~ s/^->//;
+ $name = $self->make_paren_name($i);
}
$container_name{$depth} = "+" . $name;
{ ## begin closure set_bond_strengths
+ # These routines and variables are involved in deciding where to break very
+ # long lines.
+
my %is_good_keyword_breakpoint;
my %is_lt_gt_le_ge;
sub pad_array_to_go {
- # to simplify coding in scan_list and set_bond_strengths, it helps
+ # To simplify coding in scan_list and set_bond_strengths, it helps
# to create some extra blank tokens at the end of the arrays
+ # FIXME: it would be nice to eliminate the need for this routine.
my ($self) = @_;
$tokens_to_go[ $max_index_to_go + 1 ] = '';
$tokens_to_go[ $max_index_to_go + 2 ] = '';
{ ## begin closure scan_list
+ # These routines and variables are involved in finding good
+ # places to break long lists.
+
my (
$block_type, $current_depth,
$depth, $i,
# items can be vertically aligned. The output of this routine is
# stored in the array @forced_breakpoint_to_go, which is used to set
# final breakpoints.
+
+ # It is called once per batch if the batch is a list.
my $rOpts_break_at_old_attribute_breakpoints =
$rOpts->{'break-at-old-attribute-breakpoints'};
my $rOpts_break_at_old_comma_breakpoints =
"FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
};
+ ######################################################################
# NOTE: if we call set_closing_breakpoint below it will then call this
# routing back. So there is the possibility of an infinite loop if a
# programming error is made. As a precaution, I have added a check on
# the forced_breakpoint flag, so that we won't keep trying to set it.
# That will give additional protection against a loop.
+ ######################################################################
if ( $i_nonblank >= 0
&& $nobreak_to_go[$i_nonblank] == 0
&& !$forced_breakpoint_to_go[$i_nonblank] )
{ ## begin closure recombine_breakpoints
+ # This routine is called once per batch to see if it would be better
+ # to combine some of the lines into which the batch has been broken.
+
my %is_amp_amp;
my %is_ternary;
my %is_math_op;
my ( $self, $ri_left, $ri_right ) = @_;
+ # This routine is called once per batch to implement the parameters
+ # --break-before-hash-brace, etc.
+
+ # Nothing to do if none of these parameters has been set
return unless %break_before_container_types;
my $nmax = @{$ri_right} - 1;
my ( $self, $ri_left, $ri_right ) = @_;
+ # Called once per batch to look for and do any final line breaks for
+ # long ternary chains
+
my $nmax = @{$ri_right} - 1;
# scan the left and right end tokens of all lines
# Check to see if tokens at K1 and K2 are in the same container,
# and not separated by certain characters: => , ? : || or
- # This version uses the newer $rLL data structure
+ # This version uses the newer $rLL data structure.
my ( $self, $K1, $K2 ) = @_;
if ( $K2 < $K1 ) { ( $K1, $K2 ) = ( $K2, $K1 ) }
sub set_continuation_breaks {
+ # Called once per batch to set breaks in long lines.
+
# Define an array of indexes for inserting newline characters to
# keep the line lengths below the maximum desired length. There is
# an implied break after the last token, so it need not be included.
sub compare_indentation_levels {
- # check to see if output line tabbing agrees with input line
+ # Check to see if output line tabbing agrees with input line
# this can be very useful for debugging a script which has an extra
- # or missing brace
+ # or missing brace.
+
my ( $self, $guessed_indentation_level, $structural_indentation_level,
$line_number )
= @_;