From f369ade216f8dea16a04eeca66f63dc18c37a543 Mon Sep 17 00:00:00 2001 From: Steve Hancock Date: Thu, 24 Sep 2020 06:53:56 -0700 Subject: [PATCH] update comments --- lib/Perl/Tidy/Formatter.pm | 165 +++++++++++++++++++++++++++++-------- 1 file changed, 130 insertions(+), 35 deletions(-) diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm index 014fc63f..54d68a81 100644 --- a/lib/Perl/Tidy/Formatter.pm +++ b/lib/Perl/Tidy/Formatter.pm @@ -860,6 +860,8 @@ sub prepare_for_next_batch { sub keyword_group_scan { my $self = shift; + # Called once per file to process the --keyword-group-blanks-* parameters. + # Manipulate blank lines around keyword groups (kgb* flags) # Scan all lines looking for runs of consecutive lines beginning with # selected keywords. Example keywords are 'my', 'our', 'local', ... but @@ -1348,11 +1350,13 @@ EOM # end of loop over all lines $end_group->(); return $rhash_of_desires; -} + +} ## end sub keyword_group_scan sub process_all_lines { - # Loop over old lines to set new line break points + # Main loop over all lines of a file. + # Lines are processed according to type. my $self = shift; my $rlines = $self->[_rlines_]; @@ -1513,10 +1517,13 @@ sub process_all_lines { } } return; -} + +} ## end sub process_all_lines { ## begin closure check_line_hashes + # This code checks that no autovivification occurs in the 'line' hash + my %valid_line_hash; BEGIN { @@ -1562,8 +1569,13 @@ sub process_all_lines { sub write_line { - # We are caching tokenized lines as they arrive and converting them to the - # format needed for the final formatting. + # This routine originally received lines of code and immediately processed + # them. That was efficient when memory was limited, but now it just saves + # the lines it receives. They get processed all together after the last + # line is received. + + # As tokenized lines are received they are converted to the format needed + # for the final formatting. my ( $self, $line_of_tokens_old ) = @_; my $rLL = $self->[_rLL_]; my $Klimit = $self->[_Klimit_]; @@ -1674,8 +1686,8 @@ sub write_line { sub initialize_whitespace_hashes { - # initialize these global hashes, which control the use of - # whitespace around tokens: + # This is called once before formatting begins to initialize these global + # hashes, which control the use of whitespace around tokens: # # %binary_ws_rules # %want_left_space @@ -1806,15 +1818,16 @@ sub initialize_whitespace_hashes { sub set_whitespace_flags { - # This routine examines each pair of nonblank tokens and - # sets a flag indicating if white space is needed. + # This routine is called once per file to set whitespace flags for that + # file. This routine examines each pair of nonblank tokens and sets a flag + # indicating if white space is needed. # - # $rwhitespace_flags->[$j] is a flag indicating whether a white space - # BEFORE token $j is needed, with the following values: + # $rwhitespace_flags->[$j] is a flag indicating whether a white space + # BEFORE token $j is needed, with the following values: # - # WS_NO = -1 do not want a space before token $j + # WS_NO = -1 do not want a space BEFORE token $j # WS_OPTIONAL= 0 optional space or $j is a whitespace - # WS_YES = 1 want a space before token $j + # WS_YES = 1 want a space BEFORE token $j # my $self = shift; @@ -2339,6 +2352,9 @@ sub respace_tokens { my $self = shift; return if $rOpts->{'indent-only'}; + # This routine is called once per file to do as much formatting as possible + # before new line breaks are set. + # This routine makes all necessary and possible changes to the tokenization # after the initial tokenization of the file. This is a tedious routine, # but basically it consists of inserting and deleting whitespace between @@ -3218,6 +3234,11 @@ sub respace_tokens { { ## begin closure scan_comments + # This routine is called once per file at the start of processing to + # make a pass through the lines, looking at lines of CODE and identifying + # special processing needs, such format skipping sections marked by + # special comments. + my $Last_line_had_side_comment; my $In_format_skipping_section; my $Saw_VERSION_in_this_file; @@ -3439,6 +3460,10 @@ sub respace_tokens { sub find_nested_pairs { my $self = shift; + # This routine is called once per file to do preliminary work needed for + # the --weld-nested option. This information is also needed for adding + # semicolons. + my $rLL = $self->[_rLL_]; return unless ( defined($rLL) && @{$rLL} ); @@ -3585,6 +3610,8 @@ sub Debug_dump_tokens { } sub get_old_line_index { + + # return index of the original line that token K was on my ( $self, $K ) = @_; my $rLL = $self->[_rLL_]; return 0 unless defined($K); @@ -3592,6 +3619,8 @@ sub get_old_line_index { } sub get_old_line_count { + + # return number of input lines separating two tokens my ( $self, $Kbeg, $Kend ) = @_; my $rLL = $self->[_rLL_]; return 0 unless defined($Kbeg); @@ -3840,7 +3869,8 @@ sub mark_short_nested_blocks { sub weld_containers { - # do any welding operations + # Called once per file to do any welding operations requested by --weld* + # flags. my ($self) = @_; return if ( $rOpts->{'indent-only'} ); @@ -3892,6 +3922,8 @@ sub cumulative_length_after_K { sub weld_cuddled_blocks { my ($self) = @_; + # Called once per file to handle cuddled formatting + my $rweld_len_right_closing = $self->[_rweld_len_right_closing_]; # This routine implements the -cb flag by finding the appropriate @@ -4061,6 +4093,8 @@ sub weld_cuddled_blocks { sub weld_nested_containers { my ($self) = @_; + # Called once per file for option '--weld-nested-containers' + my $rweld_len_left_closing = $self->[_rweld_len_left_closing_]; my $rweld_len_left_opening = $self->[_rweld_len_left_opening_]; my $rweld_len_right_closing = $self->[_rweld_len_right_closing_]; @@ -4396,6 +4430,10 @@ EOM } sub weld_nested_quotes { + + # Called once per file for option '--weld-nested-containers'. This + # does welding on qw quotes. + my $self = shift; my $rweld_len_left_closing = $self->[_rweld_len_left_closing_]; @@ -4585,6 +4623,7 @@ sub adjust_indentation_levels { my ($self) = @_; + # Called once per file to do special indentation adjustments. # These routines adjust levels either by changing _CI_LEVEL_ directly or # by setting modified levels in the array $self->[_radjusted_levels_]. # They will create this array if they are active, and otherwise it will be @@ -4615,6 +4654,7 @@ sub initialize_adjusted_levels { my ($self) = @_; # Initialize _radjusted_levels if it has not yet been initialized. + # It is only needed when certain special adjustments are done. my $radjusted_levels = $self->[_radjusted_levels_]; my $rLL = $self->[_rLL_]; my $Kmax = @{$rLL} - 1; @@ -4639,6 +4679,7 @@ sub clip_adjusted_levels { sub non_indenting_braces { + # Called once per file to handle the --non-indenting-braces parameter. # Remove indentation within marked braces if requested # NOTE: This must be the first routine to reference $radjusted_levels; my ($self) = @_; @@ -4708,7 +4749,9 @@ sub non_indenting_braces { sub whitespace_cycle_adjustment { my $self = shift; - my $rLL = $self->[_rLL_]; + + # Called once per file to implement the --whitespace-cycle option + my $rLL = $self->[_rLL_]; return unless ( defined($rLL) && @{$rLL} ); my $radjusted_levels = $self->[_radjusted_levels_]; @@ -4774,13 +4817,14 @@ sub whitespace_cycle_adjustment { sub adjust_container_indentation { - # adjust continuation indentation for certain tokens if requested - # by these flags: + # Called once per file to implement the -bbhb* and related flags: # -bbhbi=n # -bbsbi=n # -bbpi=n + # where: + # n=0 default indentation (usually one ci) # n=1 outdent one ci # n=2 indent one level (minus one ci) @@ -4868,7 +4912,8 @@ sub adjust_container_indentation { sub bli_adjustment { - # if -bli is set, adds one continuation indentation for certain braces + # Called once per file to implement the --brace-left-and-indent option. + # If -bli is set, adds one continuation indentation for certain braces my $self = shift; return unless ( $rOpts->{'brace-left-and-indent'} ); my $rLL = $self->[_rLL_]; @@ -5107,6 +5152,9 @@ sub get_available_spaces_to_go { { ## begin closure set_leading_whitespace (for -lp indentation) + # These routines are called batch-by-batch to handle the -lp indentation + # option. The coding is rather complex, but is only for -lp. + my $gnu_position_predictor; my $gnu_sequence_number; my $line_start_index_to_go; @@ -5834,7 +5882,8 @@ sub excess_line_length { sub wrapup { - # flush buffer and write any informative messages + # This is the last routine called when a file is formatted. + # Flush buffer and write any informative messages my $self = shift; $self->flush(); @@ -5949,8 +5998,8 @@ sub wrapup { sub check_options { - # This routine is called to check the Opts hash after it is defined - # and to configure the control hashes to the selected run parameters. + # This routine is called to check the user-supplied run parameters + # and to configure the control hashes to them. $rOpts = shift; initialize_whitespace_hashes(); @@ -7161,7 +7210,6 @@ EOM || $typel eq 'J' && $typer eq 'J' ; # the value of this long logic sequence is the result we want -##if ($typel eq 'j') {print STDERR "typel=$typel typer=$typer result='$result'\n"} return $result; } } ## end closure is_essential_whitespace @@ -7385,6 +7433,9 @@ sub tight_paren_follows { } sub copy_token_as_type { + + # This provides a quick way to create a new token by + # slightly modifying an existing token. my ( $rold_token, $type, $token ) = @_; if ( $type eq 'b' ) { $token = " " unless defined($token); @@ -7416,6 +7467,14 @@ sub copy_token_as_type { { ## begin closure process_line_of_CODE + # The routines in this closure receive lines of code and combine them into + # 'batches' and send them along. A 'batch' is the unit of code which can be + # processed further as a unit. It has the property that it is the largest + # amount of code into which which perltidy is free to place one or more + # line breaks within it without violating any constraints. + + # When a new batch is formed it is sent to sub 'grind_batch_of_code'. + # flags needed by the store routine my $line_of_tokens; my $no_internal_newlines; @@ -8472,12 +8531,14 @@ sub consecutive_nonblank_lines { { ## begin closure grind_batch_of_CODE - # Keep track of consecutive nonblank lines so that we can insert occasional - # blanks + # The routines in this closure begin the processing of a 'batch' of code. + + # A variable to keep track of consecutive nonblank lines so that we can + # insert occasional blanks my @nonblank_lines_at_depth; - # remember maximum size of previous batches; this is needed by the logical - # padding routine + # A variable to remember maximum size of previous batches; this is needed + # by the logical padding routine my $peak_batch_size; my $batch_count; @@ -10085,7 +10146,8 @@ sub correct_lp_indentation { { ## begin closure accumulate_csc_text - # Variables related to forming closing side comments. +# These routines are called once per batch when the --closing-side-comments flag +# has been set. my %block_leading_text; my %block_opening_line_number; @@ -10470,6 +10532,8 @@ sub correct_lp_indentation { { ## begin closure balance_csc_text + # Some additional routines for handling the --closing-side-comments option + my %matching_char; BEGIN { @@ -10720,8 +10784,13 @@ sub add_closing_side_comment { return ( $closing_side_comment, $cscw_block_comment ); } -sub previous_nonblank_token { +sub make_paren_name { my ( $self, $i ) = @_; + + # The token at index $i is a '('. + # Create an alignment name for it to avoid incorrect alignments. + + # Start with the name of the previous nonblank token... my $name = ""; my $im = $i - 1; return "" if ( $im < 0 ); @@ -10729,7 +10798,7 @@ sub previous_nonblank_token { return "" if ( $im < 0 ); $name = $tokens_to_go[$im]; - # prepend any sub name to an isolated -> to avoid unwanted alignments + # Prepend any sub name to an isolated -> to avoid unwanted alignments # [test case is test8/penco.pl] if ( $name eq '->' ) { $im--; @@ -10737,6 +10806,9 @@ sub previous_nonblank_token { $name = $tokens_to_go[$im] . $name; } } + + # Finally, remove any leading arrows + $name =~ s/^->//; return $name; } @@ -11263,8 +11335,7 @@ sub send_lines_to_vertical_aligner { my $name = $tok; if ( $tok eq '(' ) { - $name = $self->previous_nonblank_token($i); - $name =~ s/^->//; + $name = $self->make_paren_name($i); } $container_name{$depth} = "+" . $name; @@ -13059,6 +13130,9 @@ sub get_seqno { { ## begin closure set_bond_strengths + # These routines and variables are involved in deciding where to break very + # long lines. + my %is_good_keyword_breakpoint; my %is_lt_gt_le_ge; @@ -13965,8 +14039,9 @@ sub get_seqno { sub pad_array_to_go { - # to simplify coding in scan_list and set_bond_strengths, it helps + # To simplify coding in scan_list and set_bond_strengths, it helps # to create some extra blank tokens at the end of the arrays + # FIXME: it would be nice to eliminate the need for this routine. my ($self) = @_; $tokens_to_go[ $max_index_to_go + 1 ] = ''; $tokens_to_go[ $max_index_to_go + 2 ] = ''; @@ -14001,6 +14076,9 @@ sub pad_array_to_go { { ## begin closure scan_list + # These routines and variables are involved in finding good + # places to break long lists. + my ( $block_type, $current_depth, $depth, $i, @@ -14273,6 +14351,8 @@ sub pad_array_to_go { # items can be vertically aligned. The output of this routine is # stored in the array @forced_breakpoint_to_go, which is used to set # final breakpoints. + + # It is called once per batch if the batch is a list. my $rOpts_break_at_old_attribute_breakpoints = $rOpts->{'break-at-old-attribute-breakpoints'}; my $rOpts_break_at_old_comma_breakpoints = @@ -16393,11 +16473,13 @@ sub set_nobreaks { "FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n"; }; + ###################################################################### # NOTE: if we call set_closing_breakpoint below it will then call this # routing back. So there is the possibility of an infinite loop if a # programming error is made. As a precaution, I have added a check on # the forced_breakpoint flag, so that we won't keep trying to set it. # That will give additional protection against a loop. + ###################################################################### if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 && !$forced_breakpoint_to_go[$i_nonblank] ) @@ -16468,6 +16550,9 @@ sub set_nobreaks { { ## begin closure recombine_breakpoints + # This routine is called once per batch to see if it would be better + # to combine some of the lines into which the batch has been broken. + my %is_amp_amp; my %is_ternary; my %is_math_op; @@ -17925,6 +18010,10 @@ sub insert_breaks_before_list_opening_containers { my ( $self, $ri_left, $ri_right ) = @_; + # This routine is called once per batch to implement the parameters + # --break-before-hash-brace, etc. + + # Nothing to do if none of these parameters has been set return unless %break_before_container_types; my $nmax = @{$ri_right} - 1; @@ -18035,6 +18124,9 @@ sub insert_final_ternary_breaks { my ( $self, $ri_left, $ri_right ) = @_; + # Called once per batch to look for and do any final line breaks for + # long ternary chains + my $nmax = @{$ri_right} - 1; # scan the left and right end tokens of all lines @@ -18128,7 +18220,7 @@ sub in_same_container_i { # Check to see if tokens at K1 and K2 are in the same container, # and not separated by certain characters: => , ? : || or - # This version uses the newer $rLL data structure + # This version uses the newer $rLL data structure. my ( $self, $K1, $K2 ) = @_; if ( $K2 < $K1 ) { ( $K1, $K2 ) = ( $K2, $K1 ) } @@ -18179,6 +18271,8 @@ sub in_same_container_i { sub set_continuation_breaks { + # Called once per batch to set breaks in long lines. + # Define an array of indexes for inserting newline characters to # keep the line lengths below the maximum desired length. There is # an implied break after the last token, so it need not be included. @@ -18872,9 +18966,10 @@ sub insert_additional_breaks { sub compare_indentation_levels { - # check to see if output line tabbing agrees with input line + # Check to see if output line tabbing agrees with input line # this can be very useful for debugging a script which has an extra - # or missing brace + # or missing brace. + my ( $self, $guessed_indentation_level, $structural_indentation_level, $line_number ) = @_; -- 2.39.5