From: Steve Hancock Date: Sat, 16 Nov 2024 17:13:20 +0000 (-0800) Subject: update comments X-Git-Tag: 20240903.07~19 X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=60db5adf11cdba36f46a6766f14c90a5c87378c0;p=perltidy.git update comments --- diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm index 87aa0b88..59b9568f 100644 --- a/lib/Perl/Tidy/Formatter.pm +++ b/lib/Perl/Tidy/Formatter.pm @@ -145,10 +145,7 @@ This is probably an error introduced by a recent programming change. $pkg reports VERSION='$VERSION'. ============================================================================== EOM - - # We shouldn't get here, but this return is to keep Perl-Critic from - # complaining. - return; + croak "unexpected return from sub Die"; } ## end sub Fault sub Fault_Warn { @@ -1698,9 +1695,10 @@ sub K_first_code { $rLL = $self->[_rLL_] unless ( defined($rLL) ); return unless @{$rLL}; - my $type = $rLL->[0]->[_TYPE_]; - if ( $type ne 'b' && $type ne '#' ) { return 0 } - return $self->K_next_code(0); + my $KK = 0; + my $type = $rLL->[$KK]->[_TYPE_]; + if ( $type ne 'b' && $type ne '#' ) { return $KK } + return $self->K_next_code($KK); } ## end sub K_first_code sub K_last_code { @@ -4090,9 +4088,11 @@ my %closing_container_inside_ws; sub set_whitespace_flags { + my $self = shift; + # This routine is called once per file to set whitespace flags for that # file. This routine examines each pair of nonblank tokens and sets a flag - # indicating if white space is needed. + # indicating if they should be separated by white space. # # $rwhitespace_flags->[$j] is a flag indicating whether a white space # BEFORE token $j is needed, with the following values: @@ -4102,8 +4102,6 @@ sub set_whitespace_flags { # WS_YES = 1 want a space BEFORE token $j # - my $self = shift; - my $j_tight_closing_paren = -1; my $rLL = $self->[_rLL_]; my $K_closing_container = $self->[_K_closing_container_]; @@ -4732,6 +4730,10 @@ sub set_container_ws_by_keyword { $closing_container_inside_ws{$sequence_number} = $ws_flag; } } + else { + DEVEL_MODE + && Fault("unexpected token='$word' and seqno='$sequence_number'\n"); + } return; } ## end sub set_container_ws_by_keyword @@ -4953,6 +4955,8 @@ EOM sub is_essential_whitespace { + my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_; + # Essential whitespace means whitespace which cannot be safely deleted # without risking the introduction of a syntax error. @@ -4973,8 +4977,6 @@ EOM # to use nytprof to profile with both old and revised coding using the # -mangle option and check differences. - my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_; - # This is potentially a very slow routine but the following quick # filters typically catch and handle over 90% of the calls. @@ -5664,9 +5666,10 @@ EOM # Added for c140 to make 'w ->' and 'i ->' behave the same $binary_bond_strength{'w'}{'->'} = 1.45 * STRONG; - # Note that the following alternative strength would make the break at the - # '->' rather than opening the '('. Both have advantages and disadvantages. - # $binary_bond_strength{'i'}{'->'} = 0.5*STRONG + 0.5 * NOMINAL; # + # Note that the following alternative strength would make the break at + # the '->' rather than opening the '('. Both have advantages and + # disadvantages. + # $binary_bond_strength{'i'}{'->'} = 0.5*STRONG + 0.5 * NOMINAL; # $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; @@ -5800,10 +5803,8 @@ EOM my ($self) = @_; - #----------------------------------------------------------------- # Define a 'bond strength' for each token pair in an output batch. # See comments above for definition of bond strength. - #----------------------------------------------------------------- my $rbond_strength_to_go = []; @@ -6356,10 +6357,7 @@ sub bad_pattern { sub prepare_cuddled_block_types { - # the cuddled-else style, if used, is controlled by a hash that - # we construct here - - # Include keywords here which should not be cuddled + # Construct a hash needed by the cuddled-else style my $cuddled_string = EMPTY_STRING; if ( $rOpts->{'cuddled-else'} ) { @@ -6376,7 +6374,6 @@ sub bad_pattern { if ($cuddled_block_list) { $cuddled_string .= SPACE . $cuddled_block_list; } - } # If we have a cuddled string of the form @@ -7187,20 +7184,24 @@ EOM sub write_line { + my ( $self, $line_of_tokens_input ) = @_; + # This routine receives lines one-by-one from the tokenizer and stores # them in a format suitable for further processing. After the last # line has been sent, the tokenizer will call sub 'finish_formatting' # to do the actual formatting. - my ( $self, $line_of_tokens_old ) = @_; + # Given: + # $line_of_tokens_input = hash ref of one line from the tokenizer my $rLL = $self->[_rLL_]; my $line_of_tokens = {}; # copy common hash key values - @{$line_of_tokens}{@common_keys} = @{$line_of_tokens_old}{@common_keys}; + @{$line_of_tokens}{@common_keys} = + @{$line_of_tokens_input}{@common_keys}; - my $line_type = $line_of_tokens_old->{_line_type}; + my $line_type = $line_of_tokens_input->{_line_type}; my $tee_output; my $Klimit = $self->[_Klimit_]; @@ -7221,7 +7222,7 @@ EOM # Handle line of code else { - my $rtokens = $line_of_tokens_old->{_rtokens}; + my $rtokens = $line_of_tokens_input->{_rtokens}; my $jmax = @{$rtokens} - 1; if ( $jmax >= 0 ) { @@ -7231,7 +7232,7 @@ EOM #---------------------------- # get the tokens on this line #---------------------------- - $self->write_line_inner_loop( $line_of_tokens_old, + $self->write_line_inner_loop( $line_of_tokens_input, $line_of_tokens ); # update Klimit for added tokens @@ -7271,12 +7272,12 @@ EOM if ($tee_output) { my $fh_tee = $self->[_fh_tee_]; - my $line_text = $line_of_tokens_old->{_line_text}; + my $line_text = $line_of_tokens_input->{_line_text}; $fh_tee->print($line_text) if ($fh_tee); } # We must use the old line because the qw logic may change this flag - $last_ending_in_quote = $line_of_tokens_old->{_ending_in_quote}; + $last_ending_in_quote = $line_of_tokens_input->{_ending_in_quote}; return; } ## end sub write_line @@ -7589,10 +7590,8 @@ EOM sub write_line_inner_loop { my ( $self, $line_of_tokens_old, $line_of_tokens ) = @_; - #--------------------------------------------------------------------- # Copy the tokens on one line received from the tokenizer to their new # storage locations. - #--------------------------------------------------------------------- # Input parameters: # $line_of_tokens_old = line received from tokenizer @@ -7863,6 +7862,9 @@ sub finish_formatting { # The file has been tokenized and is ready to be formatted. # All of the relevant data is stored in $self, ready to go. + # Given: + # $severe_error = true if a severe error was encountered + # Returns: # true if input file was copied verbatim due to errors # false otherwise @@ -8023,13 +8025,13 @@ BEGIN { sub find_level_info { + my ($self) = @_; + # Find level ranges and total variations of all code blocks in this file. # Returns: # ref to hash with block info, with seqno as key (see below) - my ($self) = @_; - # The array _rSS_ has the complete container tree for this file. my $rSS = $self->[_rSS_]; @@ -10006,7 +10008,10 @@ sub has_complete_package { } # safety check - shouldn't happen - return unless ( $type eq 'P' ); + if ( $type ne 'P' ) { + DEVEL_MODE && Fault("Expecting type 'P' but found '$type'"); + return; + } my $level = $item->[_LEVEL_]; return unless ( $level == 0 ); @@ -10034,6 +10039,9 @@ sub is_complete_script { my ( $self, $rline_type_count, $rkeyword_count ) = @_; # Guess if we are formatting a complete script + # Given: + # $rline_type_count = hash ref of count of line types + # $rkeyword_count = hash ref of count of keywords # Return: true or false # Goal: help decide if we should skip certain warning checks when @@ -11680,7 +11688,12 @@ sub initialize_warn_hash { } ## end sub initialize_warn_hash sub make_excluded_name_hash { - my ($option_name) = @_; + my ($option_name) = @_; + + # Convert a list of words into a hash ref for an input option + # Given: + # $option_name = the name of an input option + # example: 'warn-variable-exclusion-list' my $rexcluded_name_hash = {}; my $excluded_names = $rOpts->{$option_name}; if ($excluded_names) { @@ -11786,6 +11799,7 @@ sub initialize_warn_variable_types { # Given: # $wvt_in_args = true if the -wvt parameter was on the command line # $num_files = number of files on the command line + # $line_range_clipped = true if only part of a file is being formatted my @all_opts = qw(r s p u c); $rwarn_variable_types = @@ -11821,11 +11835,15 @@ sub initialize_warn_variable_types { sub filter_excluded_names { + my ( $rwarnings, $rexcluded_name_hash ) = @_; + + # Remove warnings for variable names excluded by user request + # for an operation like --warn-variable-types + # Given: # $rwarnigns = ref to list of warning info hashes # $rexcluded_name_hash = ref to hash with excluded names # Return updated $rwarnings with excluded names removed - my ( $rwarnings, $rexcluded_name_hash ) = @_; if ( @{$rwarnings} && $rexcluded_name_hash ) { # Check for exact matches @@ -12468,8 +12486,9 @@ sub interbracket_arrow_check { sub delete_side_comments { my ( $self, $rix_side_comments ) = @_; - # Given a list of indexes of lines with side comments, handle any - # requested side comment deletions. + # Handle any requested side comment deletions. + # Given: + # $rix_side_comments = ref to list of indexes of lines with side comments my $rLL = $self->[_rLL_]; my $rlines = $self->[_rlines_]; @@ -12824,13 +12843,12 @@ sub respace_tokens { my $self = shift; - #-------------------------------------------------------------------------- # This routine is called once per file to do as much formatting as possible # before new line breaks are set. - #-------------------------------------------------------------------------- - # Return parameters: - # Set $severe_error=true if processing must terminate immediately + # Returns: + # $severe_error = true if processing must terminate immediately + # $rqw_lines = ref to list of lines with qw quotes (for -qwaf) my ( $severe_error, $rqw_lines ); # We do not change any spaces in --indent-only mode @@ -13038,10 +13056,14 @@ sub respace_tokens_inner_loop { my ( $self, $Kfirst, $Klast, $input_line_number ) = @_; - #----------------------------------------------------------------- # Loop to copy all tokens on one line, making any spacing changes, # while also collecting information needed by later subs. - #----------------------------------------------------------------- + + # Given: + # $Kfirst = index of first token on this line + # $Klast = index of last token on this line + # $input_line_number = number of this line in input stream + my $type; foreach my $KK ( $Kfirst .. $Klast ) { @@ -13923,13 +13945,12 @@ sub store_token { my ( $self, $item ) = @_; - #------------------------------------------ # Store one token during respace operations - #------------------------------------------ - # Optional input parameter: '$item' - # if defined => reference to a token to be stored - # otherwise => make and store a blank space + # Given: + # $item = + # if defined => reference to a token to be stored + # if not defined => make and store a blank space # NOTE: this sub is called once per token so coding efficiency is critical. @@ -15397,10 +15418,15 @@ sub store_new_token { sub check_Q { + my ( $self, $KK, $Kfirst, $line_number ) = @_; + # Check that a quote looks okay, and report possible problems # to the logfile. + # Given: + # $KK = index of the quote token + # $Kfirst = index of first token on the line + # $line_number = number of the line in the input stream - my ( $self, $KK, $Kfirst, $line_number ) = @_; my $token = $rLL->[$KK]->[_TOKEN_]; if ( $token =~ /\t/ ) { $self->note_embedded_tab($line_number); @@ -15638,10 +15664,11 @@ EOM sub package_info_maker { + my ( $self, $rK_package_list ) = @_; + # Create a hash of values which can be used to find the package of any # token. This sub must be called after rLL has been updated because it # calls parent_seqno_by_K. - my ( $self, $rK_package_list ) = @_; # Given: # @{$rK_package_list} = a simple list of token index K of each 'package' @@ -15875,7 +15902,7 @@ BEGIN { sub count_list_elements { my ( $self, $rarg_list ) = @_; - # Given: + # Given call arg hash containing: # $seqno_list = sequence number of a paren of list to be counted, or # $K_list_start = starting index of list (for 'return' lists) # $shift_count_min = starting min arg count items to include @@ -16575,7 +16602,7 @@ sub count_sub_input_args { # shouldn't happen: if ( !defined($K_sub) || $K_sub >= $K_opening_block ) { if ( !defined($K_sub) ) { $K_sub = 'undef' } - Fault("Bad K_sub=$K_sub, opening=$K_opening_block\n"); + DEVEL_MODE && Fault("Bad K_sub=$K_sub, opening=$K_opening_block\n"); return; } @@ -17234,6 +17261,10 @@ sub sub_def_info_maker { my ( $self, $rpackage_lookup_list, $rprelim_call_info ) = @_; + # Given: + # $rpackage_lookup_list = list with info for finding containing package + # $rprelim_call_info = hash ref with first try at call info + # Returns two hash references: # \%sub_info_by_seqno, # \%sub_seqno_by_key, @@ -17345,6 +17376,9 @@ sub update_sub_call_paren_info { my ( $self, $rpackage_lookup_list ) = @_; + # Given: + # $rpackage_lookup_list = list with info for finding containing package + # Update the hash of info about the call parameters with arg counts # and package. It contains the sequence number of each paren and # type of call, and we must add the arg count and package. @@ -18457,11 +18491,12 @@ EOM sub sort_warnings { + my ($rwarnings) = @_; + # Given: # $rwarnigns = ref to list of warning info hashes # Return updated $rwarnings # - Sorted by line number - my ($rwarnings) = @_; if ( @{$rwarnings} ) { # sort by line number @@ -18477,6 +18512,12 @@ sub sort_warnings { sub stringify_line_range { my ($rcalls) = @_; + + # Given: + # $rcalls = ref to list of call info + # Return: + # $string = single line of text with just the line range + my $string = EMPTY_STRING; if ( $rcalls && @{$rcalls} ) { my @sorted = @@ -18658,6 +18699,8 @@ EOM sub keep_old_line_breaks { + my ($self) = @_; + # Called once per file to find and mark any old line breaks which # should be kept. We will be translating the input hashes into # token indexes. @@ -18668,8 +18711,6 @@ sub keep_old_line_breaks { # = 2 make a soft break (keep building current batch) # best for something like leading -> - my ($self) = @_; - my $rLL = $self->[_rLL_]; my $rKrange_code_without_comments = $self->[_rKrange_code_without_comments_]; @@ -18748,9 +18789,10 @@ sub keep_old_line_breaks { sub weld_containers { + my ($self) = @_; + # Called once per file to do any welding operations requested by --weld* # flags. - my ($self) = @_; # This count is used to eliminate needless calls for weld checks elsewhere $total_weld_count = 0; @@ -18871,6 +18913,7 @@ EOM } ## end sub weld_containers sub weld_cuddled_blocks { + my ($self) = @_; # Called once per file to handle cuddled formatting @@ -19272,6 +19315,13 @@ sub find_nested_pairs { sub match_paren_control_flag { + my ( $self, $seqno, $flag, $rLL ) = @_; + + # Input parameters: + # $seqno = sequence number of the container (should be paren) + # $flag = the flag which defines what matches + # $rLL = an optional alternate token list needed for respace operations + # Decide if this paren is excluded by user request: # undef matches no parens # '*' matches all parens @@ -19283,12 +19333,7 @@ sub match_paren_control_flag { # 'F' matches if 'f' does not. # 'w' matches if either 'k' or 'f' match. # 'W' matches if 'w' does not. - my ( $self, $seqno, $flag, $rLL ) = @_; - # Input parameters: - # $seqno = sequence number of the container (should be paren) - # $flag = the flag which defines what matches - # $rLL = an optional alternate token list needed for respace operations $rLL = $self->[_rLL_] unless ( defined($rLL) ); return 0 unless ( defined($flag) ); @@ -19331,8 +19376,14 @@ EOM sub is_excluded_weld { - # decide if this weld is excluded by user request my ( $self, $KK, $is_leading ) = @_; + + # Decide if this weld is excluded by user request + + # Given: + # $KK = index of this weld token + # $is_leading = true if this will the outer token of a weld + my $rLL = $self->[_rLL_]; my $rtoken_vars = $rLL->[$KK]; my $token = $rtoken_vars->[_TOKEN_]; @@ -19364,13 +19415,14 @@ use constant DEBUG_WELD => 0; sub setup_new_weld_measurements { + my ( $self, $Kouter_opening, $Kinner_opening ) = @_; + # Define quantities to check for excess line lengths when welded. # Called by sub 'weld_nested_containers' and sub 'weld_nested_quotes' - my ( $self, $Kouter_opening, $Kinner_opening ) = @_; - - # Given indexes of outer and inner opening containers to be welded: - # $Kouter_opening, $Kinner_opening + # Given: + # ($Kouter_opening, $Kinner_opening) = indexes of outer and inner opening + # containers to be welded # Returns these variables: # $new_weld_ok = true (new weld ok) or false (do not start new weld) @@ -19589,6 +19641,7 @@ EOM } ## end sub setup_new_weld_measurements sub excess_line_length_for_Krange { + my ( $self, $Kfirst, $Klast ) = @_; # returns $excess_length = @@ -19628,6 +19681,7 @@ sub excess_line_length_for_Krange { } ## end sub excess_line_length_for_Krange sub weld_nested_containers { + my ($self) = @_; # Called once per file for option '--weld-nested-containers' @@ -20295,11 +20349,11 @@ EOM sub weld_nested_quotes { + my $self = shift; + # Called once per file for option '--weld-nested-containers'. This # does welding on qw quotes. - my $self = shift; - # See if quotes are excluded from welding my $rflags = $weld_nested_exclusion_rules{'q'}; return if ( defined($rflags) && defined( $rflags->[1] ) ); @@ -20518,9 +20572,11 @@ sub is_welded_at_seqno { my ( $self, $seqno ) = @_; - # given a sequence number: - # return true if it is welded either left or right - # return false otherwise + # Given: + # $seqno = a sequence number: + # Return: + # true if it is welded either left or right + # false otherwise return unless ( $total_weld_count && defined($seqno) ); my $KK_o = $self->[_K_opening_container_]->{$seqno}; return unless defined($KK_o); @@ -20530,6 +20586,8 @@ sub is_welded_at_seqno { sub mark_short_nested_blocks { + my $self = shift; + # This routine looks at the entire file and marks any short nested blocks # which should not be broken. The results are stored in the hash # $rshort_nested->{$type_sequence} @@ -20550,7 +20608,6 @@ sub mark_short_nested_blocks { # The flag which is set here will be checked in two places: # 'sub process_line_of_CODE' and 'sub starting_one_line_block' - my $self = shift; return if $rOpts->{'indent-only'}; my $rLL = $self->[_rLL_]; @@ -20711,9 +20768,12 @@ sub special_indentation_adjustments { sub clip_adjusted_levels { + my ( $self, $min_starting_level ) = @_; + # Replace any negative adjusted levels with zero. # Negative levels can only occur in files with brace errors. - my ( $self, $min_starting_level ) = @_; + # Given: + # $min_starting_level = minimum (adjusted) level of the input stream # Clip the original _LEVEL_ values to zero if necessary my $rLL = $self->[_rLL_]; @@ -20738,9 +20798,10 @@ sub clip_adjusted_levels { sub do_non_indenting_braces { + my ($self) = @_; + # Called once per file to handle the --non-indenting-braces parameter. # Remove indentation within marked braces if requested - my ($self) = @_; # Any non-indenting braces have been found by sub find_non_indenting_braces # and are defined by the following hash: @@ -21280,6 +21341,8 @@ use constant DEBUG_XCI => 0; sub extended_ci { + my ($self) = @_; + # This routine implements the -xci (--extended-continuation-indentation) # flag. We add CI to interior tokens of a container which itself has CI but # only if a token does not already have CI. @@ -21300,8 +21363,6 @@ sub extended_ci { # The operations to remove unwanted CI are done in sub 'undo_ci'. - my ($self) = @_; - my $rLL = $self->[_rLL_]; return unless ( defined($rLL) && @{$rLL} ); diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm index 1dbd945f..9325f07b 100644 --- a/lib/Perl/Tidy/Tokenizer.pm +++ b/lib/Perl/Tidy/Tokenizer.pm @@ -335,16 +335,16 @@ $pkg reports VERSION='$VERSION'. ============================================================================== EOM - # We shouldn't get here, but this return is to keep Perl-Critic from - # complaining. - return; + croak "unexpected return from sub Die"; } ## end sub Fault sub make_skipping_pattern { my ( $rOpts, $opt_name, $default ) = @_; + + # Make regex patterns for the format-skipping and code-skipping options my $param = $rOpts->{$opt_name}; if ( !$param ) { $param = $default } - $param =~ s/^\s+//; # allow leading spaces to be like format-skipping + $param =~ s/^\s+//; if ( $param !~ /^#/ ) { Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n"); } @@ -361,7 +361,7 @@ sub make_skipping_pattern { sub check_options { - # Check Tokenizer parameters + # Check and pre-process tokenizer parameters my $rOpts = shift; %is_sub = (); @@ -668,6 +668,11 @@ sub make_source_array { my ( $self, $line_source_object ) = @_; # Convert the source into an array of lines + # Given: + # $line_source_object = the input source stream + # Task: + # Convert the source to an array ref and store in $self + my $rinput_lines = []; my $rsource = ref($line_source_object); @@ -746,8 +751,8 @@ EOM sub peek_ahead { my ( $self, $buffer_index ) = @_; - # look $buffer_index lines ahead of the current location without disturbing - # the input + # look $buffer_index lines ahead of the current location in the input + # stream without disturbing the input my $line; my $rinput_lines = $self->[_rinput_lines_]; my $line_index = $buffer_index + $self->[_input_line_index_next_]; @@ -1830,9 +1835,10 @@ sub guess_old_indentation_level { return ($level); } ## end sub guess_old_indentation_level -# This is a currently unused debug routine sub dump_functions { + # This is an unused debug routine, save for future use + my $fh = *STDOUT; foreach my $pkg ( keys %{$ris_user_function} ) { $fh->print("\nnon-constant subs in package $pkg\n"); @@ -2182,13 +2188,18 @@ sub prepare_for_a_new_file { my ( $self, $numc ) = @_; - # Split the leading $numc characters from the current token (at index=$i) - # which is pre-type 'w' and insert the remainder back into the pretoken - # stream with appropriate settings. Since we are splitting a pre-type 'w', - # there are three cases, depending on if the remainder starts with a digit: - # Case 1: remainder is type 'd', all digits - # Case 2: remainder is type 'd' and type 'w': digits and other characters - # Case 3: remainder is type 'w' + # This provides a way to work around the limitations of the + # pre-tokenization scheme upon which perltidy is based. It is rarely + # needed. + + # Split the leading $numc characters from the current token (at + # index=$i) which is pre-type 'w' and insert the remainder back into + # the pretoken stream with appropriate settings. Since we are + # splitting a pre-type 'w', there are three cases, depending on if the + # remainder starts with a digit: + # Case 1: remainder is type 'd', all digits + # Case 2: remainder is type 'd' and type 'w': digits & other characters + # Case 3: remainder is type 'w' # Examples, for $numc=1: # $tok => $tok_0 $tok_1 $tok_2 @@ -2272,7 +2283,7 @@ EOM sub peeked_ahead { my $flag = shift; - # get/set the closure flag '$peeked_ahead' + # get or set the closure flag '$peeked_ahead': # - set $peeked_ahead to $flag if given, then # - return current value $peeked_ahead = defined($flag) ? $flag : $peeked_ahead; @@ -2534,6 +2545,8 @@ EOM sub scan_simple_identifier { + my $self = shift; + # This is a wrapper for sub scan_identifier. It does a fast preliminary # scan for certain common identifiers: # '$var', '@var', %var, *var, &var, '@{...}', '%{...}' @@ -2551,8 +2564,6 @@ EOM # |----$i_plus_1 [= a bareword ] # ---$i_begin [= a sigil] - my $self = shift; - my $i_begin = $i; my $tok_begin = $tok; my $i_plus_1 = $i + 1; @@ -2678,6 +2689,8 @@ EOM sub method_ok_here { + my $self = shift; + # Return: # false if this is definitely an invalid method declaration # true otherwise (even if not sure) @@ -2690,8 +2703,6 @@ EOM # return; # }; - my $self = shift; - # from do_scan_sub: my $i_beg = $i + 1; my $pos_beg = $rtoken_map->[$i_beg]; @@ -2749,6 +2760,8 @@ EOM sub class_ok_here { + my $self = shift; + # Return: # false if this is definitely an invalid class declaration # true otherwise (even if not sure) @@ -2767,8 +2780,6 @@ EOM # # class ExtendsBasicAttributes is BasicAttributes{ - my $self = shift; - # TEST 1: class stmt can only go where a new statement can start if ( !new_statement_ok() ) { return } @@ -2853,11 +2864,12 @@ EOM sub scan_number_fast { + my $self = shift; + # This is a wrapper for sub scan_number. It does a fast preliminary # scan for a simple integer. It calls the original scan_number if it # does not find one. - my $self = shift; my $i_begin = $i; my $tok_begin = $tok; my $number; @@ -2967,8 +2979,9 @@ EOM my ( $self, $thing ) = @_; # Issue warning on error if expecting operator - # Given: $thing = the unexpected token or issue - # = undef to use current pre-token + # Given: + # $thing = the unexpected token or issue + # = undef to use current pre-token if ( $expecting == OPERATOR ) { if ( !defined($thing) ) { $thing = $tok } @@ -4712,10 +4725,17 @@ EOM my ( $self, $tok_kw, $next_nonblank_token, $i_next ) = @_; - # Decide if 'sub :' can be the start of a sub attribute list. - # We will decide based on if the colon is followed by a - # bareword which is not a keyword. - # Changed inext+1 to inext to fixed case b1190. + # Decide if a ':' can introduce an attribute. For example, + # something like 'sub :' + + # Given: + # $tok_kw = a bareword token + # $next_nonblank_token = a following ':' being examined + # $i_next = the index of the following ':' + + # We will decide based on if the colon is followed by a bareword + # which is not a keyword. Changed inext+1 to inext to fixed case + # b1190. my $sub_attribute_ok_here; if ( $is_sub{$tok_kw} && $expecting != OPERATOR @@ -5447,6 +5467,8 @@ EOM sub tokenize_this_line { + my ( $self, $line_of_tokens, $trimmed_input_line ) = @_; + # This routine tokenizes one line. The results are stored in # the hash ref '$line_of_tokens'. @@ -5458,7 +5480,6 @@ EOM # Returns: # nothing - my ( $self, $line_of_tokens, $trimmed_input_line ) = @_; my $untrimmed_input_line = $line_of_tokens->{_line_text}; # Extract line number for use in error messages @@ -6491,6 +6512,8 @@ use constant DEBUG_OPERATOR_EXPECTED => 0; sub operator_expected { + my ( $self, $tok, $next_type, $blank_after_Z ) = @_; + # Returns a parameter indicating what types of tokens can occur next # Call format: @@ -6542,8 +6565,6 @@ sub operator_expected { # the 'operator_expected' value by a simple hash lookup. If there are # exceptions, that is an indication that a new type is needed. - my ( $self, $tok, $next_type, $blank_after_Z ) = @_; - #-------------------------------------------- # Section 1: Table lookup will get most cases #-------------------------------------------- @@ -6857,6 +6878,8 @@ sub new_statement_ok { sub code_block_type { + my ( $self, $i, $rtokens, $rtoken_type, $max_token_index ) = @_; + # Decide if this is a block of code, and its type. # Must be called only when $type = $token = '{' # The problem is to distinguish between the start of a block of code @@ -6871,7 +6894,6 @@ sub code_block_type { # print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n"; - my ( $self, $i, $rtokens, $rtoken_type, $max_token_index ) = @_; if ( $last_nonblank_token eq '{' && $last_nonblank_type eq $last_nonblank_token ) { @@ -7300,6 +7322,10 @@ sub is_non_structural_brace { sub increase_nesting_depth { my ( $self, $aa, $pos ) = @_; + # Given: + # $aa = integer code of container type, 0-3 + # $pos = position of character, for error message + # USES GLOBAL VARIABLES: $rcurrent_depth, # $rcurrent_sequence_number, $rdepth_array, # $rstarting_line_of_current_depth, $statement_type @@ -7350,9 +7376,12 @@ sub increase_nesting_depth { sub is_balanced_closing_container { + my ($aa) = @_; + # Return true if a closing container can go here without error # Return false if not - my ($aa) = @_; + # Given: + # $aa = integer code of container type, 0-3 # cannot close if there was no opening my $cd_aa = $rcurrent_depth->[$aa]; @@ -7373,6 +7402,10 @@ sub decrease_nesting_depth { my ( $self, $aa, $pos ) = @_; + # Given: + # $aa = integer code of container type, 0-3 + # $pos = position of character, for error message + # USES GLOBAL VARIABLES: $rcurrent_depth, # $rcurrent_sequence_number, $rdepth_array, $rstarting_line_of_current_depth # $statement_type @@ -7512,10 +7545,15 @@ EOM sub peek_ahead_for_n_nonblank_pre_tokens { - # returns next n pretokens if they exist - # returns undef's if hits eof without seeing any pretokens - # USES GLOBAL VARIABLES: (none) my ( $self, $max_pretokens ) = @_; + + # Given: + # $max_pretokens = number of pretokens wanted + # Return: + # next $max_pretokens pretokens if they exist + # undef's if hits eof without seeing any pretokens + + # USES GLOBAL VARIABLES: (none) my $line; my $i = 0; my ( $rpre_tokens, $rmap, $rpre_types ); @@ -7534,8 +7572,15 @@ sub peek_ahead_for_n_nonblank_pre_tokens { # look ahead for next non-blank, non-comment line of code sub peek_ahead_for_nonblank_token { - # USES GLOBAL VARIABLES: (none) my ( $self, $rtokens, $max_token_index ) = @_; + + # Given: + # $rtokens = ref to token array + # $max_token_index = index of last token in $rtokens + # Task: + # Update $rtokens with next nonblank token + + # USES GLOBAL VARIABLES: (none) my $line; my $i = 0; @@ -7563,17 +7608,20 @@ sub peek_ahead_for_nonblank_token { sub guess_if_pattern_or_conditional { - # this routine is called when we have encountered a ? following an + my ( $self, $i, $rtokens, $rtoken_type, $rtoken_map_uu, $max_token_index ) + = @_; + + # This routine is called when we have encountered a ? following an # unknown bareword, and we must decide if it starts a pattern or not - # input parameters: + # Given: # $i - token index of the ? starting possible pattern - # output parameters: + # $rtokens ... = the token arrays + # Return: # $is_pattern = 0 if probably not pattern, =1 if probably a pattern # msg = a warning or diagnostic message + # USES GLOBAL VARIABLES: $last_nonblank_token - my ( $self, $i, $rtokens, $rtoken_type, $rtoken_map_uu, $max_token_index ) - = @_; my $is_pattern = 0; my $msg = "guessing that ? after '$last_nonblank_token' starts a "; @@ -7667,17 +7715,19 @@ BEGIN { sub guess_if_pattern_or_division { + my ( $self, $i, $rtokens, $rtoken_type, $rtoken_map, $max_token_index ) = + @_; + # This routine is called when we have encountered a / following an # unknown bareword, and we must decide if it starts a pattern or is a # division. - # input parameters: + # Given: # $i - token index of the / starting possible pattern - # output parameters: + # $rtokens ... = the token arrays + # Return: # $is_pattern = 0 if probably division, =1 if probably a pattern # msg = a warning or diagnostic message # USES GLOBAL VARIABLES: $last_nonblank_token - my ( $self, $i, $rtokens, $rtoken_type, $rtoken_map, $max_token_index ) = - @_; my $msg = "guessing that / after '$last_nonblank_token' starts a "; my $ibeg = $i; my $is_pattern = 0; @@ -7811,17 +7861,24 @@ sub guess_if_pattern_or_division { return ( $is_pattern, $msg ); } ## end sub guess_if_pattern_or_division -# try to resolve here-doc vs. shift by looking ahead for -# non-code or the end token (currently only looks for end token) -# returns 1 if it is probably a here doc, 0 if not sub guess_if_here_doc { my ( $self, $next_token ) = @_; - # This is how many lines we will search for a target as part of the - # guessing strategy. It is a constant because there is probably - # little reason to change it. + # Try to resolve here-doc vs. shift by looking ahead for + # non-code or the end token (currently only looks for end token) + + # Given: + # $next_token = the next token after '<<' + + # Return: + # 1 if it is probably a here doc + # 0 if not + # USES GLOBAL VARIABLES: $current_package $ris_constant, + + # This is how many lines we will search for a target as part of the + # guessing strategy. There is probably little reason to change it. my $HERE_DOC_WINDOW = 40; my $here_doc_expected = 0; @@ -7869,11 +7926,6 @@ sub guess_if_here_doc { sub scan_bare_identifier_do { - # this routine is called to scan a token starting with an alphanumeric - # variable or package separator, :: or '. - # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, - # $last_nonblank_type, $rparen_type, $paren_depth - my ( $self, @@ -7888,6 +7940,15 @@ sub scan_bare_identifier_do { ) = @_; + # This routine is called to scan a token starting with an alphanumeric + # variable or package separator, :: or '. + + # Given: + # current scan state variables + + # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, + # $last_nonblank_type, $rparen_type, $paren_depth + my $package = undef; my $i_beg = $i; @@ -8104,21 +8165,6 @@ sub scan_bare_identifier_do { sub scan_id_do { - # This is the new scanner and will eventually replace scan_identifier. - # Only type 'sub' and 'package' are implemented. - # Token types $ * % @ & -> are not yet implemented. - # - # Scan identifier following a type token. - # The type of call depends on $id_scan_state: $id_scan_state = '' - # for starting call, in which case $tok must be the token defining - # the type. - # - # If the type token is the last nonblank token on the line, a value - # of $id_scan_state = $tok is returned, indicating that further - # calls must be made to get the identifier. If the type token is - # not the last nonblank token on the line, the identifier is - # scanned and handled and a value of '' is returned. - my ( $self, @@ -8133,6 +8179,24 @@ sub scan_id_do { ) = @_; + # Scan identifier following a type token. + # Given: + # current scan state variables + + # This is the new scanner and may eventually replace scan_identifier. + # Only type 'sub' and 'package' are implemented. + # Token types $ * % @ & -> are not yet implemented. + # + # The type of call depends on $id_scan_state: $id_scan_state = '' + # for starting call, in which case $tok must be the token defining + # the type. + # + # If the type token is the last nonblank token on the line, a value + # of $id_scan_state = $tok is returned, indicating that further + # calls must be made to get the identifier. If the type token is + # not the last nonblank token on the line, the identifier is + # scanned and handled and a value of '' is returned. + use constant DEBUG_NSCAN => 0; my $type = EMPTY_STRING; my $i_beg; @@ -8244,6 +8308,8 @@ EOM sub check_prototype { my ( $proto, $package, $subname ) = @_; + + # Classify a sub based on its prototype return if ( !defined($package) ); return if ( !defined($subname) ); if ( defined($proto) ) { @@ -8281,8 +8347,19 @@ sub check_prototype { sub do_scan_package { - # do_scan_package parses a package name - # it is called with $i_beg equal to the index of the first nonblank + my ( $self, $rcall_hash ) = @_; + + my $input_line = $rcall_hash->{input_line}; + my $i = $rcall_hash->{i}; + my $i_beg = $rcall_hash->{i_beg}; + my $tok = $rcall_hash->{tok}; + my $type = $rcall_hash->{type}; + my $rtokens = $rcall_hash->{rtokens}; + my $rtoken_map = $rcall_hash->{rtoken_map}; + my $max_token_index = $rcall_hash->{max_token_index}; + + # Parse a package name. + # This is called with $i_beg equal to the index of the first nonblank # token following a 'package' token. # USES GLOBAL VARIABLES: $current_package, @@ -8299,17 +8376,6 @@ sub do_scan_package { # character and at least three components. # reference http://perldoc.perl.org/functions/package.html - my ( $self, $rcall_hash ) = @_; - - my $input_line = $rcall_hash->{input_line}; - my $i = $rcall_hash->{i}; - my $i_beg = $rcall_hash->{i_beg}; - my $tok = $rcall_hash->{tok}; - my $type = $rcall_hash->{type}; - my $rtokens = $rcall_hash->{rtokens}; - my $rtoken_map = $rcall_hash->{rtoken_map}; - my $max_token_index = $rcall_hash->{max_token_index}; - my $package = undef; my $pos_beg = $rtoken_map->[$i_beg]; pos($input_line) = $pos_beg; @@ -8990,6 +9056,19 @@ sub do_scan_package { sub scan_complex_identifier { + ( + my $self, + + $i, + $id_scan_state, + $identifier, + $rtokens, + $max_token_index, + $expecting, + $container_type + + ) = @_; + # This routine assembles tokens into identifiers. It maintains a # scan state, id_scan_state. It updates id_scan_state based upon # current id_scan_state and token, and returns an updated @@ -9004,19 +9083,6 @@ sub do_scan_package { # attempt to create multiple tokenizers can occur when multiple # files are processed, causing an error. - ( - my $self, - - $i, - $id_scan_state, - $identifier, - $rtokens, - $max_token_index, - $expecting, - $container_type - - ) = @_; - # return flag telling caller to split the pretoken my $split_pretoken_flag; @@ -9301,7 +9367,19 @@ EOM sub do_scan_sub { - # do_scan_sub parses a sub name and prototype. + my ( $self, $rcall_hash ) = @_; + + my $input_line = $rcall_hash->{input_line}; + my $i = $rcall_hash->{i}; + my $i_beg = $rcall_hash->{i_beg}; + my $tok = $rcall_hash->{tok}; + my $type = $rcall_hash->{type}; + my $rtokens = $rcall_hash->{rtokens}; + my $rtoken_map = $rcall_hash->{rtoken_map}; + my $id_scan_state = $rcall_hash->{id_scan_state}; + my $max_token_index = $rcall_hash->{max_token_index}; + + # Parse a sub name and prototype. # At present there are three basic CALL TYPES which are # distinguished by the starting value of '$tok': @@ -9341,18 +9419,6 @@ EOM # $rsaw_function_definition, # $statement_type - my ( $self, $rcall_hash ) = @_; - - my $input_line = $rcall_hash->{input_line}; - my $i = $rcall_hash->{i}; - my $i_beg = $rcall_hash->{i_beg}; - my $tok = $rcall_hash->{tok}; - my $type = $rcall_hash->{type}; - my $rtokens = $rcall_hash->{rtokens}; - my $rtoken_map = $rcall_hash->{rtoken_map}; - my $id_scan_state = $rcall_hash->{id_scan_state}; - my $max_token_index = $rcall_hash->{max_token_index}; - my $i_entry = $i; # Determine the CALL TYPE @@ -9744,6 +9810,8 @@ sub find_next_noncomment_token { sub is_possible_numerator { + my ( $self, $i, $rtokens, $max_token_index ) = @_; + # Look at the next non-comment character and decide if it could be a # numerator. Returns the following code: # -1 - division not possible @@ -9753,7 +9821,6 @@ sub is_possible_numerator { # 3 - division very probable: number and one of ; ] } follow # 4 - is division, not pattern: number and ) follow - my ( $self, $i, $rtokens, $max_token_index ) = @_; my $divide_possible_code = 0; my $next_token = $rtokens->[ $i + 1 ]; @@ -9817,6 +9884,8 @@ sub is_possible_numerator { sub pattern_expected { + my ( $self, $i, $rtokens, $max_token_index ) = @_; + # This a filter for a possible pattern. # It looks at the token after a possible pattern and tries to # determine if that token could end a pattern. @@ -9824,7 +9893,6 @@ sub is_possible_numerator { # 1 - yes # 0 - can't tell # -1 - no - my ( $self, $i, $rtokens, $max_token_index ) = @_; my $is_pattern = 0; my $next_token = $rtokens->[ $i + 1 ]; @@ -9879,12 +9947,14 @@ sub find_next_nonblank_token_on_this_line { sub find_angle_operator_termination { + my ( $self, $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) + = @_; + # We are looking at a '<' and want to know if it is an angle operator. - # We are to return: + # Return: # $i = pretoken index of ending '>' if found, current $i otherwise # $type = 'Q' if found, '>' otherwise - my ( $self, $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) - = @_; + my $i = $i_beg; my $type = '<'; pos($input_line) = 1 + $rtoken_map->[$i]; @@ -10097,19 +10167,21 @@ EOM sub scan_number_do { - # scan a number in any of the formats that Perl accepts + my ( $self, $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = + @_; + + # Scan a number in any of the formats that Perl accepts # Underbars (_) are allowed in decimal numbers. - # input parameters - + # Given: # $input_line - the string to scan # $i - pre_token index to start scanning # $rtoken_map - reference to the pre_token map giving starting # character position in $input_line of token $i - # output parameters - + # Return: # $i - last pre_token index of the number just scanned + # $type - the token type ('v' or 'n') # number - the number (characters); or undef if not a number - my ( $self, $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = - @_; my $pos_beg = $rtoken_map->[$i]; my $pos; ##my $i_begin = $i; @@ -10257,16 +10329,6 @@ sub inverse_pretoken_map { sub find_here_doc { - # find the target of a here document, if any - # input parameters: - # $i - token index of the second < of << - # ($i must be less than the last token index if this is called) - # output parameters: - # $found_target = 0 didn't find target; =1 found target - # HERE_TARGET - the target string (may be empty string) - # $i - unchanged if not here doc, - # or index of the last token of the here target - # $saw_error - flag noting unbalanced quote on here target my ( $self, @@ -10280,6 +10342,16 @@ sub find_here_doc { ) = @_; + # Find the target of a here document, if any + # Given: + # $i - token index of the second < of << + # ($i must be less than the last token index if this is called) + # Return: + # $found_target = 0 didn't find target; =1 found target + # HERE_TARGET - the target string (may be empty string) + # $i - unchanged if not here doc, + # or index of the last token of the here target + # $saw_error - flag noting unbalanced quote on here target my $ibeg = $i; my $found_target = 0; my $here_doc_target = EMPTY_STRING; @@ -10396,15 +10468,6 @@ sub find_here_doc { sub do_quote { - # follow (or continue following) quoted string(s) - # $in_quote return code: - # 0 - ok, found end - # 1 - still must find end of quote whose target is $quote_character - # 2 - still looking for end of first of two quotes - # - # Returns updated strings: - # $quoted_string_1 = quoted string seen while in_quote=1 - # $quoted_string_2 = quoted string seen while in_quote=2 my ( $self, @@ -10423,6 +10486,16 @@ sub do_quote { ) = @_; + # Follow (or continue following) quoted string(s) + # $in_quote = return code: + # 0 - ok, found end + # 1 - still must find end of quote whose target is $quote_character + # 2 - still looking for end of first of two quotes + # + # Returns updated strings: + # $quoted_string_1 = quoted string seen while in_quote=1 + # $quoted_string_2 = quoted string seen while in_quote=2 + my $quoted_string; if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow my $ibeg = $i; @@ -10509,21 +10582,6 @@ BEGIN { sub follow_quoted_string { - # scan for a specific token, skipping escaped characters - # if the quote character is blank, use the first non-blank character - # input parameters: - # $rtokens = reference to the array of tokens - # $i = the token index of the first character to search - # $in_quote = number of quoted strings being followed - # $beginning_tok = the starting quote character - # $quote_pos = index to check next for alphanumeric delimiter - # output parameters: - # $i = the token index of the ending quote character - # $in_quote = decremented if found end, unchanged if not - # $beginning_tok = the starting quote character - # $quote_pos = index to check next for alphanumeric delimiter - # $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested. - # $quoted_string = the text of the quote (without quotation tokens) my ( $self, @@ -10539,6 +10597,21 @@ sub follow_quoted_string { ) = @_; + # Scan for a specific token, skipping escaped characters. + # If the quote character is blank, use the first non-blank character. + # Given: + # $rtokens = reference to the array of tokens + # $i = the token index of the first character to search + # $in_quote = number of quoted strings being followed + # $beginning_tok = the starting quote character + # $quote_pos = index to check next for alphanumeric delimiter + # Return: + # $i = the token index of the ending quote character + # $in_quote = decremented if found end, unchanged if not + # $beginning_tok = the starting quote character + # $quote_pos = index to check next for alphanumeric delimiter + # $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested. + # $quoted_string = the text of the quote (without quotation tokens) my ( $tok, $end_tok ); my $i = $i_beg - 1; my $quoted_string = EMPTY_STRING; @@ -10709,6 +10782,8 @@ sub follow_quoted_string { sub indicate_error { my ( $self, $msg, $line_number, $input_line, $pos, $caret ) = @_; + + # write input line and line with carat's showing where error was detected $self->interrupt_logfile(); $self->warning($msg); $self->write_error_indicator_pair( $line_number, $input_line, $pos, @@ -10730,10 +10805,15 @@ sub write_error_indicator_pair { sub make_numbered_line { - # Given an input line, its line number, and a character position of - # interest, create a string not longer than 80 characters of the form + my ( $lineno, $str, $pos ) = @_; + + # Given: + # $lineno=line number + # $str = an input line + # $pos = character position of interest + # Create a string not longer than 80 characters of the form: # $lineno: sub_string - # such that the sub_string of $str contains the position of interest + # such that the sub_string of $str contains the position of interest # # Here is an example of what we want, in this case we add trailing # '...' because the line is long. @@ -10758,7 +10838,6 @@ sub make_numbered_line { # - $underline = a blank 'underline' which is all spaces with the same # number of characters as the numbered line. - my ( $lineno, $str, $pos ) = @_; my $offset = ( $pos < 60 ) ? 0 : $pos - 40; my $excess = length($str) - $offset - 68; my $numc = ( $excess > 0 ) ? 68 : undef; @@ -10789,6 +10868,8 @@ sub make_numbered_line { sub write_on_underline { + my ( $underline, $pos, $pos_chr ) = @_; + # The "underline" is a string that shows where an error is; it starts # out as a string of blanks with the same length as the numbered line of # code above it, and we have to add marking to show where an error is. @@ -10807,8 +10888,6 @@ sub write_on_underline { # This is a trivial thing to do with substr, but there is some # checking to do. - my ( $underline, $pos, $pos_chr ) = @_; - # check for error..shouldn't happen if ( $pos < 0 || $pos > length($underline) ) { return $underline; @@ -10883,8 +10962,7 @@ sub pre_tokenize { sub show_tokens { - # this is an old debug routine - # not called, but saved for reference + # This is an uncalled debug routine, saved for reference my ( $rtokens, $rtoken_map ) = @_; my $num = scalar( @{$rtokens} ); diff --git a/lib/Perl/Tidy/VerticalAligner.pm b/lib/Perl/Tidy/VerticalAligner.pm index 7eebc14a..91158827 100644 --- a/lib/Perl/Tidy/VerticalAligner.pm +++ b/lib/Perl/Tidy/VerticalAligner.pm @@ -124,10 +124,7 @@ This is probably an error introduced by a recent programming change. $pkg reports VERSION='$VERSION'. ============================================================================== EOM - - # We shouldn't get here, but this return is to keep Perl-Critic from - # complaining. - return; + croak "unexpected return from sub Die"; } ## end sub Fault my %valid_LINE_keys; @@ -428,6 +425,8 @@ sub flush { sub initialize_for_new_group { my ($self) = @_; + # initialize for a new group of lines to be aligned vertically + $self->[_rgroup_lines_] = []; $self->[_group_type_] = EMPTY_STRING; $self->[_zero_count_] = 0; @@ -1031,10 +1030,18 @@ sub valign_input { sub join_hanging_comment { + my ( $new_line, $old_line ) = @_; + # Add dummy fields to a hanging side comment to make it look # like the first line in its potential group. This simplifies # the coding. - my ( $new_line, $old_line ) = @_; + + # Given: + # $new_line = ref to hash of the line to be possibly changed + # $old_line = ref to hash of the previous reference line + # Return: + # true if new line modified + # false otherwise my $jmax = $new_line->{'jmax'}; @@ -1091,6 +1098,11 @@ sub join_hanging_comment { my $line = shift; + # Given: + # $line = ref to hash of values for a line + # Task: + # Set 'list_type' property + # A list will be taken to be a line with a forced break in which all # of the field separators are commas or comma-arrows (except for the # trailing #) @@ -1405,7 +1417,7 @@ sub check_match { # $prev_line = the line just before $new_line # $group_line_count = number of lines in the current group - # returns a flag and a value as follows: + # Returns: a flag and a value as follows: # return (0, $imax_align) if the line does not match # return (1, $imax_align) if the line matches but does not fit # return (2, $imax_align) if the line matches and fits @@ -1414,9 +1426,10 @@ sub check_match { use constant MATCH_NO_FIT => 1; use constant MATCH_AND_FIT => 2; + # Return value '$return_value' describes the match with 3 possible values my $return_value; - # Returns '$imax_align' which is the index of the maximum matching token. + # Return value '$imax_align' is the index of the maximum matching token. # It will be used in the subsequent left-to-right sweep to align as many # tokens as possible for lines which partially match. my $imax_align = -1; @@ -1524,8 +1537,13 @@ sub check_fit { # The new line has alignments identical to the current group. Now we have # to fit the new line into the group without causing a field to exceed the # line length limit. - # return true if successful - # return false if not successful + + # Given: + # $new_line = ref to hash of the new line values + # $old_line = ref to hash of the previous line values + # Returns: + # true if the new line alignments fit the old line + # false otherwise my $jmax = $new_line->{'jmax'}; my $leading_space_count = $new_line->{'leading_space_count'}; @@ -1592,6 +1610,11 @@ sub install_new_alignments { my ($new_line) = @_; + # Given: + # $new_line = ref to hash of a line starting a new group + # Task: + # setup alignment fields for this line + my $jmax = $new_line->{'jmax'}; my $rfield_lengths = $new_line->{'rfield_lengths'}; my $col = $new_line->{'leading_space_count'}; @@ -1626,9 +1649,17 @@ sub dump_array { sub level_change { + my ( $self, $leading_space_count, $diff, $level ) = @_; + # compute decrease in level when we remove $diff spaces from the # leading spaces - my ( $self, $leading_space_count, $diff, $level ) = @_; + + # Given: + # $leading_space_count = current leading line spaces + # $diff = number of spaces to remove + # $level = current indentation level + # Return: + # $level = updated level accounting for the loss of spaces if ($rOpts_indent_columns) { my $olev = @@ -1925,6 +1956,15 @@ sub _flush_group_lines { sub sweep_top_down { my ( $self, $rlines, $group_level ) = @_; + # This is the first of two major sweeps to find alignments. + # The other is sweep_left_to_right. + + # Given: + # $rlines = ref to hash of lines in this main alignment group + # $group_level = common indentation level of these lines + # Return: + # $rgroups = ref to hash of subgroups created + # Partition the set of lines into final alignment subgroups # and store the alignments with the lines. @@ -2121,15 +2161,14 @@ sub two_line_pad { my ( $line_m, $line, $imax_min ) = @_; + # Decide if two adjacent, isolated lines should be aligned + # Given: - # two isolated (list) lines + # $line_m, $line = two isolated (list) lines # imax_min = number of common alignment tokens # Return: # $pad_max = maximum suggested pad distance # = 0 if alignment not recommended - # Note that this is only for two lines which do not have alignment tokens - # in common with any other lines. It is intended for lists, but it might - # also be used for two non-list lines with a common leading '='. # Allow alignment if the difference in the two unpadded line lengths # is not more than either line length. The idea is to avoid @@ -2139,6 +2178,11 @@ sub two_line_pad { # 'VARCHAR', DBI::SQL_VARCHAR, undef, "'", "'", undef, 0, 1, # 1, 0, 0, 0, undef, 0, 0 # ]; + + # Note that this is only for two lines which do not have alignment tokens + # in common with any other lines. It is intended for lists, but it might + # also be used for two non-list lines with a common leading '='. + my $rfield_lengths = $line->{'rfield_lengths'}; my $rfield_lengths_m = $line_m->{'rfield_lengths'}; @@ -2186,6 +2230,16 @@ sub sweep_left_to_right { my ( $rlines, $rgroups, $group_level ) = @_; + # This is the second of two major sweeps to find alignments. + # The other is sweep_top_down. + + # Given: + # $rlines = ref to hash of lines in this main alignment group + # $rgroups = ref to hash of subgroups + # $group_level = common indentation level of these lines + # Task: + # add leading alignments where possible + # So far we have divided the lines into groups having an equal number of # identical alignments. Here we are going to look for common leading # alignments between the different groups and align them when possible. @@ -2675,10 +2729,11 @@ sub delete_selected_tokens { my ( $line_obj, $ridel ) = @_; - # $line_obj is the line to be modified - # $ridel is a ref to list of indexes to be deleted + # Given: + # $line_obj = the line to be modified + # $ridel = a ref to list of indexes to be deleted - # remove an unused alignment token(s) to improve alignment chances + # remove unused alignment token(s) to improve alignment chances return if ( !defined($line_obj) || !defined($ridel) || !@{$ridel} ); @@ -2803,7 +2858,14 @@ EOM sub decode_alignment_token { + my ($tok) = @_; + # Unpack the values packed in an alignment token + + # Given: + # $tok = an alignment token + # Returns: + # ( $raw_tok, $lev, $tag, $tok_count ) # # Usage: # my ( $raw_tok, $lev, $tag, $tok_count ) = @@ -2822,7 +2884,6 @@ EOM # $nport = $port = shift || $name; # The first '=' may either be '=0' or '=0.1' [level 0, first equals] # The second '=' will be '=0.2' [level 0, second equals] - my ($tok) = @_; if ( defined( $decoded_token{$tok} ) ) { return @{ $decoded_token{$tok} }; @@ -2868,11 +2929,14 @@ EOM sub delete_unmatched_tokens { my ( $rlines, $group_level ) = @_; - # This is a important first step in vertical alignment in which - # we remove as many obviously un-needed alignment tokens as possible. + # Remove as many obviously un-needed alignment tokens as possible. # This will prevent them from interfering with the final alignment. - # Returns: + # Given: + # $rlines = ref to hash of all lines in this alignment group + # $group_level = their comment indentation level + + # Return: my $max_lev_diff = 0; # used to avoid a call to prune_tree my $saw_side_comment = 0; # used to avoid a call for side comments my $saw_signed_number = 0; # used to avoid a call for -vsn @@ -2947,9 +3011,22 @@ EOM my ( $group_level, $rnew_lines, $saw_side_comment ) = @_; - #------------------------------------------------------------ - # Loop to create a hash of alignment token info for each line - #------------------------------------------------------------ + # Create a hash of alignment token info for each line + # This info will be used to find common alignments + + # Given: + # $group_level = common indentation level + # $rnew_lines = ref to hash of line info + # $saw_side_comment = true if there is a side comment + # Return: + # $rline_hashes = ref to hash with new line vars + # \@equals_info = ref to array with info on any '=' tokens + # $saw_side_comment = updated side comment flag + # $max_lev_diff = maximum level change seen + + #---------------- + # Loop over lines + #---------------- my $rline_hashes = []; my @equals_info; my @line_info; # no longer used @@ -3328,9 +3405,14 @@ sub match_line_pairs { my ( $rlines, $rnew_lines, $rsubgroups, $group_level ) = @_; # Compare each pair of lines and save information about common matches - # $rlines = list of lines including hanging side comments - # $rnew_lines = list of lines without any hanging side comments - # $rsubgroups = list of subgroups of the new lines + + # Given: + # $rlines = list of lines including hanging side comments + # $rnew_lines = list of lines without any hanging side comments + # $rsubgroups = list of subgroups of the new lines + # Return: + # $saw_signed_number = true if a field has a signed number + # (needed for --valign-signed-numbers) # TODO: # Maybe change: imax_pair => pair_match_info = ref to array @@ -3514,12 +3596,13 @@ sub compare_patterns { my $pat_m = $rcall_hash->{pat_m}; my $pad = $rcall_hash->{pad}; - # helper routine for sub match_line_pairs to decide if patterns in two - # lines match well enough..Given + # This is a helper routine for sub match_line_pairs to decide if patterns + # in two lines match well enough + # Given: # $tok_m, $pat_m = token and pattern of first line # $tok, $pat = token and pattern of second line # $pad = 0 if no padding is needed, !=0 otherwise - # return code: + # Return code: # 0 = patterns match, continue # 1 = no match # 2 = no match, and lines do not match at all @@ -3631,19 +3714,28 @@ sub compare_patterns { sub fat_comma_to_comma { my ($str) = @_; - # We are changing '=>' to ',' and removing any trailing decimal count - # because currently fat commas have a count and commas do not. - # For example, we will change '=>2+{-3.2' into ',2+{-3' + # Given: + # $str = a decorated fat comma alignment token + + # Change '=>' to ',' + # and remove any trailing decimal count because currently fat commas have a + # count and commas do not. + + # For example, change '=>2+{-3.2' into ',2+{-3' if ( $str =~ /^=>([^\.]*)/ ) { $str = ',' . $1 } return $str; } ## end sub fat_comma_to_comma sub get_line_token_info { - # scan lines of tokens and return summary information about the range of - # levels and patterns. my ($rlines) = @_; + # Given: + # $rlines = ref to array of lines in this group + + # Scan lines of tokens and return summary information about the range of + # levels and patterns. + # First scan to check monotonicity. Here is an example of several # lines which are monotonic. The = is the lowest level, and # the commas are all one level deeper. So this is not nonmonotonic. @@ -3792,6 +3884,12 @@ sub get_line_token_info { sub prune_alignment_tree { my ($rlines) = @_; + + # Given: + # $rlines = ref to array of lines in this group + + # Prune the tree of alignments to limit depth of alignments + my $jmax = @{$rlines} - 1; return if ( $jmax <= 0 ); @@ -4628,8 +4726,15 @@ sub is_good_side_comment_column { # a previous side comment should be forgotten. This involves # checking several rules. - # Return true to KEEP old comment location - # Return false to FORGET old comment location + # Given: + # $line = ref to info hash for the line of interest + # $line_number = number of this line in the output stream + # $level = indentation level of this line + # $num5 = ..see comments below + + # Return: + # true to KEEP old comment location + # false to FORGET old comment location my $KEEP = 1; my $FORGET = 0; @@ -6244,13 +6349,18 @@ sub valign_output_step_A { sub combine_fields { + my ( $line_0, $line_1, $imax_align ) = @_; + + # Given: + # $line_0, $line_1 = two adjacent lines + # $imax_align = index of last alignment wanted + + # Task: # We have a group of two lines for which we do not want to align tokens # between index $imax_align and the side comment. So we will delete fields # between $imax_align and the side comment. Alignments have already # been set so we have to adjust them. - my ( $line_0, $line_1, $imax_align ) = @_; - if ( !defined($imax_align) ) { $imax_align = -1 } # First delete the unwanted tokens @@ -6391,6 +6501,9 @@ sub get_output_line_number { my ( $self, $rinput, $leading_string, $leading_string_length ) = @_; + # handle a cached line .. + # either append the current line to it or write it out + # The cached line will either be: # - passed along to step_C, or # - or combined with the current line