+ return;
+} ## end sub delete_null_alignments
+
+sub match_line_pairs {
+ my ( $rlines, $rnew_lines, $rsubgroups, $group_level ) = @_;
+
+ # Compare each pair of lines and save information about common matches
+ # $rlines = list of lines including hanging side comments
+ # $rnew_lines = list of lines without any hanging side comments
+ # $rsubgroups = list of subgroups of the new lines
+
+ # TODO:
+ # Maybe change: imax_pair => pair_match_info = ref to array
+ # = [$imax_align, $rMsg, ... ]
+ # This may eventually have multi-level match info
+
+ # Previous line vars
+ my ( $line_m, $rtokens_m, $rpatterns_m, $rfield_lengths_m, $imax_m,
+ $list_type_m, $ci_level_m );
+
+ # Current line vars
+ my ( $line, $rtokens, $rpatterns, $rfield_lengths, $imax, $list_type,
+ $ci_level );
+
+ use constant EXPLAIN_COMPARE_PATTERNS => 0;
+
+ my $compare_patterns = sub {
+
+ # helper routine to decide if patterns match well enough..
+ # return code:
+ # 0 = patterns match, continue
+ # 1 = no match
+ # 2 = no match, and lines do not match at all
+
+ my ( $tok, $tok_m, $pat, $pat_m, $pad ) = @_;
+ my $GoToMsg = "";
+ my $return_code = 1;
+
+ my ( $alignment_token, $lev, $tag, $tok_count ) =
+ decode_alignment_token($tok);
+
+ # We have to be very careful about aligning commas
+ # when the pattern's don't match, because it can be
+ # worse to create an alignment where none is needed
+ # than to omit one. Here's an example where the ','s
+ # are not in named containers. The first line below
+ # should not match the next two:
+ # ( $a, $b ) = ( $b, $r );
+ # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
+ # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
+ if ( $alignment_token eq ',' ) {
+
+ # do not align commas unless they are in named
+ # containers
+ $GoToMsg = "do not align commas in unnamed containers";
+ goto NO_MATCH unless ( $tok =~ /[A-Za-z]/ );
+ }
+
+ # do not align parens unless patterns match;
+ # large ugly spaces can occur in math expressions.
+ elsif ( $alignment_token eq '(' ) {
+
+ # But we can allow a match if the parens don't
+ # require any padding.
+ $GoToMsg = "do not align '(' unless patterns match or pad=0";
+ if ( $pad != 0 ) { goto NO_MATCH }
+ }
+
+ # Handle an '=' alignment with different patterns to
+ # the left.
+ elsif ( $alignment_token eq '=' ) {
+
+ # It is best to be a little restrictive when
+ # aligning '=' tokens. Here is an example of
+ # two lines that we will not align:
+ # my $variable=6;
+ # $bb=4;
+ # The problem is that one is a 'my' declaration,
+ # and the other isn't, so they're not very similar.
+ # We will filter these out by comparing the first
+ # letter of the pattern. This is crude, but works
+ # well enough.
+ if ( substr( $pat_m, 0, 1 ) ne substr( $pat, 0, 1 ) ) {
+ $GoToMsg = "first character before equals differ";
+ goto NO_MATCH;
+ }
+
+ # The introduction of sub 'prune_alignment_tree'
+ # enabled alignment of lists left of the equals with
+ # other scalar variables. For example:
+ # my ( $D, $s, $e ) = @_;
+ # my $d = length $D;
+ # my $c = $e - $s - $d;
+
+ # But this would change formatting of a lot of scripts,
+ # so for now we prevent alignment of comma lists on the
+ # left with scalars on the left. We will also prevent
+ # any partial alignments.
+
+ # set return code 2 if the = is at line level, but
+ # set return code 1 if the = is below line level, i.e.
+ # sub new { my ( $p, $v ) = @_; bless \$v, $p }
+ # sub iter { my ($x) = @_; return undef if $$x < 0; return $$x--; }
+
+ elsif (
+ ( index( $pat_m, ',' ) >= 0 ) ne ( index( $pat, ',' ) >= 0 ) )
+ {
+ $GoToMsg = "mixed commas/no-commas before equals";
+ if ( $lev eq $group_level ) {
+ $return_code = 2;
+ }
+ goto NO_MATCH;
+ }
+ }
+
+ MATCH:
+ return ( 0, \$GoToMsg );
+
+ NO_MATCH:
+
+ EXPLAIN_COMPARE_PATTERNS
+ && print STDERR "no match because $GoToMsg\n";
+
+ return ( $return_code, \$GoToMsg );
+
+ }; ## end of $compare_patterns->()
+
+ # loop over subgroups
+ foreach my $item ( @{$rsubgroups} ) {
+ my ( $jbeg, $jend ) = @{$item};
+ my $nlines = $jend - $jbeg + 1;
+ next unless ( $nlines > 1 );
+
+ # loop over lines in a subgroup
+ for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
+
+ $line_m = $line;
+ $rtokens_m = $rtokens;
+ $rpatterns_m = $rpatterns;
+ $rfield_lengths_m = $rfield_lengths;
+ $imax_m = $imax;
+ $list_type_m = $list_type;
+ $ci_level_m = $ci_level;
+
+ $line = $rnew_lines->[$jj];
+ $rtokens = $line->get_rtokens();
+ $rpatterns = $line->get_rpatterns();
+ $rfield_lengths = $line->get_rfield_lengths();
+ $imax = @{$rtokens} - 2;
+ $list_type = $line->get_list_type();
+ $ci_level = $line->get_ci_level();
+
+ # nothing to do for first line
+ next if ( $jj == $jbeg );
+
+ my $ci_jump = $ci_level - $ci_level_m;
+
+ my $imax_min = $imax_m < $imax ? $imax_m : $imax;
+
+ my $imax_align = -1;
+
+ # find number of leading common tokens
+
+ #################################
+ # No match to hanging side comment
+ #################################
+ if ( $line->get_is_hanging_side_comment() ) {
+
+ # Should not get here; HSC's have been filtered out
+ $imax_align = -1;
+ }
+
+ ##############################
+ # Handle comma-separated lists
+ ##############################
+ elsif ( $list_type && $list_type eq $list_type_m ) {
+
+ # do not align lists across a ci jump with new list method
+ if ($ci_jump) { $imax_min = -1 }
+
+ my $i_nomatch = $imax_min + 1;
+ for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
+ my $tok = $rtokens->[$i];
+ my $tok_m = $rtokens_m->[$i];
+ if ( $tok ne $tok_m ) {
+ $i_nomatch = $i;
+ last;
+ }
+ }
+
+ $imax_align = $i_nomatch - 1;
+ }
+
+ ##################
+ # Handle non-lists
+ ##################
+ else {
+ my $i_nomatch = $imax_min + 1;
+ for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
+ my $tok = $rtokens->[$i];
+ my $tok_m = $rtokens_m->[$i];
+ if ( $tok ne $tok_m ) {
+ $i_nomatch = $i;
+ last;
+ }
+
+ my $pat = $rpatterns->[$i];
+ my $pat_m = $rpatterns_m->[$i];
+
+ # If patterns don't match, we have to be careful...
+ if ( $pat_m ne $pat ) {
+ my $pad =
+ $rfield_lengths->[$i] - $rfield_lengths_m->[$i];
+ my ( $match_code, $rmsg ) = $compare_patterns->(
+ $tok, $tok_m, $pat, $pat_m, $pad
+ );
+ if ($match_code) {
+ if ( $match_code eq 1 ) { $i_nomatch = $i }
+ elsif ( $match_code eq 2 ) { $i_nomatch = 0 }
+ last;
+ }
+ }
+ }
+ $imax_align = $i_nomatch - 1;
+ }
+
+ $line_m->set_imax_pair($imax_align);
+
+ } ## end loop over lines
+
+ # Put fence at end of subgroup
+ $line->set_imax_pair(-1);
+
+ } ## end loop over subgroups
+
+ # if there are hanging side comments, propagate the pair info down to them
+ # so that lines can just look back one line for their pair info.
+ if ( @{$rlines} > @{$rnew_lines} ) {
+ my $last_pair_info = -1;
+ foreach my $line ( @{$rlines} ) {
+ if ( $line->get_is_hanging_side_comment() ) {
+ $line->set_imax_pair($last_pair_info);
+ }
+ else {
+ $last_pair_info = $line->get_imax_pair();
+ }
+ }
+ }
+ return;
+}
+
+sub fat_comma_to_comma {
+ my ($str) = @_;
+
+ # We are changing '=>' to ',' and removing any trailing decimal count
+ # because currently fat commas have a count and commas do not.
+ # For example, we will change '=>2+{-3.2' into ',2+{-3'
+ if ( $str =~ /^=>([^\.]*)/ ) { $str = ',' . $1 }
+ return $str;
+}
+
+sub get_line_token_info {
+
+ # scan lines of tokens and return summary information about the range of
+ # levels and patterns.
+ my ($rlines) = @_;
+
+ # First scan to check monotonicity. Here is an example of several
+ # lines which are monotonic. The = is the lowest level, and
+ # the commas are all one level deeper. So this is not nonmonotonic.
+ # $$d{"weeks"} = [ "w", "wk", "wks", "week", "weeks" ];
+ # $$d{"days"} = [ "d", "day", "days" ];
+ # $$d{"hours"} = [ "h", "hr", "hrs", "hour", "hours" ];
+ my @all_token_info;
+ my $all_monotonic = 1;
+ for ( my $jj = 0 ; $jj < @{$rlines} ; $jj++ ) {
+ my ($line) = $rlines->[$jj];
+ my $rtokens = $line->get_rtokens();
+ my $last_lev;
+ my $is_monotonic = 1;
+ my $i = -1;
+ foreach my $tok ( @{$rtokens} ) {
+ $i++;
+ my ( $raw_tok, $lev, $tag, $tok_count ) =
+ decode_alignment_token($tok);
+ push @{ $all_token_info[$jj] },
+ [ $raw_tok, $lev, $tag, $tok_count ];
+ last if ( $tok eq '#' );
+ if ( $i > 0 && $lev < $last_lev ) { $is_monotonic = 0 }
+ $last_lev = $lev;
+ }
+ if ( !$is_monotonic ) { $all_monotonic = 0 }
+ }
+
+ my $rline_values = [];
+ for ( my $jj = 0 ; $jj < @{$rlines} ; $jj++ ) {
+ my ($line) = $rlines->[$jj];
+
+ my $rtokens = $line->get_rtokens();
+ my $i = -1;
+ my ( $lev_min, $lev_max );
+ my $token_pattern_max = "";
+ my %saw_level;
+ my @token_info;
+ my $is_monotonic = 1;
+
+ # find the index of the last token before the side comment
+ my $imax = @{$rtokens} - 2;
+ my $imax_true = $imax;
+
+ # If the entire group is monotonic, and the line ends in a comma list,
+ # walk it back to the first such comma. this will have the effect of
+ # making all trailing ragged comma lists match in the prune tree
+ # routine. these trailing comma lists can better be handled by later
+ # alignment rules.
+
+ # Treat fat commas the same as commas here by converting them to
+ # commas. This will improve the chance of aligning the leading parts
+ # of ragged lists.
+
+ my $tok_end = fat_comma_to_comma( $rtokens->[$imax] );
+ if ( $all_monotonic && $tok_end =~ /^,/ ) {
+ my $i = $imax - 1;
+ while ( $i >= 0
+ && fat_comma_to_comma( $rtokens->[$i] ) eq $tok_end )
+ {
+ $imax = $i;
+ $i--;
+ }
+ }
+
+ # make a first pass to find level range
+ my $last_lev;
+ foreach my $tok ( @{$rtokens} ) {
+ $i++;
+ last if ( $i > $imax );
+ last if ( $tok eq '#' );
+ my ( $raw_tok, $lev, $tag, $tok_count ) =
+ @{ $all_token_info[$jj]->[$i] };
+
+ last if ( $tok eq '#' );
+ $token_pattern_max .= $tok;
+ $saw_level{$lev}++;
+ if ( !defined($lev_min) ) {
+ $lev_min = $lev;
+ $lev_max = $lev;
+ }
+ else {
+ if ( $lev < $lev_min ) { $lev_min = $lev; }
+ if ( $lev > $lev_max ) { $lev_max = $lev; }
+ if ( $lev < $last_lev ) { $is_monotonic = 0 }
+ }
+ $last_lev = $lev;
+ }
+
+ # handle no levels
+ my $rtoken_patterns = {};
+ my $rtoken_indexes = {};
+ my @levs = sort keys %saw_level;
+ if ( !defined($lev_min) ) {
+ $lev_min = -1;
+ $lev_max = -1;
+ $levs[0] = -1;
+ $rtoken_patterns->{$lev_min} = "";
+ $rtoken_indexes->{$lev_min} = [];
+ }
+
+ # handle one level
+ elsif ( $lev_max == $lev_min ) {
+ $rtoken_patterns->{$lev_max} = $token_pattern_max;
+ $rtoken_indexes->{$lev_max} = [ ( 0 .. $imax ) ];
+ }
+
+ # handle multiple levels
+ else {
+ $rtoken_patterns->{$lev_max} = $token_pattern_max;
+ $rtoken_indexes->{$lev_max} = [ ( 0 .. $imax ) ];
+
+ my $debug = 0;
+ my $lev_top = pop @levs; # alread did max level
+ my $itok = -1;
+ foreach my $tok ( @{$rtokens} ) {
+ $itok++;
+ last if ( $itok > $imax );
+ my ( $raw_tok, $lev, $tag, $tok_count ) =
+ @{ $all_token_info[$jj]->[$itok] };
+ last if ( $raw_tok eq '#' );
+ foreach my $lev_test (@levs) {
+ next if ( $lev > $lev_test );
+ $rtoken_patterns->{$lev_test} .= $tok;
+ push @{ $rtoken_indexes->{$lev_test} }, $itok;
+ }
+ }
+ push @levs, $lev_top;
+ }
+
+ push @{$rline_values},
+ [
+ $lev_min, $lev_max, $rtoken_patterns, \@levs,
+ $rtoken_indexes, $is_monotonic, $imax_true, $imax,
+ ];
+
+ # debug
+ 0 && do {
+ local $" = ')(';
+ print "lev_min=$lev_min, lev_max=$lev_max, levels=(@levs)\n";
+ foreach my $key ( sort keys %{$rtoken_patterns} ) {
+ print "$key => $rtoken_patterns->{$key}\n";
+ print "$key => @{$rtoken_indexes->{$key}}\n";
+ }
+ };
+ } ## end loop over lines
+ return ( $rline_values, $all_monotonic );
+}
+
+sub prune_alignment_tree {
+ my ($rlines) = @_;
+ my $jmax = @{$rlines} - 1;
+ return unless $jmax > 0;
+
+ # Vertical alignment in perltidy is done as an iterative process. The
+ # starting point is to mark all possible alignment tokens ('=', ',', '=>',
+ # etc) for vertical alignment. Then we have to delete all alignments
+ # which, if actually made, would detract from overall alignment. This
+ # is done in several phases of which this is one.
+
+ # In this routine we look at the alignments of a group of lines as a
+ # hierarchical tree. We will 'prune' the tree to limited depths if that
+ # will improve overall alignment at the lower depths.
+ # For each line we will be looking at its alignment patterns down to
+ # different fixed depths. For each depth, we include all lower depths and
+ # ignore all higher depths. We want to see if we can get alignment of a
+ # larger group of lines if we ignore alignments at some lower depth.
+ # Here is an # example:
+
+ # for (
+ # [ '$var', sub { join $_, "bar" }, 0, "bar" ],
+ # [ 'CONSTANT', sub { join "foo", "bar" }, 0, "bar" ],
+ # [ 'CONSTANT', sub { join "foo", "bar", 3 }, 1, "barfoo3" ],
+ # [ '$myvar', sub { my $var; join $var, "bar" }, 0, "bar" ],
+ # );
+
+ # In the above example, all lines have three commas at the lowest depth
+ # (zero), so if there were no other alignements, these lines would all
+ # align considering only the zero depth alignment token. But some lines
+ # have additional comma alignments at the next depth, so we need to decide
+ # if we should drop those to keep the top level alignments, or keep those
+ # for some additional low level alignments at the expense losing some top
+ # level alignments. In this case we will drop the deeper level commas to
+ # keep the entire collection aligned. But in some cases the decision could
+ # go the other way.
+
+ # The tree for this example at the zero depth has one node containing
+ # all four lines, since they are identical at zero level (three commas).
+ # At depth one, there are three 'children' nodes, namely:
+ # - lines 1 and 2, which have a single comma in the 'sub' at depth 1
+ # - line 3, which has 2 commas at depth 1
+ # - line4, which has a ';' and a ',' at depth 1
+ # There are no deeper alignments in this example.
+ # so the tree structure for this example is:
+ #
+ # depth 0 depth 1 depth 2
+ # [lines 1-4] -- [line 1-2] - (empty)
+ # | [line 3] - (empty)
+ # | [line 4] - (empty)
+
+ # We can carry this to any depth, but it is not really useful to go below
+ # depth 2. To cleanly stop there, we will consider depth 2 to contain all
+ # alignments at depth >=2.
+
+ use constant EXPLAIN_PRUNE => 0;
+
+ ####################################################################
+ # Prune Tree Step 1. Start by scanning the lines and collecting info
+ ####################################################################
+
+ # Note that the caller had this info but we have to redo this now because
+ # alignment tokens may have been deleted.
+ my ( $rline_values, $all_monotonic ) = get_line_token_info($rlines);
+
+ # If all the lines have levels which increase monotonically from left to
+ # right, then the sweep-left-to-right pass can do a better job of alignment
+ # than pruning, and without deleting alignments.
+ return if ($all_monotonic);
+
+ # Contents of $rline_values
+ # [
+ # $lev_min, $lev_max, $rtoken_patterns, \@levs,
+ # $rtoken_indexes, $is_monotonic, $imax_true, $imax,
+ # ];
+
+ # We can work to any depth, but there is little advantage to working
+ # to a a depth greater than 2
+ my $MAX_DEPTH = 2;
+
+ # This arrays will hold the tree of alignment tokens at different depths
+ # for these lines.
+ my @match_tree;
+
+ # Tree nodes contain these values:
+ # $match_tree[$depth] = [$jbeg, $jend, $n_parent, $level, $pattern,
+ # $nc_beg_p, $nc_end_p, $rindexes];
+ # where
+ # $depth = 0,1,2 = index of depth of the match
+
+ # $jbeg beginning index j of the range of lines in this match
+ # $jend ending index j of the range of lines in this match
+ # $n_parent = index of the containing group at $depth-1, if it exists
+ # $level = actual level of code being matched in this group
+ # $pattern = alignment pattern being matched
+ # $nc_beg_p = first child
+ # $nc_end_p = last child
+ # $rindexes = ref to token indexes
+
+ # the patterns and levels of the current group being formed at each depth
+ my ( @token_patterns_current, @levels_current, @token_indexes_current );
+
+ # the patterns and levels of the next line being tested at each depth
+ my ( @token_patterns_next, @levels_next, @token_indexes_next );
+
+ #########################################################
+ # define a recursive worker subroutine for tree construction
+ #########################################################
+
+ # This is a recursive routine which is called if a match condition changes
+ # at any depth when a new line is encountered. It ends the match node
+ # which changed plus all deeper nodes attached to it.
+ my $end_node;
+ $end_node = sub {
+ my ( $depth, $jl, $n_parent ) = @_;
+
+ # $depth is the tree depth
+ # $jl is the index of the line
+ # $n_parent is index of the parent node of this node
+
+ return if ( $depth > $MAX_DEPTH );
+
+ # end any current group at this depth
+ if ( $jl >= 0
+ && defined( $match_tree[$depth] )
+ && @{ $match_tree[$depth] }
+ && defined( $levels_current[$depth] ) )
+ {
+ $match_tree[$depth]->[-1]->[1] = $jl;
+ }
+
+ # Define the index of the node we will create below
+ my $ng_self = 0;
+ if ( defined( $match_tree[$depth] ) ) {
+ $ng_self = @{ $match_tree[$depth] };
+ }
+
+ # end any next deeper child node(s)
+ $end_node->( $depth + 1, $jl, $ng_self );
+
+ # update the levels being matched
+ $token_patterns_current[$depth] = $token_patterns_next[$depth];
+ $token_indexes_current[$depth] = $token_indexes_next[$depth];
+ $levels_current[$depth] = $levels_next[$depth];
+
+ # Do not start a new group at this level if it is not being used
+ if ( !defined( $levels_next[$depth] )
+ || $depth > 0
+ && $levels_next[$depth] <= $levels_next[ $depth - 1 ] )
+ {
+ return;
+ }
+
+ # Create a node for the next group at this depth. We initially assume
+ # that it will continue to $jmax, and correct that later if the node
+ # ends earlier.
+ push @{ $match_tree[$depth] },
+ [
+ $jl + 1, $jmax, $n_parent, $levels_current[$depth],
+ $token_patterns_current[$depth],
+ undef, undef, $token_indexes_current[$depth],
+ ];
+
+ return;
+ }; ## end sub end_node
+
+ ######################################################
+ # Prune Tree Step 2. Loop to form the tree of matches.
+ ######################################################
+ for ( my $jp = 0 ; $jp <= $jmax ; $jp++ ) {
+
+ # working with two adjacent line indexes, 'm'=minus, 'p'=plus
+ my $jm = $jp - 1;
+
+ # Pull out needed values for the next line
+ my ( $lev_min, $lev_max, $rtoken_patterns, $rlevs, $rtoken_indexes,
+ $is_monotonic, $imax_true, $imax )
+ = @{ $rline_values->[$jp] };
+
+ # Transfer levels and patterns for this line to the working arrays.
+ # If the number of levels differs from our chosen MAX_DEPTH ...
+ # if fewer than MAX_DEPTH: leave levels at missing depths undefined
+ # if more than MAX_DEPTH: set the MAX_DEPTH level to be the maximum
+ @levels_next = @{$rlevs}[ 0 .. $MAX_DEPTH ];
+ if ( @{$rlevs} > $MAX_DEPTH ) {
+ $levels_next[$MAX_DEPTH] = $rlevs->[-1];
+ }
+ my $depth = 0;
+ foreach (@levels_next) {
+ $token_patterns_next[$depth] =
+ defined($_) ? $rtoken_patterns->{$_} : undef;
+ $token_indexes_next[$depth] =
+ defined($_) ? $rtoken_indexes->{$_} : undef;
+ $depth++;
+ }
+
+ # Look for a change in match groups...
+
+ # Initialize on the first line
+ if ( $jp == 0 ) {
+ my $n_parent;
+ $end_node->( 0, $jm, $n_parent );
+ }
+
+ # End groups if a hard flag has been set
+ elsif ( $rlines->[$jm]->get_end_group() ) {
+ my $n_parent;
+ $end_node->( 0, $jm, $n_parent );
+ }
+
+ # Continue at hanging side comment
+ elsif ( $rlines->[$jp]->get_is_hanging_side_comment() ) {
+ next;
+ }