1 package Perl::Tidy::VerticalAligner;
5 use English qw( -no_match_vars );
6 our $VERSION = '20230309';
7 use Perl::Tidy::VerticalAligner::Alignment;
8 use Perl::Tidy::VerticalAligner::Line;
10 use constant DEVEL_MODE => 0;
11 use constant EMPTY_STRING => q{};
12 use constant SPACE => q{ };
14 # The Perl::Tidy::VerticalAligner package collects output lines and
15 # attempts to line up certain common tokens, such as => and #, which are
16 # identified by the calling routine.
19 # - Initiate an object with a call to new().
20 # - Write lines one-by-one with calls to valign_input().
21 # - Make a final call to flush() to empty the pipeline.
23 # The sub valign_input collects lines into groups. When a group reaches
24 # the maximum possible size it is processed for alignment and output.
25 # The maximum group size is reached whenever there is a change in indentation
26 # level, a blank line, a block comment, or an external flush call. The calling
27 # routine may also force a break in alignment at any time.
29 # If the calling routine needs to interrupt the output and send other text to
30 # the output, it must first call flush() to empty the output pipeline. This
31 # might occur for example if a block of pod text needs to be sent to the output
32 # between blocks of code.
34 # It is essential that a final call to flush() be made. Otherwise some
35 # final lines of text will be lost.
38 # CODE SECTION 1: Preliminary code, global definitions and sub new
40 # CODE SECTION 2: Some Basic Utilities
41 # CODE SECTION 3: Code to accept input and form groups
43 # CODE SECTION 4: Code to process comment lines
44 # sub _flush_comment_lines
45 # CODE SECTION 5: Code to process groups of code lines
46 # sub _flush_group_lines
47 # CODE SECTION 6: Output Step A
48 # sub valign_output_step_A
49 # CODE SECTION 7: Output Step B
50 # sub valign_output_step_B
51 # CODE SECTION 8: Output Step C
52 # sub valign_output_step_C
53 # CODE SECTION 9: Output Step D
54 # sub valign_output_step_D
55 # CODE SECTION 10: Summary
56 # sub report_anything_unusual
58 ##################################################################
59 # CODE SECTION 1: Preliminary code, global definitions and sub new
60 ##################################################################
64 # Catch any undefined sub calls so that we are sure to get
65 # some diagnostic information. This sub should never be called
66 # except for a programming error.
68 return if ( $AUTOLOAD =~ /\bDESTROY$/ );
69 my ( $pkg, $fname, $lno ) = caller();
70 my $my_package = __PACKAGE__;
72 ======================================================================
73 Error detected in package '$my_package', version $VERSION
74 Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
75 Called from package: '$pkg'
76 Called from File '$fname' at line '$lno'
77 This error is probably due to a recent programming change
78 ======================================================================
85 # required to avoid call to AUTOLOAD in some versions of perl
90 Perl::Tidy::Die($msg);
91 croak "unexpected return from Perl::Tidy::Die";
97 # This routine is called for errors that really should not occur
98 # except if there has been a bug introduced by a recent program change.
99 # Please add comments at calls to Fault to explain why the call
100 # should not occur, and where to look to fix it.
101 my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
102 my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
103 my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
104 my $pkg = __PACKAGE__;
106 my $input_stream_name = get_input_stream_name();
109 ==============================================================================
110 While operating on input stream with name: '$input_stream_name'
111 A fault was detected at line $line0 of sub '$subroutine1'
113 which was called from line $line1 of sub '$subroutine2'
115 This is probably an error introduced by a recent programming change.
116 $pkg reports VERSION='$VERSION'.
117 ==============================================================================
120 # We shouldn't get here, but this return is to keep Perl-Critic from
129 # define valid keys in a line object
141 is_hanging_side_comment
143 rvertical_tightness_flags
156 @valid_LINE_keys{@q} = (1) x scalar(@q);
161 # Define the fixed indexes for variables in $self, which is an array
162 # reference. Note the convention of leading and trailing underscores to
164 # Do not combine with other BEGIN blocks (c101).
167 _file_writer_object_ => $i++,
168 _logger_object_ => $i++,
169 _diagnostics_object_ => $i++,
170 _length_function_ => $i++,
173 _rOpts_indent_columns_ => $i++,
174 _rOpts_tabs_ => $i++,
175 _rOpts_entab_leading_whitespace_ => $i++,
176 _rOpts_fixed_position_side_comment_ => $i++,
177 _rOpts_minimum_space_to_comment_ => $i++,
178 _rOpts_valign_code_ => $i++,
179 _rOpts_valign_block_comments_ => $i++,
180 _rOpts_valign_side_comments_ => $i++,
182 _last_level_written_ => $i++,
183 _last_side_comment_column_ => $i++,
184 _last_side_comment_line_number_ => $i++,
185 _last_side_comment_length_ => $i++,
186 _last_side_comment_level_ => $i++,
187 _outdented_line_count_ => $i++,
188 _first_outdented_line_at_ => $i++,
189 _last_outdented_line_at_ => $i++,
190 _consecutive_block_comments_ => $i++,
192 _rgroup_lines_ => $i++,
193 _group_level_ => $i++,
194 _group_type_ => $i++,
195 _group_maximum_line_length_ => $i++,
196 _zero_count_ => $i++,
197 _last_leading_space_count_ => $i++,
198 _comment_leading_space_count_ => $i++,
201 # Debug flag. This is a relic from the original program development
202 # looking for problems with tab characters. Caution: this debug flag can
203 # produce a lot of output It should be 0 except when debugging small
206 use constant DEBUG_TABS => 0;
208 my $debug_warning = sub {
209 print STDOUT "VALIGN_DEBUGGING with key $_[0]\n";
213 DEBUG_TABS && $debug_warning->('TABS');
219 %valign_control_hash,
220 $valign_control_default,
226 # This routine is called to check the user-supplied run parameters
227 # and to configure the control hashes to them.
230 # All alignments are done by default
231 %valign_control_hash = ();
232 $valign_control_default = 1;
234 # If -vil=s is entered without -vxl, assume -vxl='*'
235 if ( !$rOpts->{'valign-exclusion-list'}
236 && $rOpts->{'valign-inclusion-list'} )
238 $rOpts->{'valign-exclusion-list'} = '*';
241 # See if the user wants to exclude any alignment types ...
242 if ( $rOpts->{'valign-exclusion-list'} ) {
244 # The inclusion list is only relevant if there is an exclusion list
245 if ( $rOpts->{'valign-inclusion-list'} ) {
246 my @vil = split /\s+/, $rOpts->{'valign-inclusion-list'};
247 @valign_control_hash{@vil} = (1) x scalar(@vil);
250 # Note that the -vxl list is done after -vil, so -vxl has priority
251 # in the event of duplicate entries.
252 my @vxl = split /\s+/, $rOpts->{'valign-exclusion-list'};
253 @valign_control_hash{@vxl} = (0) x scalar(@vxl);
255 # Optimization: revert to defaults if no exclusions.
256 # This could happen with -vxl=' ' and any -vil list
258 %valign_control_hash = ();
261 # '$valign_control_default' applies to types not in the hash:
262 # - If a '*' was entered then set it to be that default type
263 # - Otherwise, leave it set it to 1
264 if ( defined( $valign_control_hash{'*'} ) ) {
265 $valign_control_default = $valign_control_hash{'*'};
268 # Side comments are controlled separately and must be removed
269 # if given in a list.
270 if (%valign_control_hash) {
271 $valign_control_hash{'#'} = 1;
276 } ## end sub check_options
279 my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
281 # Check the keys of a hash:
282 # $rtest = ref to hash to test
283 # $rvalid = ref to hash with valid keys
285 # $msg = a message to write in case of error
286 # $exact_match defines the type of check:
287 # = false: test hash must not have unknown key
288 # = true: test hash must have exactly same keys as known hash
290 grep { !exists $rvalid->{$_} } keys %{$rtest};
292 grep { !exists $rtest->{$_} } keys %{$rvalid};
293 my $error = @unknown_keys;
294 if ($exact_match) { $error ||= @missing_keys }
296 local $LIST_SEPARATOR = ')(';
297 my @expected_keys = sort keys %{$rvalid};
298 @unknown_keys = sort @unknown_keys;
300 ------------------------------------------------------------------------
301 Program error detected checking hash keys
303 Expected keys: (@expected_keys)
304 Unknown key(s): (@unknown_keys)
305 Missing key(s): (@missing_keys)
306 ------------------------------------------------------------------------
310 } ## end sub check_keys
314 my ( $class, @args ) = @_;
318 file_writer_object => undef,
319 logger_object => undef,
320 diagnostics_object => undef,
321 length_function => sub { return length( $_[0] ) },
323 my %args = ( %defaults, @args );
325 # Initialize other caches and buffers
326 initialize_step_B_cache();
327 initialize_valign_buffer();
328 initialize_leading_string_cache();
330 set_logger_object( $args{logger_object} );
332 # Initialize all variables in $self.
333 # To add an item to $self, first define a new constant index in the BEGIN
338 $self->[_file_writer_object_] = $args{file_writer_object};
339 $self->[_logger_object_] = $args{logger_object};
340 $self->[_diagnostics_object_] = $args{diagnostics_object};
341 $self->[_length_function_] = $args{length_function};
343 # shortcuts to user options
344 my $rOpts = $args{rOpts};
346 $self->[_rOpts_] = $rOpts;
347 $self->[_rOpts_indent_columns_] = $rOpts->{'indent-columns'};
348 $self->[_rOpts_tabs_] = $rOpts->{'tabs'};
349 $self->[_rOpts_entab_leading_whitespace_] =
350 $rOpts->{'entab-leading-whitespace'};
351 $self->[_rOpts_fixed_position_side_comment_] =
352 $rOpts->{'fixed-position-side-comment'};
353 $self->[_rOpts_minimum_space_to_comment_] =
354 $rOpts->{'minimum-space-to-comment'};
355 $self->[_rOpts_valign_code_] = $rOpts->{'valign-code'};
356 $self->[_rOpts_valign_block_comments_] = $rOpts->{'valign-block-comments'};
357 $self->[_rOpts_valign_side_comments_] = $rOpts->{'valign-side-comments'};
359 # Batch of lines being collected
360 $self->[_rgroup_lines_] = [];
361 $self->[_group_level_] = 0;
362 $self->[_group_type_] = EMPTY_STRING;
363 $self->[_group_maximum_line_length_] = undef;
364 $self->[_zero_count_] = 0;
365 $self->[_comment_leading_space_count_] = 0;
366 $self->[_last_leading_space_count_] = 0;
368 # Memory of what has been processed
369 $self->[_last_level_written_] = -1;
370 $self->[_last_side_comment_column_] = 0;
371 $self->[_last_side_comment_line_number_] = 0;
372 $self->[_last_side_comment_length_] = 0;
373 $self->[_last_side_comment_level_] = -1;
374 $self->[_outdented_line_count_] = 0;
375 $self->[_first_outdented_line_at_] = 0;
376 $self->[_last_outdented_line_at_] = 0;
377 $self->[_consecutive_block_comments_] = 0;
383 #################################
384 # CODE SECTION 2: Basic Utilities
385 #################################
389 # flush() is the external call to completely empty the pipeline.
392 # push things out the pipeline...
394 # push out any current group lines
395 $self->_flush_group_lines();
397 # then anything left in the cache of step_B
398 $self->_flush_step_B_cache();
400 # then anything left in the buffer of step_C
401 $self->dump_valign_buffer();
406 sub initialize_for_new_group {
409 $self->[_rgroup_lines_] = [];
410 $self->[_group_type_] = EMPTY_STRING;
411 $self->[_zero_count_] = 0;
412 $self->[_comment_leading_space_count_] = 0;
413 $self->[_last_leading_space_count_] = 0;
414 $self->[_group_maximum_line_length_] = undef;
416 # Note that the value for _group_level_ is
417 # handled separately in sub valign_input
419 } ## end sub initialize_for_new_group
421 sub group_line_count {
422 return +@{ $_[0]->[_rgroup_lines_] };
425 # interface to Perl::Tidy::Diagnostics routines
426 # For debugging; not currently used
427 sub write_diagnostics {
428 my ( $self, $msg ) = @_;
429 my $diagnostics_object = $self->[_diagnostics_object_];
430 if ($diagnostics_object) {
431 $diagnostics_object->write_diagnostics($msg);
434 } ## end sub write_diagnostics
436 { ## begin closure for logger routines
439 # Called once per file to initialize the logger object
440 sub set_logger_object {
441 $logger_object = shift;
445 sub get_logger_object {
446 return $logger_object;
449 sub get_input_stream_name {
450 my $input_stream_name = EMPTY_STRING;
451 if ($logger_object) {
452 $input_stream_name = $logger_object->get_input_stream_name();
454 return $input_stream_name;
455 } ## end sub get_input_stream_name
459 if ($logger_object) {
460 $logger_object->warning($msg);
465 sub write_logfile_entry {
467 if ($logger_object) {
468 $logger_object->write_logfile_entry($msg);
471 } ## end sub write_logfile_entry
474 sub get_cached_line_count {
476 return $self->group_line_count() + ( get_cached_line_type() ? 1 : 0 );
479 sub get_recoverable_spaces {
481 # return the number of spaces (+ means shift right, - means shift left)
482 # that we would like to shift a group of lines with the same indentation
483 # to get them to line up with their opening parens
484 my $indentation = shift;
485 return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
486 } ## end sub get_recoverable_spaces
488 ######################################################
489 # CODE SECTION 3: Code to accept input and form groups
490 ######################################################
492 use constant DEBUG_VALIGN => 0;
493 use constant SC_LONG_LINE_DIFF => 12;
495 my %is_closing_token;
499 @is_closing_token{@q} = (1) x scalar(@q);
502 #--------------------------------------------
503 # VTFLAGS: Vertical tightness types and flags
504 #--------------------------------------------
505 # Vertical tightness is controlled by a 'type' and associated 'flags' for each
506 # line. These values are set by sub Formatter::set_vertical_tightness_flags.
507 # These are defined as follows:
509 # Vertical Tightness Line Type Codes:
510 # Type 0, no vertical tightness condition
511 # Type 1, last token of this line is a non-block opening token
512 # Type 2, first token of next line is a non-block closing
513 # Type 3, isolated opening block brace
514 # type 4, isolated closing block brace
516 # Opening token flag values are the vertical tightness flags
517 # 0 do not join with next line
518 # 1 just one join per line
519 # 2 any number of joins
521 # Closing token flag values indicate spacing:
522 # 0 = no space added before closing token
523 # 1 = single space added before closing token
527 #---------------------------------------------------------------------
528 # This is the front door of the vertical aligner. On each call
529 # we receive one line of specially marked text for vertical alignment.
530 # We compare the line with the current group, and either:
531 # - the line joins the current group if alignments match, or
532 # - the current group is flushed and a new group is started otherwise
533 #---------------------------------------------------------------------
535 # The key input parameters describing each line are:
536 # $level = indentation level of this line
537 # $rfields = ref to array of fields
538 # $rpatterns = ref to array of patterns, one per field
539 # $rtokens = ref to array of tokens starting fields 1,2,..
540 # $rfield_lengths = ref to array of field display widths
542 # Here is an example of what this package does. In this example,
543 # we are trying to line up both the '=>' and the '#'.
545 # '18' => 'grave', # \`
546 # '19' => 'acute', # `'
547 # '20' => 'caron', # \v
548 # <-tabs-><f1-><--field 2 ---><-f3->
551 # col1 col2 col3 col4
553 # The calling routine has already broken the entire line into 3 fields as
554 # indicated. (So the work of identifying promising common tokens has
555 # already been done).
557 # In this example, there will be 2 tokens being matched: '=>' and '#'.
558 # They are the leading parts of fields 2 and 3, but we do need to know
559 # what they are so that we can dump a group of lines when these tokens
562 # The fields contain the actual characters of each field. The patterns
563 # are like the fields, but they contain mainly token types instead
564 # of tokens, so they have fewer characters. They are used to be
565 # sure we are matching fields of similar type.
567 # In this example, there will be 4 column indexes being adjusted. The
568 # first one is always at zero. The interior columns are at the start of
569 # the matching tokens, and the last one tracks the maximum line length.
571 # Each time a new line comes in, it joins the current vertical
572 # group if possible. Otherwise it causes the current group to be flushed
573 # and a new group is started.
575 # For each new group member, the column locations are increased, as
576 # necessary, to make room for the new fields. When the group is finally
577 # output, these column numbers are used to compute the amount of spaces of
578 # padding needed for each field.
580 # Programming note: the fields are assumed not to have any tab characters.
581 # Tabs have been previously removed except for tabs in quoted strings and
582 # side comments. Tabs in these fields can mess up the column counting.
583 # The log file warns the user if there are any such tabs.
585 my ( $self, $rcall_hash ) = @_;
587 # Unpack the call args. This form is significantly faster than getting them
592 $break_alignment_after,
593 $break_alignment_before,
595 $forget_side_comment,
597 $is_terminal_ternary,
601 $maximum_line_length,
604 $rvertical_tightness_flags,
611 break_alignment_after
612 break_alignment_before
623 rvertical_tightness_flags
627 my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
630 # The index '$Kend' is a value which passed along with the line text to sub
631 # 'write_code_line' for a convergence check.
633 # number of fields is $jmax
634 # number of tokens between fields is $jmax-1
635 my $jmax = @{$rfields} - 1;
637 my $leading_space_count =
638 ref($indentation) ? $indentation->get_spaces() : $indentation;
640 # set outdented flag to be sure we either align within statements or
641 # across statement boundaries, but not both.
643 $self->[_last_leading_space_count_] > $leading_space_count;
644 $self->[_last_leading_space_count_] = $leading_space_count;
646 # Identify a hanging side comment. Hanging side comments have an empty
648 my $is_hanging_side_comment =
649 ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
651 # Undo outdented flag for a hanging side comment
652 $is_outdented = 0 if $is_hanging_side_comment;
654 # Identify a block comment.
655 my $is_block_comment = $jmax == 0 && substr( $rfields->[0], 0, 1 ) eq '#';
657 # Block comment .. update count
658 if ($is_block_comment) {
659 $self->[_consecutive_block_comments_]++;
662 # Not a block comment ..
663 # Forget side comment column if we saw 2 or more block comments,
664 # and reset the count
667 if ( $self->[_consecutive_block_comments_] > 1 ) {
668 $self->forget_side_comment();
670 $self->[_consecutive_block_comments_] = 0;
673 # Reset side comment location if we are entering a new block from level 0.
674 # This is intended to keep them from drifting too far to the right.
675 if ($forget_side_comment) {
676 $self->forget_side_comment();
679 my $is_balanced_line = $level_end == $level;
681 my $group_level = $self->[_group_level_];
682 my $group_maximum_line_length = $self->[_group_maximum_line_length_];
685 my $nlines = $self->group_line_count();
687 "Entering valign_input: lines=$nlines new #fields= $jmax, leading_count=$leading_space_count, level=$level, group_level=$group_level, level_end=$level_end\n";
690 # Validate cached line if necessary: If we can produce a container
691 # with just 2 lines total by combining an existing cached opening
692 # token with the closing token to follow, then we will mark both
693 # cached flags as valid.
694 my $cached_line_type = get_cached_line_type();
695 if ($cached_line_type) {
696 my $cached_line_opening_flag = get_cached_line_opening_flag();
697 if ($rvertical_tightness_flags) {
698 my $cached_seqno = get_cached_seqno();
700 && $rvertical_tightness_flags->{_vt_seqno}
701 && $rvertical_tightness_flags->{_vt_seqno} == $cached_seqno )
704 # Fix for b1187 and b1188: Normally this step is only done
705 # if the number of existing lines is 0 or 1. But to prevent
706 # blinking, this range can be controlled by the caller.
707 # If zero values are given we fall back on the range 0 to 1.
708 my $line_count = $self->group_line_count();
709 my $min_lines = $rvertical_tightness_flags->{_vt_min_lines};
710 my $max_lines = $rvertical_tightness_flags->{_vt_max_lines};
711 $min_lines = 0 unless ($min_lines);
712 $max_lines = 1 unless ($max_lines);
713 if ( ( $line_count >= $min_lines )
714 && ( $line_count <= $max_lines ) )
716 $rvertical_tightness_flags->{_vt_valid_flag} ||= 1;
717 set_cached_line_valid(1);
722 # do not join an opening block brace (type 3, see VTFLAGS)
723 # with an unbalanced line unless requested with a flag value of 2
724 if ( $cached_line_type == 3
725 && !$self->group_line_count()
726 && $cached_line_opening_flag < 2
727 && !$is_balanced_line )
729 set_cached_line_valid(0);
734 if ( $level < 0 ) { $level = 0 }
736 # do not align code across indentation level changes
737 # or changes in the maximum line length
738 # or if vertical alignment is turned off
740 $level != $group_level
741 || ( $group_maximum_line_length
742 && $maximum_line_length != $group_maximum_line_length )
744 || ( $is_block_comment && !$self->[_rOpts_valign_block_comments_] )
745 || ( !$is_block_comment
746 && !$self->[_rOpts_valign_side_comments_]
747 && !$self->[_rOpts_valign_code_] )
751 $self->_flush_group_lines( $level - $group_level );
753 $group_level = $level;
754 $self->[_group_level_] = $group_level;
755 $self->[_group_maximum_line_length_] = $maximum_line_length;
757 # Update leading spaces after the above flush because the leading space
758 # count may have been changed if the -icp flag is in effect
759 $leading_space_count =
760 ref($indentation) ? $indentation->get_spaces() : $indentation;
763 # --------------------------------------------------------------------
764 # Collect outdentable block COMMENTS
765 # --------------------------------------------------------------------
766 if ( $self->[_group_type_] eq 'COMMENT' ) {
767 if ( $is_block_comment
768 && $outdent_long_lines
769 && $leading_space_count == $self->[_comment_leading_space_count_] )
772 # Note that for a comment group we are not storing a line
773 # but rather just the text and its length.
774 push @{ $self->[_rgroup_lines_] },
775 [ $rfields->[0], $rfield_lengths->[0], $Kend ];
779 $self->_flush_group_lines();
783 my $rgroup_lines = $self->[_rgroup_lines_];
784 if ( $break_alignment_before && @{$rgroup_lines} ) {
785 $rgroup_lines->[-1]->{'end_group'} = 1;
788 # --------------------------------------------------------------------
789 # add dummy fields for terminal ternary
790 # --------------------------------------------------------------------
791 my $j_terminal_match;
793 if ( $is_terminal_ternary && @{$rgroup_lines} ) {
795 fix_terminal_ternary( $rgroup_lines->[-1], $rfields, $rtokens,
796 $rpatterns, $rfield_lengths, $group_level, );
797 $jmax = @{$rfields} - 1;
800 # --------------------------------------------------------------------
801 # add dummy fields for else statement
802 # --------------------------------------------------------------------
804 # Note the trailing space after 'else' here. If there were no space between
805 # the else and the next '{' then we would not be able to do vertical
806 # alignment of the '{'.
807 if ( $rfields->[0] eq 'else '
809 && $is_balanced_line )
813 fix_terminal_else( $rgroup_lines->[-1], $rfields, $rtokens,
814 $rpatterns, $rfield_lengths );
815 $jmax = @{$rfields} - 1;
818 # --------------------------------------------------------------------
819 # Handle simple line of code with no fields to match.
820 # --------------------------------------------------------------------
822 $self->[_zero_count_]++;
824 if ( @{$rgroup_lines}
825 && !get_recoverable_spaces( $rgroup_lines->[0]->{'indentation'} ) )
828 # flush the current group if it has some aligned columns..
829 # or we haven't seen a comment lately
830 if ( $rgroup_lines->[0]->{'jmax'} > 1
831 || $self->[_zero_count_] > 3 )
833 $self->_flush_group_lines();
835 # Update '$rgroup_lines' - it will become a ref to empty array.
836 # This allows avoiding a call to get_group_line_count below.
837 $rgroup_lines = $self->[_rgroup_lines_];
841 # start new COMMENT group if this comment may be outdented
842 if ( $is_block_comment
843 && $outdent_long_lines
844 && !@{$rgroup_lines} )
846 $self->[_group_type_] = 'COMMENT';
847 $self->[_comment_leading_space_count_] = $leading_space_count;
848 $self->[_group_maximum_line_length_] = $maximum_line_length;
849 push @{$rgroup_lines},
850 [ $rfields->[0], $rfield_lengths->[0], $Kend ];
854 # just write this line directly if no current group, no side comment,
855 # and no space recovery is needed.
856 if ( !@{$rgroup_lines}
857 && !get_recoverable_spaces($indentation) )
860 $self->valign_output_step_B(
862 leading_space_count => $leading_space_count,
863 line => $rfields->[0],
864 line_length => $rfield_lengths->[0],
865 side_comment_length => 0,
866 outdent_long_lines => $outdent_long_lines,
867 rvertical_tightness_flags => $rvertical_tightness_flags,
869 level_end => $level_end,
871 maximum_line_length => $maximum_line_length,
878 $self->[_zero_count_] = 0;
881 # --------------------------------------------------------------------
882 # It simplifies things to create a zero length side comment
884 # --------------------------------------------------------------------
885 if ( ( $jmax == 0 ) || ( $rtokens->[ $jmax - 1 ] ne '#' ) ) {
887 $rtokens->[ $jmax - 1 ] = '#';
888 $rfields->[$jmax] = EMPTY_STRING;
889 $rfield_lengths->[$jmax] = 0;
890 $rpatterns->[$jmax] = '#';
893 # --------------------------------------------------------------------
894 # create an object to hold this line
895 # --------------------------------------------------------------------
897 # The hash keys below must match the list of keys in %valid_LINE_keys.
898 # Values in this hash are accessed directly, except for 'ralignments',
899 # rather than with get/set calls for efficiency.
900 my $new_line = Perl::Tidy::VerticalAligner::Line->new(
905 rpatterns => $rpatterns,
906 rfield_lengths => $rfield_lengths,
907 indentation => $indentation,
908 leading_space_count => $leading_space_count,
909 outdent_long_lines => $outdent_long_lines,
910 list_seqno => $list_seqno,
911 list_type => EMPTY_STRING,
912 is_hanging_side_comment => $is_hanging_side_comment,
913 rvertical_tightness_flags => $rvertical_tightness_flags,
914 is_terminal_ternary => $is_terminal_ternary,
915 j_terminal_match => $j_terminal_match,
916 end_group => $break_alignment_after,
918 ci_level => $ci_level,
920 level_end => $level_end,
922 maximum_line_length => $maximum_line_length,
929 && check_keys( $new_line, \%valid_LINE_keys,
930 "Checking line keys at line definition", 1 );
932 # --------------------------------------------------------------------
933 # Decide if this is a simple list of items.
934 # We use this to be less restrictive in deciding what to align.
935 # --------------------------------------------------------------------
936 decide_if_list($new_line) if ($list_seqno);
938 # --------------------------------------------------------------------
939 # Append this line to the current group (or start new group)
940 # --------------------------------------------------------------------
942 push @{ $self->[_rgroup_lines_] }, $new_line;
943 $self->[_group_maximum_line_length_] = $maximum_line_length;
945 # output this group if it ends in a terminal else or ternary line
946 if ( defined($j_terminal_match) ) {
947 $self->_flush_group_lines();
950 # Force break after jump to lower level
951 elsif ($level_end < $level
952 || $is_closing_token{ substr( $rfields->[0], 0, 1 ) } )
954 $self->_flush_group_lines(-1);
957 # --------------------------------------------------------------------
958 # Some old debugging stuff
959 # --------------------------------------------------------------------
961 print STDOUT "exiting valign_input fields:";
962 dump_array( @{$rfields} );
963 print STDOUT "exiting valign_input tokens:";
964 dump_array( @{$rtokens} );
965 print STDOUT "exiting valign_input patterns:";
966 dump_array( @{$rpatterns} );
970 } ## end sub valign_input
972 sub join_hanging_comment {
974 # Add dummy fields to a hanging side comment to make it look
975 # like the first line in its potential group. This simplifies
977 my ( $new_line, $old_line ) = @_;
979 my $jmax = $new_line->{'jmax'};
982 return 0 unless $jmax == 1;
983 my $rtokens = $new_line->{'rtokens'};
985 # the second field must be a comment
986 return 0 unless $rtokens->[0] eq '#';
987 my $rfields = $new_line->{'rfields'};
989 # the first field must be empty
990 return 0 unless $rfields->[0] =~ /^\s*$/;
992 # the current line must have fewer fields
993 my $maximum_field_index = $old_line->{'jmax'};
995 unless $maximum_field_index > $jmax;
998 my $rpatterns = $new_line->{'rpatterns'};
999 my $rfield_lengths = $new_line->{'rfield_lengths'};
1001 $new_line->{'is_hanging_side_comment'} = 1;
1003 $jmax = $maximum_field_index;
1004 $new_line->{'jmax'} = $jmax;
1005 $rfields->[$jmax] = $rfields->[1];
1006 $rfield_lengths->[$jmax] = $rfield_lengths->[1];
1007 $rtokens->[ $jmax - 1 ] = $rtokens->[0];
1008 $rpatterns->[ $jmax - 1 ] = $rpatterns->[0];
1010 foreach my $j ( 1 .. $jmax - 1 ) {
1011 $rfields->[$j] = EMPTY_STRING;
1012 $rfield_lengths->[$j] = 0;
1013 $rtokens->[ $j - 1 ] = EMPTY_STRING;
1014 $rpatterns->[ $j - 1 ] = EMPTY_STRING;
1017 } ## end sub join_hanging_comment
1019 { ## closure for sub decide_if_list
1027 @is_comma_token{@q} = (1) x scalar(@q);
1030 sub decide_if_list {
1034 # A list will be taken to be a line with a forced break in which all
1035 # of the field separators are commas or comma-arrows (except for the
1038 my $rtokens = $line->{'rtokens'};
1039 my $test_token = $rtokens->[0];
1040 my ( $raw_tok, $lev, $tag, $tok_count ) =
1041 decode_alignment_token($test_token);
1042 if ( $is_comma_token{$raw_tok} ) {
1043 my $list_type = $test_token;
1044 my $jmax = $line->{'jmax'};
1046 foreach ( 1 .. $jmax - 2 ) {
1047 ( $raw_tok, $lev, $tag, $tok_count ) =
1048 decode_alignment_token( $rtokens->[$_] );
1049 if ( !$is_comma_token{$raw_tok} ) {
1050 $list_type = EMPTY_STRING;
1054 $line->{'list_type'} = $list_type;
1057 } ## end sub decide_if_list
1060 sub fix_terminal_ternary {
1062 # Add empty fields as necessary to align a ternary term
1071 # returns the index of the terminal question token, if any
1073 my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths,
1077 return unless ($old_line);
1078 use constant EXPLAIN_TERNARY => 0;
1080 if (%valign_control_hash) {
1081 my $align_ok = $valign_control_hash{'?'};
1082 $align_ok = $valign_control_default unless defined($align_ok);
1083 return unless ($align_ok);
1086 my $jmax = @{$rfields} - 1;
1087 my $rfields_old = $old_line->{'rfields'};
1089 my $rpatterns_old = $old_line->{'rpatterns'};
1090 my $rtokens_old = $old_line->{'rtokens'};
1091 my $maximum_field_index = $old_line->{'jmax'};
1093 # look for the question mark after the :
1096 my $pad = EMPTY_STRING;
1098 foreach my $j ( 0 .. $maximum_field_index - 1 ) {
1099 my $tok = $rtokens_old->[$j];
1100 my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
1101 if ( $raw_tok eq '?' ) {
1102 $depth_question = $lev;
1104 # depth must be correct
1105 next unless ( $depth_question eq $group_level );
1108 if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
1109 $pad_length = length($1);
1110 $pad = SPACE x $pad_length;
1113 return; # shouldn't happen
1118 return unless ( defined($jquestion) ); # shouldn't happen
1120 # Now splice the tokens and patterns of the previous line
1121 # into the else line to insure a match. Add empty fields
1123 my $jadd = $jquestion;
1125 # Work on copies of the actual arrays in case we have
1126 # to return due to an error
1127 my @fields = @{$rfields};
1128 my @patterns = @{$rpatterns};
1129 my @tokens = @{$rtokens};
1130 my @field_lengths = @{$rfield_lengths};
1132 EXPLAIN_TERNARY && do {
1133 local $LIST_SEPARATOR = '><';
1134 print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n";
1135 print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n";
1136 print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
1137 print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n";
1138 print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n";
1139 print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
1142 # handle cases of leading colon on this line
1143 if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
1145 my ( $colon, $therest ) = ( $1, $2 );
1147 # Handle sub-case of first field with leading colon plus additional code
1148 # This is the usual situation as at the '1' below:
1154 # Split the first field after the leading colon and insert padding.
1155 # Note that this padding will remain even if the terminal value goes
1156 # out on a separate line. This does not seem to look to bad, so no
1157 # mechanism has been included to undo it.
1158 my $field1 = shift @fields;
1159 my $field_length1 = shift @field_lengths;
1160 my $len_colon = length($colon);
1161 unshift @fields, ( $colon, $pad . $therest );
1162 unshift @field_lengths,
1163 ( $len_colon, $pad_length + $field_length1 - $len_colon );
1165 # change the leading pattern from : to ?
1166 return unless ( $patterns[0] =~ s/^\:/?/ );
1168 # install leading tokens and patterns of existing line
1169 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
1170 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
1172 # insert appropriate number of empty fields
1173 splice( @fields, 1, 0, (EMPTY_STRING) x $jadd ) if $jadd;
1174 splice( @field_lengths, 1, 0, (0) x $jadd ) if $jadd;
1177 # handle sub-case of first field just equal to leading colon.
1178 # This can happen for example in the example below where
1179 # the leading '(' would create a new alignment token
1180 # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
1181 # : ( $mname = $name . '->' );
1184 return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
1186 # prepend a leading ? onto the second pattern
1187 $patterns[1] = "?b" . $patterns[1];
1189 # pad the second field
1190 $fields[1] = $pad . $fields[1];
1191 $field_lengths[1] = $pad_length + $field_lengths[1];
1193 # install leading tokens and patterns of existing line, replacing
1194 # leading token and inserting appropriate number of empty fields
1195 splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
1196 splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
1197 splice( @fields, 1, 0, (EMPTY_STRING) x $jadd ) if $jadd;
1198 splice( @field_lengths, 1, 0, (0) x $jadd ) if $jadd;
1202 # Handle case of no leading colon on this line. This will
1203 # be the case when -wba=':' is used. For example,
1208 # install leading tokens and patterns of existing line
1209 $patterns[0] = '?' . 'b' . $patterns[0];
1210 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
1211 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
1213 # insert appropriate number of empty fields
1214 $jadd = $jquestion + 1;
1215 $fields[0] = $pad . $fields[0];
1216 $field_lengths[0] = $pad_length + $field_lengths[0];
1217 splice( @fields, 0, 0, (EMPTY_STRING) x $jadd ) if $jadd;
1218 splice( @field_lengths, 0, 0, (0) x $jadd ) if $jadd;
1221 EXPLAIN_TERNARY && do {
1222 local $LIST_SEPARATOR = '><';
1223 print STDOUT "MODIFIED TOKENS=<@tokens>\n";
1224 print STDOUT "MODIFIED PATTERNS=<@patterns>\n";
1225 print STDOUT "MODIFIED FIELDS=<@fields>\n";
1228 # all ok .. update the arrays
1229 @{$rfields} = @fields;
1230 @{$rtokens} = @tokens;
1231 @{$rpatterns} = @patterns;
1232 @{$rfield_lengths} = @field_lengths;
1234 # force a flush after this line
1236 } ## end sub fix_terminal_ternary
1238 sub fix_terminal_else {
1240 # Add empty fields as necessary to align a balanced terminal
1241 # else block to a previous if/elsif/unless block,
1244 # if ( 1 || $x ) { print "ok 13\n"; }
1245 # else { print "not ok 13\n"; }
1247 # returns a positive value if the else block should be indented
1249 my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths ) = @_;
1251 return unless ($old_line);
1252 my $jmax = @{$rfields} - 1;
1253 return unless ( $jmax > 0 );
1255 if (%valign_control_hash) {
1256 my $align_ok = $valign_control_hash{'{'};
1257 $align_ok = $valign_control_default unless defined($align_ok);
1258 return unless ($align_ok);
1261 # check for balanced else block following if/elsif/unless
1262 my $rfields_old = $old_line->{'rfields'};
1264 # TBD: add handling for 'case'
1265 return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
1267 # look for the opening brace after the else, and extract the depth
1268 my $tok_brace = $rtokens->[0];
1270 if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
1272 # probably: "else # side_comment"
1275 my $rpatterns_old = $old_line->{'rpatterns'};
1276 my $rtokens_old = $old_line->{'rtokens'};
1277 my $maximum_field_index = $old_line->{'jmax'};
1279 # be sure the previous if/elsif is followed by an opening paren
1281 my $tok_paren = '(' . $depth_brace;
1282 my $tok_test = $rtokens_old->[$jparen];
1283 return unless ( $tok_test eq $tok_paren ); # shouldn't happen
1285 # Now find the opening block brace
1287 foreach my $j ( 1 .. $maximum_field_index - 1 ) {
1288 my $tok = $rtokens_old->[$j];
1289 if ( $tok eq $tok_brace ) {
1294 return unless ( defined($jbrace) ); # shouldn't happen
1296 # Now splice the tokens and patterns of the previous line
1297 # into the else line to insure a match. Add empty fields
1299 my $jadd = $jbrace - $jparen;
1300 splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
1301 splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
1302 splice( @{$rfields}, 1, 0, (EMPTY_STRING) x $jadd );
1303 splice( @{$rfield_lengths}, 1, 0, (0) x $jadd );
1305 # force a flush after this line if it does not follow a case
1306 if ( $rfields_old->[0] =~ /^case\s*$/ ) { return }
1307 else { return $jbrace }
1308 } ## end sub fix_terminal_else
1310 my %is_closing_block_type;
1314 @is_closing_block_type{@q} = (1) x scalar(@q);
1317 # This is a flag for testing alignment by sub sweep_left_to_right only.
1318 # This test can help find problems with the alignment logic.
1319 # This flag should normally be zero.
1320 use constant TEST_SWEEP_ONLY => 0;
1322 use constant EXPLAIN_CHECK_MATCH => 0;
1326 # See if the current line matches the current vertical alignment group.
1328 my ( $self, $new_line, $base_line, $prev_line ) = @_;
1331 # $new_line = the line being considered for group inclusion
1332 # $base_line = the first line of the current group
1333 # $prev_line = the line just before $new_line
1335 # returns a flag and a value as follows:
1336 # return (0, $imax_align) if the line does not match
1337 # return (1, $imax_align) if the line matches but does not fit
1338 # return (2, $imax_align) if the line matches and fits
1340 use constant NO_MATCH => 0;
1341 use constant MATCH_NO_FIT => 1;
1342 use constant MATCH_AND_FIT => 2;
1346 # Returns '$imax_align' which is the index of the maximum matching token.
1347 # It will be used in the subsequent left-to-right sweep to align as many
1348 # tokens as possible for lines which partially match.
1349 my $imax_align = -1;
1351 # variable $GoToMsg explains reason for no match, for debugging
1352 my $GoToMsg = EMPTY_STRING;
1354 my $jmax = $new_line->{'jmax'};
1355 my $maximum_field_index = $base_line->{'jmax'};
1357 my $jlimit = $jmax - 2;
1358 if ( $jmax > $maximum_field_index ) {
1359 $jlimit = $maximum_field_index - 2;
1362 if ( $new_line->{'is_hanging_side_comment'} ) {
1364 # HSC's can join the group if they fit
1370 # A group with hanging side comments ends with the first non hanging
1372 if ( $base_line->{'is_hanging_side_comment'} ) {
1373 $GoToMsg = "end of hanging side comments";
1374 $return_value = NO_MATCH;
1378 # The number of tokens that this line shares with the previous
1379 # line has been stored with the previous line. This value was
1380 # calculated and stored by sub 'match_line_pair'.
1381 $imax_align = $prev_line->{'imax_pair'};
1383 if ( $imax_align != $jlimit ) {
1384 $GoToMsg = "Not all tokens match: $imax_align != $jlimit\n";
1385 $return_value = NO_MATCH;
1390 if ( !defined($return_value) ) {
1392 # The tokens match, but the lines must have identical number of
1393 # tokens to join the group.
1394 if ( $maximum_field_index != $jmax ) {
1395 $GoToMsg = "token count differs";
1396 $return_value = NO_MATCH;
1399 # The tokens match. Now See if there is space for this line in the
1401 elsif ( $self->check_fit( $new_line, $base_line ) && !TEST_SWEEP_ONLY )
1404 $GoToMsg = "match and fit, imax_align=$imax_align, jmax=$jmax\n";
1405 $return_value = MATCH_AND_FIT;
1406 $imax_align = $jlimit;
1409 $GoToMsg = "match but no fit, imax_align=$imax_align, jmax=$jmax\n";
1410 $return_value = MATCH_NO_FIT;
1411 $imax_align = $jlimit;
1417 "returning $return_value because $GoToMsg, max match index =i $imax_align, jmax=$jmax\n";
1419 return ( $return_value, $imax_align );
1420 } ## end sub check_match
1424 my ( $self, $new_line, $old_line ) = @_;
1426 # The new line has alignments identical to the current group. Now we have
1427 # to fit the new line into the group without causing a field to exceed the
1428 # line length limit.
1429 # return true if successful
1430 # return false if not successful
1432 my $jmax = $new_line->{'jmax'};
1433 my $leading_space_count = $new_line->{'leading_space_count'};
1434 my $rfield_lengths = $new_line->{'rfield_lengths'};
1435 my $padding_available = $old_line->get_available_space_on_right();
1436 my $jmax_old = $old_line->{'jmax'};
1438 # Safety check ... only lines with equal array sizes should arrive here
1439 # from sub check_match. So if this error occurs, look at recent changes in
1440 # sub check_match. It is only supposed to check the fit of lines with
1441 # identical numbers of alignment tokens.
1442 if ( $jmax_old ne $jmax ) {
1445 Program bug detected in Perl::Tidy::VerticalAligner sub check_fit
1446 unexpected difference in array lengths: $jmax != $jmax_old
1451 # Save current columns in case this line does not fit.
1452 my @alignments = @{ $old_line->{'ralignments'} };
1453 foreach my $alignment (@alignments) {
1454 $alignment->save_column();
1457 # Loop over all alignments ...
1458 for my $j ( 0 .. $jmax ) {
1460 my $pad = $rfield_lengths->[$j] - $old_line->current_field_width($j);
1463 $pad += $leading_space_count;
1466 # Keep going if this field does not need any space.
1467 next if ( $pad < 0 );
1469 # Revert to the starting state if does not fit
1470 if ( $pad > $padding_available ) {
1472 #----------------------------------------------
1473 # Line does not fit -- revert to starting state
1474 #----------------------------------------------
1475 foreach my $alignment (@alignments) {
1476 $alignment->restore_column();
1481 # make room for this field
1482 $old_line->increase_field_width( $j, $pad );
1483 $padding_available -= $pad;
1486 #-------------------------------------
1487 # The line fits, the match is accepted
1488 #-------------------------------------
1491 } ## end sub check_fit
1493 sub install_new_alignments {
1495 my ($new_line) = @_;
1497 my $jmax = $new_line->{'jmax'};
1498 my $rfield_lengths = $new_line->{'rfield_lengths'};
1499 my $col = $new_line->{'leading_space_count'};
1502 for my $j ( 0 .. $jmax ) {
1503 $col += $rfield_lengths->[$j];
1505 # create initial alignments for the new group
1507 Perl::Tidy::VerticalAligner::Alignment->new( { column => $col } );
1508 push @alignments, $alignment;
1510 $new_line->{'ralignments'} = \@alignments;
1512 } ## end sub install_new_alignments
1514 sub copy_old_alignments {
1515 my ( $new_line, $old_line ) = @_;
1516 my @new_alignments = @{ $old_line->{'ralignments'} };
1517 $new_line->{'ralignments'} = \@new_alignments;
1519 } ## end sub copy_old_alignments
1523 # debug routine to dump array contents
1524 local $LIST_SEPARATOR = ')(';
1525 print STDOUT "(@_)\n";
1527 } ## end sub dump_array
1531 # compute decrease in level when we remove $diff spaces from the
1533 my ( $self, $leading_space_count, $diff, $level ) = @_;
1535 my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
1536 if ($rOpts_indent_columns) {
1538 int( ( $leading_space_count + $diff ) / $rOpts_indent_columns );
1539 my $nlev = int( $leading_space_count / $rOpts_indent_columns );
1540 $level -= ( $olev - $nlev );
1541 if ( $level < 0 ) { $level = 0 }
1544 } ## end sub level_change
1546 ###############################################
1547 # CODE SECTION 4: Code to process comment lines
1548 ###############################################
1550 sub _flush_comment_lines {
1552 # Output a group consisting of COMMENT lines
1555 my $rgroup_lines = $self->[_rgroup_lines_];
1556 return unless ( @{$rgroup_lines} );
1557 my $group_level = $self->[_group_level_];
1558 my $group_maximum_line_length = $self->[_group_maximum_line_length_];
1559 my $leading_space_count = $self->[_comment_leading_space_count_];
1560 ## my $leading_string =
1561 ## $self->get_leading_string( $leading_space_count, $group_level );
1563 # look for excessively long lines
1565 foreach my $item ( @{$rgroup_lines} ) {
1566 my ( $str, $str_len ) = @{$item};
1568 $str_len + $leading_space_count - $group_maximum_line_length;
1569 if ( $excess > $max_excess ) {
1570 $max_excess = $excess;
1574 # zero leading space count if any lines are too long
1575 if ( $max_excess > 0 ) {
1576 $leading_space_count -= $max_excess;
1577 if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
1578 my $file_writer_object = $self->[_file_writer_object_];
1579 my $last_outdented_line_at =
1580 $file_writer_object->get_output_line_number();
1581 my $nlines = @{$rgroup_lines};
1582 $self->[_last_outdented_line_at_] =
1583 $last_outdented_line_at + $nlines - 1;
1584 my $outdented_line_count = $self->[_outdented_line_count_];
1585 unless ($outdented_line_count) {
1586 $self->[_first_outdented_line_at_] = $last_outdented_line_at;
1588 $outdented_line_count += $nlines;
1589 $self->[_outdented_line_count_] = $outdented_line_count;
1593 my $outdent_long_lines = 0;
1595 foreach my $item ( @{$rgroup_lines} ) {
1596 my ( $str, $str_len, $Kend ) = @{$item};
1597 $self->valign_output_step_B(
1599 leading_space_count => $leading_space_count,
1601 line_length => $str_len,
1602 side_comment_length => 0,
1603 outdent_long_lines => $outdent_long_lines,
1604 rvertical_tightness_flags => undef,
1605 level => $group_level,
1606 level_end => $group_level,
1608 maximum_line_length => $group_maximum_line_length,
1613 $self->initialize_for_new_group();
1615 } ## end sub _flush_comment_lines
1617 ######################################################
1618 # CODE SECTION 5: Code to process groups of code lines
1619 ######################################################
1621 sub _flush_group_lines {
1623 # This is the vertical aligner internal flush, which leaves the cache
1625 my ( $self, $level_jump ) = @_;
1627 # $level_jump = $next_level-$group_level, if known
1628 # = undef if not known
1629 # Note: only the sign of the jump is needed
1631 my $rgroup_lines = $self->[_rgroup_lines_];
1632 return unless ( @{$rgroup_lines} );
1633 my $group_type = $self->[_group_type_];
1634 my $group_level = $self->[_group_level_];
1638 my ( $a, $b, $c ) = caller();
1639 my $nlines = @{$rgroup_lines};
1641 "APPEND0: _flush_group_lines called from $a $b $c lines=$nlines, type=$group_type \n";
1644 #-------------------------------------------
1645 # Section 1: Handle a group of COMMENT lines
1646 #-------------------------------------------
1647 if ( $group_type eq 'COMMENT' ) {
1648 $self->_flush_comment_lines();
1652 #------------------------------------------------------------------------
1653 # Section 2: Handle line(s) of CODE. Most of the actual work of vertical
1654 # aligning happens here in the following steps:
1655 #------------------------------------------------------------------------
1657 # STEP 1: Remove most unmatched tokens. They block good alignments.
1658 my ( $max_lev_diff, $saw_side_comment ) =
1659 delete_unmatched_tokens( $rgroup_lines, $group_level );
1661 # STEP 2: Sweep top to bottom, forming subgroups of lines with exactly
1662 # matching common alignments. The indexes of these subgroups are in the
1664 my $rgroups = $self->sweep_top_down( $rgroup_lines, $group_level );
1666 # STEP 3: Sweep left to right through the lines, looking for leading
1667 # alignment tokens shared by groups.
1668 sweep_left_to_right( $rgroup_lines, $rgroups, $group_level )
1669 if ( @{$rgroups} > 1 );
1671 # STEP 4: Move side comments to a common column if possible.
1672 if ($saw_side_comment) {
1673 $self->align_side_comments( $rgroup_lines, $rgroups );
1676 # STEP 5: For the -lp option, increase the indentation of lists
1677 # to the desired amount, but do not exceed the line length limit.
1679 # We are allowed to shift a group of lines to the right if:
1680 # (1) its level is greater than the level of the previous group, and
1681 # (2) its level is greater than the level of the next line to be written.
1683 my $extra_indent_ok;
1684 if ( $group_level > $self->[_last_level_written_] ) {
1686 # Use the level jump to next line to come, if given
1687 if ( defined($level_jump) ) {
1688 $extra_indent_ok = $level_jump < 0;
1691 # Otherwise, assume the next line has the level of the end of last line.
1692 # This fixes case c008.
1694 my $level_end = $rgroup_lines->[-1]->{'level_end'};
1695 $extra_indent_ok = $group_level > $level_end;
1699 my $extra_leading_spaces =
1701 ? get_extra_leading_spaces( $rgroup_lines, $rgroups )
1704 # STEP 6: Output the lines.
1705 # All lines in this group have the same leading spacing and maximum line
1707 my $group_leader_length = $rgroup_lines->[0]->{'leading_space_count'};
1708 my $group_maximum_line_length = $rgroup_lines->[0]->{'maximum_line_length'};
1710 foreach my $line ( @{$rgroup_lines} ) {
1711 $self->valign_output_step_A(
1716 group_leader_length => $group_leader_length,
1717 extra_leading_spaces => $extra_leading_spaces,
1718 level => $group_level,
1719 maximum_line_length => $group_maximum_line_length,
1724 # Let the formatter know that this object has been processed and any
1725 # recoverable spaces have been handled. This is needed for setting the
1726 # closing paren location in -lp mode.
1727 my $object = $rgroup_lines->[0]->{'indentation'};
1728 if ( ref($object) ) { $object->set_recoverable_spaces(0) }
1730 $self->initialize_for_new_group();
1732 } ## end sub _flush_group_lines
1734 { ## closure for sub sweep_top_down
1736 my $rall_lines; # all of the lines
1737 my $grp_level; # level of all lines
1738 my $rgroups; # describes the partition of lines we will make here
1739 my $group_line_count; # number of lines in current partition
1741 BEGIN { $rgroups = [] }
1743 sub initialize_for_new_rgroup {
1744 $group_line_count = 0;
1751 my $rline = $rall_lines->[$jend];
1754 if ( $group_line_count == 0 ) {
1755 install_new_alignments($rline);
1758 my $rvals = pop @{$rgroups};
1759 $jbeg = $rvals->[0];
1760 copy_old_alignments( $rline, $rall_lines->[$jbeg] );
1762 push @{$rgroups}, [ $jbeg, $jend, undef ];
1763 $group_line_count++;
1765 } ## end sub add_to_rgroup
1767 sub get_rgroup_jrange {
1769 return unless @{$rgroups};
1770 return unless ( $group_line_count > 0 );
1771 my ( $jbeg, $jend ) = @{ $rgroups->[-1] };
1772 return ( $jbeg, $jend );
1773 } ## end sub get_rgroup_jrange
1777 my ($imax_align) = @_;
1778 return unless @{$rgroups};
1779 return unless ( $group_line_count > 0 );
1781 my ( $jbeg, $jend ) = @{ pop @{$rgroups} };
1782 push @{$rgroups}, [ $jbeg, $jend, $imax_align ];
1784 # Undo some alignments of poor two-line combinations.
1785 # We had to wait until now to know the line count.
1786 if ( $jend - $jbeg == 1 ) {
1787 my $line_0 = $rall_lines->[$jbeg];
1788 my $line_1 = $rall_lines->[$jend];
1790 my $imax_pair = $line_1->{'imax_pair'};
1791 if ( $imax_pair > $imax_align ) { $imax_align = $imax_pair }
1793 ## flag for possible future use:
1794 ## my $is_isolated_pair = $imax_pair < 0
1796 ## || $rall_lines->[ $jbeg - 1 ]->{'imax_pair'} < 0 );
1799 $jbeg > 0 ? $rall_lines->[ $jbeg - 1 ]->{'imax_pair'} : -1;
1801 my ( $is_marginal, $imax_align_fix ) =
1802 is_marginal_match( $line_0, $line_1, $grp_level, $imax_align,
1805 combine_fields( $line_0, $line_1, $imax_align_fix );
1809 initialize_for_new_rgroup();
1811 } ## end sub end_rgroup
1813 sub block_penultimate_match {
1815 # emergency reset to prevent sweep_left_to_right from trying to match a
1816 # failed terminal else match
1817 return unless @{$rgroups} > 1;
1818 $rgroups->[-2]->[2] = -1;
1820 } ## end sub block_penultimate_match
1822 sub sweep_top_down {
1823 my ( $self, $rlines, $group_level ) = @_;
1825 # Partition the set of lines into final alignment subgroups
1826 # and store the alignments with the lines.
1828 # The alignment subgroups we are making here are groups of consecutive
1829 # lines which have (1) identical alignment tokens and (2) do not
1830 # exceed the allowable maximum line length. A later sweep from
1831 # left-to-right ('sweep_lr') will handle additional alignments.
1833 # transfer args to closure variables
1834 $rall_lines = $rlines;
1835 $grp_level = $group_level;
1837 initialize_for_new_rgroup();
1838 return unless @{$rlines}; # shouldn't happen
1840 # Unset the _end_group flag for the last line if it it set because it
1841 # is not needed and can causes problems for -lp formatting
1842 $rall_lines->[-1]->{'end_group'} = 0;
1844 # Loop over all lines ...
1846 foreach my $new_line ( @{$rall_lines} ) {
1849 # Start a new subgroup if necessary
1850 if ( !$group_line_count ) {
1851 add_to_rgroup($jline);
1852 if ( $new_line->{'end_group'} ) {
1858 my $j_terminal_match = $new_line->{'j_terminal_match'};
1859 my ( $jbeg, $jend ) = get_rgroup_jrange();
1860 if ( !defined($jbeg) ) {
1862 # safety check, shouldn't happen
1864 Program bug detected in Perl::Tidy::VerticalAligner sub sweep_top_down
1865 undefined index for group line count $group_line_count
1869 my $base_line = $rall_lines->[$jbeg];
1871 # Initialize a global flag saying if the last line of the group
1872 # should match end of group and also terminate the group. There
1873 # should be no returns between here and where the flag is handled
1875 my $col_matching_terminal = 0;
1876 if ( defined($j_terminal_match) ) {
1878 # remember the column of the terminal ? or { to match with
1879 $col_matching_terminal =
1880 $base_line->get_column($j_terminal_match);
1882 # Ignore an undefined value as a defensive step; shouldn't
1884 $col_matching_terminal = 0
1885 unless defined($col_matching_terminal);
1888 # -------------------------------------------------------------
1889 # Allow hanging side comment to join current group, if any. The
1890 # only advantage is to keep the other tokens in the same group. For
1891 # example, this would make the '=' align here:
1892 # $ax = 1; # side comment
1893 # # hanging side comment
1894 # $boondoggle = 5; # side comment
1895 # $beetle = 5; # side comment
1897 # here is another example..
1899 # _rtoc_name_count => {}, # hash to track ..
1900 # _rpackage_stack => [], # stack to check ..
1902 # _rlast_level => \$last_level, # brace indentation
1905 # If this were not desired, the next step could be skipped.
1906 # -------------------------------------------------------------
1907 if ( $new_line->{'is_hanging_side_comment'} ) {
1908 join_hanging_comment( $new_line, $base_line );
1911 # If this line has no matching tokens, then flush out the lines
1912 # BEFORE this line unless both it and the previous line have side
1913 # comments. This prevents this line from pushing side comments out
1915 elsif ( $new_line->{'jmax'} == 1 ) {
1917 # There are no matching tokens, so now check side comments.
1918 # Programming note: accessing arrays with index -1 is
1919 # risky in Perl, but we have verified there is at least one
1920 # line in the group and that there is at least one field.
1922 $rall_lines->[ $jline - 1 ]->{'rfields'}->[-1];
1923 my $side_comment = $new_line->{'rfields'}->[-1];
1924 end_rgroup(-1) unless ( $side_comment && $prev_comment );
1927 # See if the new line matches and fits the current group,
1928 # if it still exists. Flush the current group if not.
1930 if ($group_line_count) {
1931 ( $match_code, my $imax_align ) =
1932 $self->check_match( $new_line, $base_line,
1933 $rall_lines->[ $jline - 1 ] );
1934 if ( $match_code != 2 ) { end_rgroup($imax_align) }
1937 # Store the new line
1938 add_to_rgroup($jline);
1940 if ( defined($j_terminal_match) ) {
1942 # Decide if we should fix a terminal match. We can either:
1943 # 1. fix it and prevent the sweep_lr from changing it, or
1944 # 2. leave it alone and let sweep_lr try to fix it.
1946 # The current logic is to fix it if:
1947 # -it has not joined to previous lines,
1948 # -and either the previous subgroup has just 1 line, or
1949 # -this line matched but did not fit (so sweep won't work)
1951 if ( $group_line_count == 1 ) {
1952 $fixit ||= $match_code;
1954 if ( @{$rgroups} > 1 ) {
1955 my ( $jbegx, $jendx ) = @{ $rgroups->[-2] };
1956 my $nlines = $jendx - $jbegx + 1;
1957 $fixit ||= $nlines <= 1;
1963 $base_line = $new_line;
1964 my $col_now = $base_line->get_column($j_terminal_match);
1966 # Ignore an undefined value as a defensive step; shouldn't
1968 $col_now = 0 unless defined($col_now);
1970 my $pad = $col_matching_terminal - $col_now;
1971 my $padding_available =
1972 $base_line->get_available_space_on_right();
1973 if ( $col_now && $pad > 0 && $pad <= $padding_available ) {
1974 $base_line->increase_field_width( $j_terminal_match,
1978 # do not let sweep_left_to_right change an isolated 'else'
1979 if ( !$new_line->{'is_terminal_ternary'} ) {
1980 block_penultimate_match();
1986 # end the group if we know we cannot match next line.
1987 elsif ( $new_line->{'end_group'} ) {
1990 } ## end loop over lines
1994 } ## end sub sweep_top_down
1999 my ( $line_m, $line, $imax_min ) = @_;
2002 # two isolated (list) lines
2003 # imax_min = number of common alignment tokens
2005 # $pad_max = maximum suggested pad distance
2006 # = 0 if alignment not recommended
2007 # Note that this is only for two lines which do not have alignment tokens
2008 # in common with any other lines. It is intended for lists, but it might
2009 # also be used for two non-list lines with a common leading '='.
2011 # Allow alignment if the difference in the two unpadded line lengths
2012 # is not more than either line length. The idea is to avoid
2013 # aligning lines with very different field lengths, like these two:
2016 # 'VARCHAR', DBI::SQL_VARCHAR, undef, "'", "'", undef, 0, 1,
2017 # 1, 0, 0, 0, undef, 0, 0
2019 my $rfield_lengths = $line->{'rfield_lengths'};
2020 my $rfield_lengths_m = $line_m->{'rfield_lengths'};
2022 # Safety check - shouldn't happen
2024 unless $imax_min < @{$rfield_lengths} && $imax_min < @{$rfield_lengths_m};
2028 foreach my $i ( 0 .. $imax_min ) {
2029 $lensum_m += $rfield_lengths_m->[$i];
2030 $lensum += $rfield_lengths->[$i];
2033 my ( $lenmin, $lenmax ) =
2034 $lensum >= $lensum_m ? ( $lensum_m, $lensum ) : ( $lensum, $lensum_m );
2037 if ( $line_m->{'list_type'} && $line->{'list_type'} ) {
2038 $patterns_match = 1;
2039 my $rpatterns_m = $line_m->{'rpatterns'};
2040 my $rpatterns = $line->{'rpatterns'};
2041 foreach my $i ( 0 .. $imax_min ) {
2042 my $pat = $rpatterns->[$i];
2043 my $pat_m = $rpatterns_m->[$i];
2044 if ( $pat ne $pat_m ) { $patterns_match = 0; last }
2048 my $pad_max = $lenmax;
2049 if ( !$patterns_match && $lenmax > 2 * $lenmin ) { $pad_max = 0 }
2052 } ## end sub two_line_pad
2054 sub sweep_left_to_right {
2056 my ( $rlines, $rgroups, $group_level ) = @_;
2058 # So far we have divided the lines into groups having an equal number of
2059 # identical alignments. Here we are going to look for common leading
2060 # alignments between the different groups and align them when possible.
2061 # For example, the three lines below are in three groups because each line
2062 # has a different number of commas. In this routine we will sweep from
2063 # left to right, aligning the leading commas as we go, but stopping if we
2064 # hit the line length limit.
2066 # my ( $num, $numi, $numj, $xyza, $ka, $xyzb, $kb, $aff, $error );
2067 # my ( $i, $j, $error, $aff, $asum, $avec );
2068 # my ( $km, $area, $varea );
2070 # nothing to do if just one group
2071 my $ng_max = @{$rgroups} - 1;
2072 return unless ( $ng_max > 0 );
2074 #---------------------------------------------------------------------
2075 # Step 1: Loop over groups to find all common leading alignment tokens
2076 #---------------------------------------------------------------------
2080 my $imax; # index of maximum non-side-comment alignment token
2081 my $istop; # an optional stopping index
2082 my $jbeg; # starting line index
2083 my $jend; # ending line index
2094 # Look at neighboring pairs of groups and form a simple list
2095 # of all common leading alignment tokens. Foreach such match we
2096 # store [$i, $ng], where
2097 # $i = index of the token in the line (0,1,...)
2098 # $ng is the second of the two groups with this common token
2101 # Hash to hold the maximum alignment change for any group
2104 # a small number of columns
2108 foreach my $item ( @{$rgroups} ) {
2111 $istop_mm = $istop_m;
2113 # save _m values of previous group
2115 $rtokens_m = $rtokens;
2121 # Get values for this group. Note that we just have to use values for
2122 # one of the lines of the group since all members have the same
2124 ( $jbeg, $jend, $istop ) = @{$item};
2126 $line = $rlines->[$jbeg];
2127 $rtokens = $line->{'rtokens'};
2128 $imax = $line->{'jmax'} - 2;
2129 $istop = -1 unless ( defined($istop) );
2130 $istop = $imax if ( $istop > $imax );
2132 # Initialize on first group
2133 next if ( $ng == 0 );
2135 # Use the minimum index limit of the two groups
2136 my $imax_min = $imax > $imax_m ? $imax_m : $imax;
2138 # Also impose a limit if given.
2139 if ( $istop_m < $imax_min ) {
2140 $imax_min = $istop_m;
2143 # Special treatment of two one-line groups isolated from other lines,
2144 # unless they form a simple list or a terminal match. Otherwise the
2145 # alignment can look strange in some cases.
2146 my $list_type = $rlines->[$jbeg]->{'list_type'};
2149 && $jend_m == $jbeg_m
2150 && ( $ng == 1 || $istop_mm < 0 )
2151 && ( $ng == $ng_max || $istop < 0 )
2152 && !$line->{'j_terminal_match'}
2154 # Only do this for imperfect matches. This is normally true except
2155 # when two perfect matches cannot form a group because the line
2156 # length limit would be exceeded. In that case we can still try
2157 # to match as many alignments as possible.
2158 && ( $imax != $imax_m || $istop_m != $imax_m )
2162 # We will just align assignments and simple lists
2163 next unless ( $imax_min >= 0 );
2165 unless ( $rtokens->[0] =~ /^=\d/
2168 # In this case we will limit padding to a short distance. This
2169 # is a compromise to keep some vertical alignment but prevent large
2170 # gaps, which do not look good for just two lines.
2172 two_line_pad( $rlines->[$jbeg], $rlines->[$jbeg_m], $imax_min );
2173 next unless ($pad_max);
2175 $max_move{"$ng_m"} = $pad_max;
2176 $max_move{"$ng"} = $pad_max;
2179 # Loop to find all common leading tokens.
2180 if ( $imax_min >= 0 ) {
2181 foreach my $i ( 0 .. $imax_min ) {
2182 my $tok = $rtokens->[$i];
2183 my $tok_m = $rtokens_m->[$i];
2184 last if ( $tok ne $tok_m );
2185 push @icommon, [ $i, $ng, $tok ];
2189 return unless @icommon;
2191 #----------------------------------------------------------
2192 # Step 2: Reorder and consolidate the list into a task list
2193 #----------------------------------------------------------
2195 # We have to work first from lowest token index to highest, then by group,
2196 # sort our list first on token index then group number
2197 @icommon = sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } @icommon;
2199 # Make a task list of the form
2200 # [$i, ng_beg, $ng_end, $tok], ..
2202 # $i is the index of the token to be aligned
2203 # $ng_beg..$ng_end is the group range for this action
2205 my ( $i, $ng_end, $tok );
2206 foreach my $item (@icommon) {
2207 my $ng_last = $ng_end;
2209 ( $i, $ng_end, $tok ) = @{$item};
2210 my $ng_beg = $ng_end - 1;
2211 if ( defined($ng_last) && $ng_beg == $ng_last && $i == $i_last ) {
2212 my $var = pop(@todo);
2213 $ng_beg = $var->[1];
2215 my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
2216 push @todo, [ $i, $ng_beg, $ng_end, $raw_tok, $lev ];
2219 #------------------------------
2220 # Step 3: Execute the task list
2221 #------------------------------
2222 do_left_to_right_sweep( $rlines, $rgroups, \@todo, \%max_move, $short_pad,
2225 } ## end sub sweep_left_to_right
2227 { ## closure for sub do_left_to_right_sweep
2229 my %is_good_alignment_token;
2233 # One of the most difficult aspects of vertical alignment is knowing
2234 # when not to align. Alignment can go from looking very nice to very
2235 # bad when overdone. In the sweep algorithm there are two special
2236 # cases where we may need to limit padding to a '$short_pad' distance
2237 # to avoid some very ugly formatting:
2239 # 1. Two isolated lines with partial alignment
2240 # 2. A 'tail-wag-dog' situation, in which a single terminal
2241 # line with partial alignment could cause a significant pad
2242 # increase in many previous lines if allowed to join the alignment.
2244 # For most alignment tokens, we will allow only a small pad to be
2245 # introduced (the hardwired $short_pad variable) . But for some 'good'
2246 # alignments we can be less restrictive.
2248 # These are 'good' alignments, which are allowed more padding:
2250 => = ? if unless or || {
2253 @is_good_alignment_token{@q} = (0) x scalar(@q);
2255 # Promote a few of these to 'best', with essentially no pad limit:
2256 $is_good_alignment_token{'='} = 1;
2257 $is_good_alignment_token{'if'} = 1;
2258 $is_good_alignment_token{'unless'} = 1;
2259 $is_good_alignment_token{'=>'} = 1
2261 # Note the hash values are set so that:
2262 # if ($is_good_alignment_token{$raw_tok}) => best
2263 # if defined ($is_good_alignment_token{$raw_tok}) => good or best
2267 sub move_to_common_column {
2269 # This is a sub called by sub do_left_to_right_sweep to
2270 # move the alignment column of token $itok to $col_want for a
2271 # sequence of groups.
2272 my ( $rlines, $rgroups, $rmax_move, $ngb, $nge, $itok, $col_want,
2275 return unless ( defined($ngb) && $nge > $ngb );
2276 foreach my $ng ( $ngb .. $nge ) {
2278 my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
2279 my $line = $rlines->[$jbeg];
2280 my $col = $line->get_column($itok);
2281 my $move = $col_want - $col;
2284 # limit padding increase in isolated two lines
2286 if ( defined( $rmax_move->{$ng} )
2287 && $move > $rmax_move->{$ng}
2288 && !$is_good_alignment_token{$raw_tok} );
2290 $line->increase_field_width( $itok, $move );
2292 elsif ( $move < 0 ) {
2294 # spot to take special action on failure to move
2298 } ## end sub move_to_common_column
2300 sub do_left_to_right_sweep {
2301 my ( $rlines, $rgroups, $rtodo, $rmax_move, $short_pad, $group_level )
2304 # $blocking_level[$nj is the level at a match failure between groups
2307 my $group_list_type = $rlines->[0]->{'list_type'};
2309 foreach my $task ( @{$rtodo} ) {
2310 my ( $itok, $ng_beg, $ng_end, $raw_tok, $lev ) = @{$task};
2312 # Nothing to do for a single group
2313 next unless ( $ng_end > $ng_beg );
2315 my $ng_first; # index of the first group of a continuous sequence
2316 my $col_want; # the common alignment column of a sequence of groups
2317 my $col_limit; # maximum column before bumping into max line length
2318 my $line_count_ng_m = 0;
2322 # Loop over the groups
2323 # 'ix_' = index in the array of lines
2324 # 'ng_' = index in the array of groups
2325 # 'it_' = index in the array of tokens
2326 my $ix_min = $rgroups->[$ng_beg]->[0];
2327 my $ix_max = $rgroups->[$ng_end]->[1];
2328 my $lines_total = $ix_max - $ix_min + 1;
2329 foreach my $ng ( $ng_beg .. $ng_end ) {
2330 my ( $ix_beg, $ix_end, $it_stop ) = @{ $rgroups->[$ng] };
2331 my $line_count_ng = $ix_end - $ix_beg + 1;
2333 # Important: note that since all lines in a group have a common
2334 # alignments object, we just have to work on one of the lines
2335 # (the first line). All of the rest will be changed
2337 my $line = $rlines->[$ix_beg];
2338 my $jmax = $line->{'jmax'};
2340 # the maximum space without exceeding the line length:
2341 my $avail = $line->get_available_space_on_right();
2342 my $col = $line->get_column($itok);
2343 my $col_max = $col + $avail;
2345 # Initialize on first group
2346 if ( !defined($col_want) ) {
2349 $col_limit = $col_max;
2350 $line_count_ng_m = $line_count_ng;
2352 $it_stop_m = $it_stop;
2356 # RULE: Throw a blocking flag upon encountering a token level
2357 # different from the level of the first blocking token. For
2358 # example, in the following example, if the = matches get
2359 # blocked between two groups as shown, then we want to start
2360 # blocking matches at the commas, which are at deeper level, so
2361 # that we do not get the big gaps shown here:
2363 # my $unknown3 = pack( "v", -2 );
2364 # my $unknown4 = pack( "v", 0x09 );
2365 # my $unknown5 = pack( "VVV", 0x06, 0x00, 0x00 );
2366 # my $num_bbd_blocks = pack( "V", $num_lists );
2367 # my $root_startblock = pack( "V", $root_start );
2368 # my $unknown6 = pack( "VV", 0x00, 0x1000 );
2370 # On the other hand, it is okay to keep matching at the same
2371 # level such as in a simple list of commas and/or fat commas.
2373 my $is_blocked = defined( $blocking_level[$ng] )
2374 && $lev > $blocking_level[$ng];
2376 # TAIL-WAG-DOG RULE: prevent a 'tail-wag-dog' syndrom, meaning:
2377 # Do not let one or two lines with a **different number of
2378 # alignments** open up a big gap in a large block. For
2379 # example, we will prevent something like this, where the first
2380 # line pries open the rest:
2382 # $worksheet->write( "B7", "http://www.perl.com", undef, $format );
2383 # $worksheet->write( "C7", "", $format );
2384 # $worksheet->write( "D7", "", $format );
2385 # $worksheet->write( "D8", "", $format );
2386 # $worksheet->write( "D8", "", $format );
2388 # We should exclude from consideration two groups which are
2389 # effectively the same but separated because one does not
2390 # fit in the maximum allowed line length.
2392 $jmax == $jmax_m && $it_stop_m == $jmax_m - 2;
2394 my $lines_above = $ix_beg - $ix_min;
2395 my $lines_below = $lines_total - $lines_above;
2397 # Increase the tolerable gap for certain favorable factors
2399 my $top_level = $lev == $group_level;
2401 # Align best top level alignment tokens like '=', 'if', ...
2402 # A factor of 10 allows a gap of up to 40 spaces
2403 if ( $top_level && $is_good_alignment_token{$raw_tok} ) {
2407 # Otherwise allow some minimal padding of good alignments
2410 defined( $is_good_alignment_token{$raw_tok} )
2412 # We have to be careful if there are just 2 lines. This
2413 # two-line factor allows large gaps only for 2 lines which
2414 # are simple lists with fewer items on the second line. It
2415 # gives results similar to previous versions of perltidy.
2416 && ( $lines_total > 2
2417 || $group_list_type && $jmax < $jmax_m && $top_level )
2427 if ( !$is_same_group ) {
2430 || $lines_above == 2 && $lines_below >= 4 )
2431 && $col_want > $col + $short_pad * $factor;
2434 || $lines_below == 2 && $lines_above >= 4 )
2435 && $col > $col_want + $short_pad * $factor;
2438 # if match is limited by gap size, stop aligning at this level
2440 $blocking_level[$ng] = $lev - 1;
2443 # quit and restart if it cannot join this batch
2444 if ( $col_want > $col_max
2445 || $col > $col_limit
2450 # remember the level of the first blocking token
2451 if ( !defined( $blocking_level[$ng] ) ) {
2452 $blocking_level[$ng] = $lev;
2455 move_to_common_column(
2456 $rlines, $rgroups, $rmax_move, $ng_first,
2457 $ng - 1, $itok, $col_want, $raw_tok
2461 $col_limit = $col_max;
2462 $line_count_ng_m = $line_count_ng;
2464 $it_stop_m = $it_stop;
2468 $line_count_ng_m += $line_count_ng;
2470 # update the common column and limit
2471 if ( $col > $col_want ) { $col_want = $col }
2472 if ( $col_max < $col_limit ) { $col_limit = $col_max }
2474 } ## end loop over groups
2476 if ( $ng_end > $ng_first ) {
2477 move_to_common_column(
2478 $rlines, $rgroups, $rmax_move, $ng_first,
2479 $ng_end, $itok, $col_want, $raw_tok
2481 } ## end loop over groups for one task
2482 } ## end loop over tasks
2485 } ## end sub do_left_to_right_sweep
2488 sub delete_selected_tokens {
2490 my ( $line_obj, $ridel ) = @_;
2492 # $line_obj is the line to be modified
2493 # $ridel is a ref to list of indexes to be deleted
2495 # remove an unused alignment token(s) to improve alignment chances
2497 return unless ( defined($line_obj) && defined($ridel) && @{$ridel} );
2499 my $jmax_old = $line_obj->{'jmax'};
2500 my $rfields_old = $line_obj->{'rfields'};
2501 my $rfield_lengths_old = $line_obj->{'rfield_lengths'};
2502 my $rpatterns_old = $line_obj->{'rpatterns'};
2503 my $rtokens_old = $line_obj->{'rtokens'};
2504 my $j_terminal_match = $line_obj->{'j_terminal_match'};
2506 use constant EXPLAIN_DELETE_SELECTED => 0;
2508 local $LIST_SEPARATOR = '> <';
2509 EXPLAIN_DELETE_SELECTED && print <<EOM;
2510 delete indexes: <@{$ridel}>
2512 old tokens: <@{$rtokens_old}>
2513 old patterns: <@{$rpatterns_old}>
2514 old fields: <@{$rfields_old}>
2515 old field_lengths: <@{$rfield_lengths_old}>
2518 my $rfields_new = [];
2519 my $rpatterns_new = [];
2520 my $rtokens_new = [];
2521 my $rfield_lengths_new = [];
2523 # Convert deletion list to a hash to allow any order, multiple entries,
2524 # and avoid problems with index values out of range
2526 @delete_me{ @{$ridel} } = (1) x scalar( @{$ridel} );
2528 my $pattern_0 = $rpatterns_old->[0];
2529 my $field_0 = $rfields_old->[0];
2530 my $field_length_0 = $rfield_lengths_old->[0];
2531 push @{$rfields_new}, $field_0;
2532 push @{$rfield_lengths_new}, $field_length_0;
2533 push @{$rpatterns_new}, $pattern_0;
2535 # Loop to either copy items or concatenate fields and patterns
2537 foreach my $j ( 0 .. $jmax_old - 1 ) {
2538 my $token = $rtokens_old->[$j];
2539 my $field = $rfields_old->[ $j + 1 ];
2540 my $field_length = $rfield_lengths_old->[ $j + 1 ];
2541 my $pattern = $rpatterns_old->[ $j + 1 ];
2542 if ( !$delete_me{$j} ) {
2543 push @{$rtokens_new}, $token;
2544 push @{$rfields_new}, $field;
2545 push @{$rpatterns_new}, $pattern;
2546 push @{$rfield_lengths_new}, $field_length;
2549 if ( !defined($jmin_del) ) { $jmin_del = $j }
2550 $rfields_new->[-1] .= $field;
2551 $rfield_lengths_new->[-1] += $field_length;
2552 $rpatterns_new->[-1] .= $pattern;
2556 # ----- x ------ x ------ x ------
2557 #t 0 1 2 <- token indexing
2558 #f 0 1 2 3 <- field and pattern
2560 my $jmax_new = @{$rfields_new} - 1;
2561 $line_obj->{'rtokens'} = $rtokens_new;
2562 $line_obj->{'rpatterns'} = $rpatterns_new;
2563 $line_obj->{'rfields'} = $rfields_new;
2564 $line_obj->{'rfield_lengths'} = $rfield_lengths_new;
2565 $line_obj->{'jmax'} = $jmax_new;
2567 # The value of j_terminal_match will be incorrect if we delete tokens prior
2568 # to it. We will have to give up on aligning the terminal tokens if this
2570 if ( defined($j_terminal_match) && $jmin_del <= $j_terminal_match ) {
2571 $line_obj->{'j_terminal_match'} = undef;
2574 # update list type -
2575 if ( $line_obj->{'list_seqno'} ) {
2577 ## This works, but for efficiency see if we need to make a change:
2578 ## decide_if_list($line_obj);
2580 # An existing list will still be a list but with possibly different
2582 my $old_list_type = $line_obj->{'list_type'};
2583 my $new_list_type = EMPTY_STRING;
2584 if ( $rtokens_new->[0] =~ /^(=>|,)/ ) {
2585 $new_list_type = $rtokens_new->[0];
2587 if ( !$old_list_type || $old_list_type ne $new_list_type ) {
2588 decide_if_list($line_obj);
2592 EXPLAIN_DELETE_SELECTED && print <<EOM;
2595 new tokens: <@{$rtokens_new}>
2596 new patterns: <@{$rpatterns_new}>
2597 new fields: <@{$rfields_new}>
2600 } ## end sub delete_selected_tokens
2602 { ## closure for sub decode_alignment_token
2604 # This routine is called repeatedly for each token, so it needs to be
2605 # efficient. We can speed things up by remembering the inputs and outputs
2609 sub initialize_decode {
2611 # We will re-initialize the hash for each file. Otherwise, there is
2612 # a danger that the hash can become arbitrarily large if a very large
2613 # number of files is processed at once.
2614 %decoded_token = ();
2616 } ## end sub initialize_decode
2618 sub decode_alignment_token {
2620 # Unpack the values packed in an alignment token
2623 # my ( $raw_tok, $lev, $tag, $tok_count ) =
2624 # decode_alignment_token($token);
2626 # Alignment tokens have a trailing decimal level and optional tag (for
2628 # For example, the first comma in the following line
2629 # sub banner { crlf; report( shift, '/', shift ); crlf }
2630 # is decorated as follows:
2631 # ,2+report-6 => (tok,lev,tag) =qw( , 2 +report-6)
2633 # An optional token count may be appended with a leading dot.
2634 # Currently this is only done for '=' tokens but this could change.
2635 # For example, consider the following line:
2636 # $nport = $port = shift || $name;
2637 # The first '=' may either be '=0' or '=0.1' [level 0, first equals]
2638 # The second '=' will be '=0.2' [level 0, second equals]
2641 if ( defined( $decoded_token{$tok} ) ) {
2642 return @{ $decoded_token{$tok} };
2645 my ( $raw_tok, $lev, $tag, $tok_count ) = ( $tok, 0, EMPTY_STRING, 1 );
2646 if ( $tok =~ /^(\D+)(\d+)([^\.]*)(\.(\d+))?$/ ) {
2650 $tok_count = $5 if ($5);
2652 my @vals = ( $raw_tok, $lev, $tag, $tok_count );
2653 $decoded_token{$tok} = \@vals;
2655 } ## end sub decode_alignment_token
2658 { ## closure for sub delete_unmatched_tokens
2661 my %keep_after_deleted_assignment;
2667 = **= += *= &= <<= &&=
2668 -= /= |= >>= ||= //=
2672 @is_assignment{@q} = (1) x scalar(@q);
2674 # These tokens may be kept following an = deletion
2678 @keep_after_deleted_assignment{@q} = (1) x scalar(@q);
2682 sub delete_unmatched_tokens {
2683 my ( $rlines, $group_level ) = @_;
2685 # This is a important first step in vertical alignment in which
2686 # we remove as many obviously un-needed alignment tokens as possible.
2687 # This will prevent them from interfering with the final alignment.
2690 my $max_lev_diff = 0; # used to avoid a call to prune_tree
2691 my $saw_side_comment = 0; # used to avoid a call for side comments
2693 # Handle no lines -- shouldn't happen
2694 return unless @{$rlines};
2696 # Handle a single line
2697 if ( @{$rlines} == 1 ) {
2698 my $line = $rlines->[0];
2699 my $jmax = $line->{'jmax'};
2700 my $length = $line->{'rfield_lengths'}->[$jmax];
2701 $saw_side_comment = $length > 0;
2702 return ( $max_lev_diff, $saw_side_comment );
2705 # ignore hanging side comments in these operations
2706 my @filtered = grep { !$_->{'is_hanging_side_comment'} } @{$rlines};
2707 my $rnew_lines = \@filtered;
2709 $saw_side_comment = @filtered != @{$rlines};
2712 # nothing to do if all lines were hanging side comments
2713 my $jmax = @{$rnew_lines} - 1;
2714 return ( $max_lev_diff, $saw_side_comment ) unless ( $jmax >= 0 );
2716 #----------------------------------------------------
2717 # Create a hash of alignment token info for each line
2718 #----------------------------------------------------
2719 ( my $rline_hashes, my $requals_info, $saw_side_comment, $max_lev_diff )
2720 = make_alignment_info( $group_level, $rnew_lines, $saw_side_comment );
2722 #------------------------------------------------------------
2723 # Find independent subgroups of lines. Neighboring subgroups
2724 # do not have a common alignment token.
2725 #------------------------------------------------------------
2727 push @subgroups, [ 0, $jmax ];
2728 foreach my $jl ( 0 .. $jmax - 1 ) {
2729 if ( $rnew_lines->[$jl]->{'end_group'} ) {
2730 $subgroups[-1]->[1] = $jl;
2731 push @subgroups, [ $jl + 1, $jmax ];
2735 #-----------------------------------------------------------
2736 # PASS 1 over subgroups to remove unmatched alignment tokens
2737 #-----------------------------------------------------------
2738 delete_unmatched_tokens_main_loop(
2739 $group_level, $rnew_lines, \@subgroups,
2740 $rline_hashes, $requals_info
2743 #----------------------------------------------------------------
2744 # PASS 2: Construct a tree of matched lines and delete some small
2745 # deeper levels of tokens. They also block good alignments.
2746 #----------------------------------------------------------------
2747 prune_alignment_tree($rnew_lines) if ($max_lev_diff);
2749 #--------------------------------------------
2750 # PASS 3: compare all lines for common tokens
2751 #--------------------------------------------
2752 match_line_pairs( $rlines, $rnew_lines, \@subgroups, $group_level );
2754 return ( $max_lev_diff, $saw_side_comment );
2755 } ## end sub delete_unmatched_tokens
2757 sub make_alignment_info {
2759 my ( $group_level, $rnew_lines, $saw_side_comment ) = @_;
2761 #------------------------------------------------------------
2762 # Loop to create a hash of alignment token info for each line
2763 #------------------------------------------------------------
2764 my $rline_hashes = [];
2766 my @line_info; # no longer used
2767 my $jmax = @{$rnew_lines} - 1;
2768 my $max_lev_diff = 0;
2769 foreach my $line ( @{$rnew_lines} ) {
2771 my $rtokens = $line->{'rtokens'};
2772 my $rpatterns = $line->{'rpatterns'};
2774 my ( $i_eq, $tok_eq, $pat_eq );
2775 my ( $lev_min, $lev_max );
2776 foreach my $tok ( @{$rtokens} ) {
2777 my ( $raw_tok, $lev, $tag, $tok_count ) =
2778 decode_alignment_token($tok);
2780 if ( $tok ne '#' ) {
2781 if ( !defined($lev_min) ) {
2786 if ( $lev < $lev_min ) { $lev_min = $lev }
2787 if ( $lev > $lev_max ) { $lev_max = $lev }
2791 if ( !$saw_side_comment ) {
2792 my $length = $line->{'rfield_lengths'}->[ $i + 1 ];
2793 $saw_side_comment ||= $length;
2797 # Possible future upgrade: for multiple matches,
2798 # record [$i1, $i2, ..] instead of $i
2800 [ $i, undef, undef, $raw_tok, $lev, $tag, $tok_count ];
2802 # remember the first equals at line level
2803 if ( !defined($i_eq) && $raw_tok eq '=' ) {
2805 if ( $lev eq $group_level ) {
2808 $pat_eq = $rpatterns->[$i];
2813 push @{$rline_hashes}, $rhash;
2814 push @equals_info, [ $i_eq, $tok_eq, $pat_eq ];
2815 push @line_info, [ $lev_min, $lev_max ];
2816 if ( defined($lev_min) ) {
2817 my $lev_diff = $lev_max - $lev_min;
2818 if ( $lev_diff > $max_lev_diff ) { $max_lev_diff = $lev_diff }
2822 #----------------------------------------------------
2823 # Loop to compare each line pair and remember matches
2824 #----------------------------------------------------
2827 foreach my $jl ( 0 .. $jmax - 1 ) {
2831 my $rhash_l = $rline_hashes->[$jl];
2832 my $rhash_r = $rline_hashes->[$jr];
2833 foreach my $tok ( keys %{$rhash_l} ) {
2834 if ( defined( $rhash_r->{$tok} ) ) {
2835 my $il = $rhash_l->{$tok}->[0];
2836 my $ir = $rhash_r->{$tok}->[0];
2837 $rhash_l->{$tok}->[2] = $ir;
2838 $rhash_r->{$tok}->[1] = $il;
2839 if ( $tok ne '#' ) {
2840 push @{ $rtok_hash->{$tok} }, ( $jl, $jr );
2846 # Set a line break if no matching tokens between these lines
2847 # (this is not strictly necessary now but does not hurt)
2848 if ( $nr == 0 && $nl > 0 ) {
2849 $rnew_lines->[$jl]->{'end_group'} = 1;
2852 # Also set a line break if both lines have simple equals but with
2853 # different leading characters in patterns. This check is similar
2854 # to one in sub check_match, and will prevent sub
2855 # prune_alignment_tree from removing alignments which otherwise
2856 # should be kept. This fix is rarely needed, but it can
2857 # occasionally improve formatting.
2859 # my $name = $this->{Name};
2860 # $type = $this->ctype($genlooptype) if defined $genlooptype;
2861 # my $declini = ( $asgnonly ? "" : "\t$type *" );
2862 # my $cast = ( $type ? "($type *)" : "" );
2863 # The last two lines start with 'my' and will not match the
2864 # previous line starting with $type, so we do not want
2865 # prune_alignment tree to delete their ? : alignments at a deeper
2867 my ( $i_eq_l, $tok_eq_l, $pat_eq_l ) = @{ $equals_info[$jl] };
2868 my ( $i_eq_r, $tok_eq_r, $pat_eq_r ) = @{ $equals_info[$jr] };
2869 if ( defined($i_eq_l) && defined($i_eq_r) ) {
2871 # Also, do not align equals across a change in ci level
2872 my $ci_jump = $rnew_lines->[$jl]->{'ci_level'} !=
2873 $rnew_lines->[$jr]->{'ci_level'};
2876 $tok_eq_l eq $tok_eq_r
2879 && ( substr( $pat_eq_l, 0, 1 ) ne substr( $pat_eq_r, 0, 1 )
2883 $rnew_lines->[$jl]->{'end_group'} = 1;
2887 return ( $rline_hashes, \@equals_info, $saw_side_comment,
2889 } ## end sub make_alignment_info
2891 sub delete_unmatched_tokens_main_loop {
2894 $group_level, $rnew_lines, $rsubgroups,
2895 $rline_hashes, $requals_info
2898 #--------------------------------------------------------------
2899 # Main loop over subgroups to remove unmatched alignment tokens
2900 #--------------------------------------------------------------
2902 # flag to allow skipping pass 2 - not currently used
2903 my $saw_large_group;
2905 my $has_terminal_match = $rnew_lines->[-1]->{'j_terminal_match'};
2907 foreach my $item ( @{$rsubgroups} ) {
2908 my ( $jbeg, $jend ) = @{$item};
2910 my $nlines = $jend - $jbeg + 1;
2912 #---------------------------------------------------
2913 # Look for complete if/elsif/else and ternary blocks
2914 #---------------------------------------------------
2916 # We are looking for a common '$dividing_token' like these:
2918 # if ( $b and $s ) { $p->{'type'} = 'a'; }
2919 # elsif ($b) { $p->{'type'} = 'b'; }
2920 # elsif ($s) { $p->{'type'} = 's'; }
2921 # else { $p->{'type'} = ''; }
2922 # ^----------- dividing_token
2925 # !$routine ? '[PFX]'
2926 # : $routine =~ /warn.*_d\z/ ? '[DS]'
2927 # : $routine =~ /ck_warn/ ? 'W'
2928 # : $routine =~ /ckWARN\d*reg_d/ ? 'S'
2929 # : $routine =~ /ckWARN\d*reg/ ? 'W'
2930 # : $routine =~ /vWARN\d/ ? '[WDS]'
2932 # ^----------- dividing_token
2934 # Only look for groups which are more than 2 lines long. Two lines
2935 # can get messed up doing this, probably due to the various
2939 my %token_line_count;
2940 if ( $nlines > 2 ) {
2942 foreach my $jj ( $jbeg .. $jend ) {
2944 my $line = $rnew_lines->[$jj];
2945 my $rtokens = $line->{'rtokens'};
2946 foreach my $tok ( @{$rtokens} ) {
2947 if ( !$seen{$tok} ) {
2949 $token_line_count{$tok}++;
2954 foreach my $tok ( keys %token_line_count ) {
2955 if ( $token_line_count{$tok} == $nlines ) {
2956 if ( substr( $tok, 0, 1 ) eq '?'
2957 || substr( $tok, 0, 1 ) eq '{'
2958 && $tok =~ /^\{\d+if/ )
2960 $dividing_token = $tok;
2967 #-------------------------------------------------------------
2968 # Loop over subgroup lines to remove unwanted alignment tokens
2969 #-------------------------------------------------------------
2970 foreach my $jj ( $jbeg .. $jend ) {
2971 my $line = $rnew_lines->[$jj];
2972 my $rtokens = $line->{'rtokens'};
2973 my $rhash = $rline_hashes->[$jj];
2974 my $i_eq = $requals_info->[$jj]->[0];
2976 my $imax = @{$rtokens} - 2;
2977 my $delete_above_level;
2978 my $deleted_assignment_token;
2980 my $saw_dividing_token = EMPTY_STRING;
2981 $saw_large_group ||= $nlines > 2 && $imax > 1;
2983 # Loop over all alignment tokens
2984 foreach my $i ( 0 .. $imax ) {
2985 my $tok = $rtokens->[$i];
2986 next if ( $tok eq '#' ); # shouldn't happen
2987 my ( $iii, $il, $ir, $raw_tok, $lev, $tag, $tok_count ) =
2988 @{ $rhash->{$tok} };
2990 #------------------------------------------------------
2991 # Here is the basic RULE: remove an unmatched alignment
2992 # which does not occur in the surrounding lines.
2993 #------------------------------------------------------
2994 my $delete_me = !defined($il) && !defined($ir);
2996 # Apply any user controls. Note that not all lines pass
2997 # this way so they have to be applied elsewhere too.
2999 if (%valign_control_hash) {
3000 $align_ok = $valign_control_hash{$raw_tok};
3001 $align_ok = $valign_control_default
3002 unless defined($align_ok);
3003 $delete_me ||= !$align_ok;
3006 # But now we modify this with exceptions...
3008 # EXCEPTION 1: If we are in a complete ternary or
3009 # if/elsif/else group, and this token is not on every line
3010 # of the group, should we delete it to preserve overall
3012 if ($dividing_token) {
3013 if ( $token_line_count{$tok} >= $nlines ) {
3014 $saw_dividing_token ||= $tok eq $dividing_token;
3018 # For shorter runs, delete toks to save alignment.
3019 # For longer runs, keep toks after the '{' or '?'
3020 # to allow sub-alignments within braces. The
3021 # number 5 lines is arbitrary but seems to work ok.
3023 ( $nlines < 5 || !$saw_dividing_token );
3027 # EXCEPTION 2: Remove all tokens above a certain level
3028 # following a previous deletion. For example, we have to
3029 # remove tagged higher level alignment tokens following a
3030 # '=>' deletion because the tags of higher level tokens
3031 # will now be incorrect. For example, this will prevent
3032 # aligning commas as follows after deleting the second '=>'
3034 # ListBox => origin => [ 270, 160 ],
3035 # size => [ 200, 55 ],
3037 if ( defined($delete_above_level) ) {
3038 if ( $lev > $delete_above_level ) {
3041 else { $delete_above_level = undef }
3044 # EXCEPTION 3: Remove all but certain tokens after an
3045 # assignment deletion.
3047 $deleted_assignment_token
3048 && ( $lev > $group_level
3049 || !$keep_after_deleted_assignment{$raw_tok} )
3055 # EXCEPTION 4: Do not touch the first line of a 2 line
3056 # terminal match, such as below, because j_terminal has
3058 # if ($tag) { $tago = "<$tag>"; $tagc = "</$tag>"; }
3059 # else { $tago = $tagc = ''; }
3060 # But see snippets 'else1.t' and 'else2.t'
3063 && $has_terminal_match
3066 # EXCEPTION 5: misc additional rules for commas and equals
3067 if ( $delete_me && $tok_count == 1 ) {
3069 # okay to delete second and higher copies of a token
3072 if ( $raw_tok eq ',' ) {
3074 # Do not delete commas before an equals
3076 if ( defined($i_eq) && $i < $i_eq );
3078 # Do not delete line-level commas
3079 $delete_me = 0 if ( $lev <= $group_level );
3082 # For an assignment at group level..
3083 if ( $is_assignment{$raw_tok}
3084 && $lev == $group_level )
3087 # Do not delete if it is the last alignment of
3088 # multiple tokens; this will prevent some
3089 # undesirable alignments
3090 if ( $imax > 0 && $i == $imax ) {
3094 # Otherwise, set a flag to delete most
3096 else { $deleted_assignment_token = $raw_tok }
3100 # Do not let a user exclusion be reactivated by above rules
3101 $delete_me ||= !$align_ok;
3103 #------------------------------------
3104 # Add this token to the deletion list
3105 #------------------------------------
3109 # update deletion propagation flags
3110 if ( !defined($delete_above_level)
3111 || $lev < $delete_above_level )
3114 # delete all following higher level alignments
3115 $delete_above_level = $lev;
3117 # but keep deleting after => to next lower level
3118 # to avoid some bizarre alignments
3119 if ( $raw_tok eq '=>' ) {
3120 $delete_above_level = $lev - 1;
3124 } # End loop over alignment tokens
3126 # Process all deletion requests for this line
3128 delete_selected_tokens( $line, \@idel );
3130 } # End loopover lines
3131 } ## end main loop over subgroups
3134 } ## end sub delete_unmatched_tokens_main_loop
3137 sub match_line_pairs {
3138 my ( $rlines, $rnew_lines, $rsubgroups, $group_level ) = @_;
3140 # Compare each pair of lines and save information about common matches
3141 # $rlines = list of lines including hanging side comments
3142 # $rnew_lines = list of lines without any hanging side comments
3143 # $rsubgroups = list of subgroups of the new lines
3146 # Maybe change: imax_pair => pair_match_info = ref to array
3147 # = [$imax_align, $rMsg, ... ]
3148 # This may eventually have multi-level match info
3150 # Previous line vars
3151 my ( $line_m, $rtokens_m, $rpatterns_m, $rfield_lengths_m, $imax_m,
3152 $list_type_m, $ci_level_m );
3155 my ( $line, $rtokens, $rpatterns, $rfield_lengths, $imax, $list_type,
3158 # loop over subgroups
3159 foreach my $item ( @{$rsubgroups} ) {
3160 my ( $jbeg, $jend ) = @{$item};
3161 my $nlines = $jend - $jbeg + 1;
3162 next unless ( $nlines > 1 );
3164 # loop over lines in a subgroup
3165 foreach my $jj ( $jbeg .. $jend ) {
3168 $rtokens_m = $rtokens;
3169 $rpatterns_m = $rpatterns;
3170 $rfield_lengths_m = $rfield_lengths;
3172 $list_type_m = $list_type;
3173 $ci_level_m = $ci_level;
3175 $line = $rnew_lines->[$jj];
3176 $rtokens = $line->{'rtokens'};
3177 $rpatterns = $line->{'rpatterns'};
3178 $rfield_lengths = $line->{'rfield_lengths'};
3179 $imax = @{$rtokens} - 2;
3180 $list_type = $line->{'list_type'};
3181 $ci_level = $line->{'ci_level'};
3183 # nothing to do for first line
3184 next if ( $jj == $jbeg );
3186 my $ci_jump = $ci_level - $ci_level_m;
3188 my $imax_min = $imax_m < $imax ? $imax_m : $imax;
3190 my $imax_align = -1;
3192 # find number of leading common tokens
3194 #---------------------------------
3195 # No match to hanging side comment
3196 #---------------------------------
3197 if ( $line->{'is_hanging_side_comment'} ) {
3199 # Should not get here; HSC's have been filtered out
3203 #-----------------------------
3204 # Handle comma-separated lists
3205 #-----------------------------
3206 elsif ( $list_type && $list_type eq $list_type_m ) {
3208 # do not align lists across a ci jump with new list method
3209 if ($ci_jump) { $imax_min = -1 }
3211 my $i_nomatch = $imax_min + 1;
3212 foreach my $i ( 0 .. $imax_min ) {
3213 my $tok = $rtokens->[$i];
3214 my $tok_m = $rtokens_m->[$i];
3215 if ( $tok ne $tok_m ) {
3221 $imax_align = $i_nomatch - 1;
3228 my $i_nomatch = $imax_min + 1;
3229 foreach my $i ( 0 .. $imax_min ) {
3230 my $tok = $rtokens->[$i];
3231 my $tok_m = $rtokens_m->[$i];
3232 if ( $tok ne $tok_m ) {
3237 my $pat = $rpatterns->[$i];
3238 my $pat_m = $rpatterns_m->[$i];
3240 # If patterns don't match, we have to be careful...
3241 if ( $pat_m ne $pat ) {
3243 $rfield_lengths->[$i] - $rfield_lengths_m->[$i];
3244 my ( $match_code, $rmsg ) =
3245 compare_patterns( $group_level,
3246 $tok, $tok_m, $pat, $pat_m, $pad );
3248 if ( $match_code == 1 ) { $i_nomatch = $i }
3249 elsif ( $match_code == 2 ) { $i_nomatch = 0 }
3254 $imax_align = $i_nomatch - 1;
3257 $line_m->{'imax_pair'} = $imax_align;
3259 } ## end loop over lines
3261 # Put fence at end of subgroup
3262 $line->{'imax_pair'} = -1;
3264 } ## end loop over subgroups
3266 # if there are hanging side comments, propagate the pair info down to them
3267 # so that lines can just look back one line for their pair info.
3268 if ( @{$rlines} > @{$rnew_lines} ) {
3269 my $last_pair_info = -1;
3270 foreach my $line ( @{$rlines} ) {
3271 if ( $line->{'is_hanging_side_comment'} ) {
3272 $line->{'imax_pair'} = $last_pair_info;
3275 $last_pair_info = $line->{'imax_pair'};
3280 } ## end sub match_line_pairs
3282 sub compare_patterns {
3284 my ( $group_level, $tok, $tok_m, $pat, $pat_m, $pad ) = @_;
3286 # helper routine for sub match_line_pairs to decide if patterns in two
3287 # lines match well enough..Given
3288 # $tok_m, $pat_m = token and pattern of first line
3289 # $tok, $pat = token and pattern of second line
3290 # $pad = 0 if no padding is needed, !=0 otherwise
3292 # 0 = patterns match, continue
3294 # 2 = no match, and lines do not match at all
3296 my $GoToMsg = EMPTY_STRING;
3297 my $return_code = 0;
3299 use constant EXPLAIN_COMPARE_PATTERNS => 0;
3301 my ( $alignment_token, $lev, $tag, $tok_count ) =
3302 decode_alignment_token($tok);
3304 # We have to be very careful about aligning commas
3305 # when the pattern's don't match, because it can be
3306 # worse to create an alignment where none is needed
3307 # than to omit one. Here's an example where the ','s
3308 # are not in named containers. The first line below
3309 # should not match the next two:
3310 # ( $a, $b ) = ( $b, $r );
3311 # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
3312 # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
3313 if ( $alignment_token eq ',' ) {
3315 # do not align commas unless they are in named
3317 if ( $tok !~ /[A-Za-z]/ ) {
3319 $GoToMsg = "do not align commas in unnamed containers";
3326 # do not align parens unless patterns match;
3327 # large ugly spaces can occur in math expressions.
3328 elsif ( $alignment_token eq '(' ) {
3330 # But we can allow a match if the parens don't
3331 # require any padding.
3334 $GoToMsg = "do not align '(' unless patterns match or pad=0";
3341 # Handle an '=' alignment with different patterns to
3343 elsif ( $alignment_token eq '=' ) {
3345 # It is best to be a little restrictive when
3346 # aligning '=' tokens. Here is an example of
3347 # two lines that we will not align:
3350 # The problem is that one is a 'my' declaration,
3351 # and the other isn't, so they're not very similar.
3352 # We will filter these out by comparing the first
3353 # letter of the pattern. This is crude, but works
3355 if ( substr( $pat_m, 0, 1 ) ne substr( $pat, 0, 1 ) ) {
3356 $GoToMsg = "first character before equals differ";
3360 # The introduction of sub 'prune_alignment_tree'
3361 # enabled alignment of lists left of the equals with
3362 # other scalar variables. For example:
3363 # my ( $D, $s, $e ) = @_;
3364 # my $d = length $D;
3365 # my $c = $e - $s - $d;
3367 # But this would change formatting of a lot of scripts,
3368 # so for now we prevent alignment of comma lists on the
3369 # left with scalars on the left. We will also prevent
3370 # any partial alignments.
3372 # set return code 2 if the = is at line level, but
3373 # set return code 1 if the = is below line level, i.e.
3374 # sub new { my ( $p, $v ) = @_; bless \$v, $p }
3375 # sub iter { my ($x) = @_; return undef if $$x < 0; return $$x--; }
3377 elsif ( ( index( $pat_m, ',' ) >= 0 ) ne ( index( $pat, ',' ) >= 0 ) ) {
3378 $GoToMsg = "mixed commas/no-commas before equals";
3380 if ( $lev eq $group_level ) {
3392 EXPLAIN_COMPARE_PATTERNS
3394 && print STDERR "no match because $GoToMsg\n";
3396 return ( $return_code, \$GoToMsg );
3398 } ## end sub compare_patterns
3400 sub fat_comma_to_comma {
3403 # We are changing '=>' to ',' and removing any trailing decimal count
3404 # because currently fat commas have a count and commas do not.
3405 # For example, we will change '=>2+{-3.2' into ',2+{-3'
3406 if ( $str =~ /^=>([^\.]*)/ ) { $str = ',' . $1 }
3408 } ## end sub fat_comma_to_comma
3410 sub get_line_token_info {
3412 # scan lines of tokens and return summary information about the range of
3413 # levels and patterns.
3416 # First scan to check monotonicity. Here is an example of several
3417 # lines which are monotonic. The = is the lowest level, and
3418 # the commas are all one level deeper. So this is not nonmonotonic.
3419 # $$d{"weeks"} = [ "w", "wk", "wks", "week", "weeks" ];
3420 # $$d{"days"} = [ "d", "day", "days" ];
3421 # $$d{"hours"} = [ "h", "hr", "hrs", "hour", "hours" ];
3423 my $all_monotonic = 1;
3424 foreach my $jj ( 0 .. @{$rlines} - 1 ) {
3425 my ($line) = $rlines->[$jj];
3426 my $rtokens = $line->{'rtokens'};
3428 my $is_monotonic = 1;
3430 foreach my $tok ( @{$rtokens} ) {
3432 my ( $raw_tok, $lev, $tag, $tok_count ) =
3433 decode_alignment_token($tok);
3434 push @{ $all_token_info[$jj] },
3435 [ $raw_tok, $lev, $tag, $tok_count ];
3436 last if ( $tok eq '#' );
3437 if ( $i > 0 && $lev < $last_lev ) { $is_monotonic = 0 }
3440 if ( !$is_monotonic ) { $all_monotonic = 0 }
3443 my $rline_values = [];
3444 foreach my $jj ( 0 .. @{$rlines} - 1 ) {
3445 my ($line) = $rlines->[$jj];
3447 my $rtokens = $line->{'rtokens'};
3449 my ( $lev_min, $lev_max );
3450 my $token_pattern_max = EMPTY_STRING;
3452 my $is_monotonic = 1;
3454 # find the index of the last token before the side comment
3455 my $imax = @{$rtokens} - 2;
3456 my $imax_true = $imax;
3458 # If the entire group is monotonic, and the line ends in a comma list,
3459 # walk it back to the first such comma. this will have the effect of
3460 # making all trailing ragged comma lists match in the prune tree
3461 # routine. these trailing comma lists can better be handled by later
3464 # Treat fat commas the same as commas here by converting them to
3465 # commas. This will improve the chance of aligning the leading parts
3468 my $tok_end = fat_comma_to_comma( $rtokens->[$imax] );
3469 if ( $all_monotonic && $tok_end =~ /^,/ ) {
3472 && fat_comma_to_comma( $rtokens->[$ii] ) eq $tok_end )
3479 # make a first pass to find level range
3481 foreach my $tok ( @{$rtokens} ) {
3483 last if ( $i > $imax );
3484 last if ( $tok eq '#' );
3485 my ( $raw_tok, $lev, $tag, $tok_count ) =
3486 @{ $all_token_info[$jj]->[$i] };
3488 last if ( $tok eq '#' );
3489 $token_pattern_max .= $tok;
3491 if ( !defined($lev_min) ) {
3496 if ( $lev < $lev_min ) { $lev_min = $lev; }
3497 if ( $lev > $lev_max ) { $lev_max = $lev; }
3498 if ( $lev < $last_lev ) { $is_monotonic = 0 }
3504 my $rtoken_patterns = {};
3505 my $rtoken_indexes = {};
3506 my @levs = sort keys %saw_level;
3507 if ( !defined($lev_min) ) {
3511 $rtoken_patterns->{$lev_min} = EMPTY_STRING;
3512 $rtoken_indexes->{$lev_min} = [];
3516 elsif ( $lev_max == $lev_min ) {
3517 $rtoken_patterns->{$lev_max} = $token_pattern_max;
3518 $rtoken_indexes->{$lev_max} = [ ( 0 .. $imax ) ];
3521 # handle multiple levels
3523 $rtoken_patterns->{$lev_max} = $token_pattern_max;
3524 $rtoken_indexes->{$lev_max} = [ ( 0 .. $imax ) ];
3526 my $lev_top = pop @levs; # alread did max level
3528 foreach my $tok ( @{$rtokens} ) {
3530 last if ( $itok > $imax );
3531 my ( $raw_tok, $lev, $tag, $tok_count ) =
3532 @{ $all_token_info[$jj]->[$itok] };
3533 last if ( $raw_tok eq '#' );
3534 foreach my $lev_test (@levs) {
3535 next if ( $lev > $lev_test );
3536 $rtoken_patterns->{$lev_test} .= $tok;
3537 push @{ $rtoken_indexes->{$lev_test} }, $itok;
3540 push @levs, $lev_top;
3543 push @{$rline_values},
3545 $lev_min, $lev_max, $rtoken_patterns, \@levs,
3546 $rtoken_indexes, $is_monotonic, $imax_true, $imax,
3551 local $LIST_SEPARATOR = ')(';
3552 print "lev_min=$lev_min, lev_max=$lev_max, levels=(@levs)\n";
3553 foreach my $key ( sort keys %{$rtoken_patterns} ) {
3554 print "$key => $rtoken_patterns->{$key}\n";
3555 print "$key => @{$rtoken_indexes->{$key}}\n";
3558 } ## end loop over lines
3559 return ( $rline_values, $all_monotonic );
3560 } ## end sub get_line_token_info
3562 sub prune_alignment_tree {
3564 my $jmax = @{$rlines} - 1;
3565 return unless $jmax > 0;
3567 # Vertical alignment in perltidy is done as an iterative process. The
3568 # starting point is to mark all possible alignment tokens ('=', ',', '=>',
3569 # etc) for vertical alignment. Then we have to delete all alignments
3570 # which, if actually made, would detract from overall alignment. This
3571 # is done in several phases of which this is one.
3573 # In this routine we look at the alignments of a group of lines as a
3574 # hierarchical tree. We will 'prune' the tree to limited depths if that
3575 # will improve overall alignment at the lower depths.
3576 # For each line we will be looking at its alignment patterns down to
3577 # different fixed depths. For each depth, we include all lower depths and
3578 # ignore all higher depths. We want to see if we can get alignment of a
3579 # larger group of lines if we ignore alignments at some lower depth.
3580 # Here is an # example:
3583 # [ '$var', sub { join $_, "bar" }, 0, "bar" ],
3584 # [ 'CONSTANT', sub { join "foo", "bar" }, 0, "bar" ],
3585 # [ 'CONSTANT', sub { join "foo", "bar", 3 }, 1, "barfoo3" ],
3586 # [ '$myvar', sub { my $var; join $var, "bar" }, 0, "bar" ],
3589 # In the above example, all lines have three commas at the lowest depth
3590 # (zero), so if there were no other alignments, these lines would all
3591 # align considering only the zero depth alignment token. But some lines
3592 # have additional comma alignments at the next depth, so we need to decide
3593 # if we should drop those to keep the top level alignments, or keep those
3594 # for some additional low level alignments at the expense losing some top
3595 # level alignments. In this case we will drop the deeper level commas to
3596 # keep the entire collection aligned. But in some cases the decision could
3599 # The tree for this example at the zero depth has one node containing
3600 # all four lines, since they are identical at zero level (three commas).
3601 # At depth one, there are three 'children' nodes, namely:
3602 # - lines 1 and 2, which have a single comma in the 'sub' at depth 1
3603 # - line 3, which has 2 commas at depth 1
3604 # - line4, which has a ';' and a ',' at depth 1
3605 # There are no deeper alignments in this example.
3606 # so the tree structure for this example is:
3608 # depth 0 depth 1 depth 2
3609 # [lines 1-4] -- [line 1-2] - (empty)
3610 # | [line 3] - (empty)
3611 # | [line 4] - (empty)
3613 # We can carry this to any depth, but it is not really useful to go below
3614 # depth 2. To cleanly stop there, we will consider depth 2 to contain all
3615 # alignments at depth >=2.
3617 use constant EXPLAIN_PRUNE => 0;
3619 #-------------------------------------------------------------------
3620 # Prune Tree Step 1. Start by scanning the lines and collecting info
3621 #-------------------------------------------------------------------
3623 # Note that the caller had this info but we have to redo this now because
3624 # alignment tokens may have been deleted.
3625 my ( $rline_values, $all_monotonic ) = get_line_token_info($rlines);
3627 # If all the lines have levels which increase monotonically from left to
3628 # right, then the sweep-left-to-right pass can do a better job of alignment
3629 # than pruning, and without deleting alignments.
3630 return if ($all_monotonic);
3632 # Contents of $rline_values
3634 # $lev_min, $lev_max, $rtoken_patterns, \@levs,
3635 # $rtoken_indexes, $is_monotonic, $imax_true, $imax,
3638 # We can work to any depth, but there is little advantage to working
3639 # to a a depth greater than 2
3642 # This arrays will hold the tree of alignment tokens at different depths
3646 # Tree nodes contain these values:
3647 # $match_tree[$depth] = [$jbeg, $jend, $n_parent, $level, $pattern,
3648 # $nc_beg_p, $nc_end_p, $rindexes];
3650 # $depth = 0,1,2 = index of depth of the match
3652 # $jbeg beginning index j of the range of lines in this match
3653 # $jend ending index j of the range of lines in this match
3654 # $n_parent = index of the containing group at $depth-1, if it exists
3655 # $level = actual level of code being matched in this group
3656 # $pattern = alignment pattern being matched
3657 # $nc_beg_p = first child
3658 # $nc_end_p = last child
3659 # $rindexes = ref to token indexes
3661 # the patterns and levels of the current group being formed at each depth
3662 my ( @token_patterns_current, @levels_current, @token_indexes_current );
3664 # the patterns and levels of the next line being tested at each depth
3665 my ( @token_patterns_next, @levels_next, @token_indexes_next );
3667 #-----------------------------------------------------------
3668 # define a recursive worker subroutine for tree construction
3669 #-----------------------------------------------------------
3671 # This is a recursive routine which is called if a match condition changes
3672 # at any depth when a new line is encountered. It ends the match node
3673 # which changed plus all deeper nodes attached to it.
3676 my ( $depth, $jl, $n_parent ) = @_;
3678 # $depth is the tree depth
3679 # $jl is the index of the line
3680 # $n_parent is index of the parent node of this node
3682 return if ( $depth > $MAX_DEPTH );
3684 # end any current group at this depth
3686 && defined( $match_tree[$depth] )
3687 && @{ $match_tree[$depth] }
3688 && defined( $levels_current[$depth] ) )
3690 $match_tree[$depth]->[-1]->[1] = $jl;
3693 # Define the index of the node we will create below
3695 if ( defined( $match_tree[$depth] ) ) {
3696 $ng_self = @{ $match_tree[$depth] };
3699 # end any next deeper child node(s)
3700 $end_node->( $depth + 1, $jl, $ng_self );
3702 # update the levels being matched
3703 $token_patterns_current[$depth] = $token_patterns_next[$depth];
3704 $token_indexes_current[$depth] = $token_indexes_next[$depth];
3705 $levels_current[$depth] = $levels_next[$depth];
3707 # Do not start a new group at this level if it is not being used
3708 if ( !defined( $levels_next[$depth] )
3710 && $levels_next[$depth] <= $levels_next[ $depth - 1 ] )
3715 # Create a node for the next group at this depth. We initially assume
3716 # that it will continue to $jmax, and correct that later if the node
3718 push @{ $match_tree[$depth] },
3720 $jl + 1, $jmax, $n_parent, $levels_current[$depth],
3721 $token_patterns_current[$depth],
3722 undef, undef, $token_indexes_current[$depth],
3726 }; ## end sub end_node
3728 #-----------------------------------------------------
3729 # Prune Tree Step 2. Loop to form the tree of matches.
3730 #-----------------------------------------------------
3731 foreach my $jp ( 0 .. $jmax ) {
3733 # working with two adjacent line indexes, 'm'=minus, 'p'=plus
3736 # Pull out needed values for the next line
3737 my ( $lev_min, $lev_max, $rtoken_patterns, $rlevs, $rtoken_indexes,
3738 $is_monotonic, $imax_true, $imax )
3739 = @{ $rline_values->[$jp] };
3741 # Transfer levels and patterns for this line to the working arrays.
3742 # If the number of levels differs from our chosen MAX_DEPTH ...
3743 # if fewer than MAX_DEPTH: leave levels at missing depths undefined
3744 # if more than MAX_DEPTH: set the MAX_DEPTH level to be the maximum
3745 @levels_next = @{$rlevs}[ 0 .. $MAX_DEPTH ];
3746 if ( @{$rlevs} > $MAX_DEPTH ) {
3747 $levels_next[$MAX_DEPTH] = $rlevs->[-1];
3750 foreach my $item (@levels_next) {
3751 $token_patterns_next[$depth] =
3752 defined($item) ? $rtoken_patterns->{$item} : undef;
3753 $token_indexes_next[$depth] =
3754 defined($item) ? $rtoken_indexes->{$item} : undef;
3758 # Look for a change in match groups...
3760 # Initialize on the first line
3763 $end_node->( 0, $jm, $n_parent );
3766 # End groups if a hard flag has been set
3767 elsif ( $rlines->[$jm]->{'end_group'} ) {
3769 $end_node->( 0, $jm, $n_parent );
3772 # Continue at hanging side comment
3773 elsif ( $rlines->[$jp]->{'is_hanging_side_comment'} ) {
3777 # Otherwise see if anything changed and update the tree if so
3779 foreach my $depth ( 0 .. $MAX_DEPTH ) {
3781 my $def_current = defined( $token_patterns_current[$depth] );
3782 my $def_next = defined( $token_patterns_next[$depth] );
3783 last unless ( $def_current || $def_next );
3786 || $token_patterns_current[$depth] ne
3787 $token_patterns_next[$depth] )
3790 if ( $depth > 0 && defined( $match_tree[ $depth - 1 ] ) ) {
3791 $n_parent = @{ $match_tree[ $depth - 1 ] } - 1;
3793 $end_node->( $depth, $jm, $n_parent );
3798 } ## end loop to form tree of matches
3800 #---------------------------------------------------------
3801 # Prune Tree Step 3. Make links from parent to child nodes
3802 #---------------------------------------------------------
3804 # It seemed cleaner to do this as a separate step rather than during tree
3805 # construction. The children nodes have links up to the parent node which
3806 # created them. Now make links in the opposite direction, so the parents
3807 # can find the children. We store the range of children nodes ($nc_beg,
3808 # $nc_end) of each parent with two additional indexes in the original array.
3809 # These will be undef if no children.
3810 foreach my $depth ( reverse( 1 .. $MAX_DEPTH ) ) {
3811 next unless defined( $match_tree[$depth] );
3812 my $nc_max = @{ $match_tree[$depth] } - 1;
3814 foreach my $nc ( 0 .. $nc_max ) {
3815 my $np = $match_tree[$depth]->[$nc]->[2];
3816 if ( !defined($np) ) {
3819 #print STDERR "lost child $np at depth $depth\n";
3822 if ( !defined($np_now) || $np != $np_now ) {
3824 $match_tree[ $depth - 1 ]->[$np]->[5] = $nc;
3826 $match_tree[ $depth - 1 ]->[$np]->[6] = $nc;
3828 } ## end loop to make links down to the child nodes
3830 EXPLAIN_PRUNE > 0 && do {
3831 print "Tree complete. Found these groups:\n";
3832 foreach my $depth ( 0 .. $MAX_DEPTH ) {
3833 Dump_tree_groups( \@{ $match_tree[$depth] }, "depth=$depth" );
3837 #------------------------------------------------------
3838 # Prune Tree Step 4. Make a list of nodes to be deleted
3839 #------------------------------------------------------
3841 # list of lines with tokens to be deleted:
3842 # [$jbeg, $jend, $level_keep]
3843 # $jbeg..$jend is the range of line indexes,
3844 # $level_keep is the minimum level to keep
3847 # Not currently used:
3848 # Groups with ending comma lists and their range of sizes:
3849 # $ragged_comma_group{$id} = [ imax_group_min, imax_group_max ]
3850 ## my %ragged_comma_group;
3852 # We work with a list of nodes to visit at the next deeper depth.
3854 if ( defined( $match_tree[0] ) ) {
3855 @todo_list = ( 0 .. @{ $match_tree[0] } - 1 );
3858 foreach my $depth ( 0 .. $MAX_DEPTH ) {
3859 last unless (@todo_list);
3861 foreach my $np (@todo_list) {
3862 my ( $jbeg_p, $jend_p, $np_p, $lev_p, $pat_p, $nc_beg_p, $nc_end_p,
3864 = @{ $match_tree[$depth]->[$np] };
3865 my $nlines_p = $jend_p - $jbeg_p + 1;
3867 # nothing to do if no children
3868 next unless defined($nc_beg_p);
3870 # Define the number of lines to either keep or delete a child node.
3871 # This is the key decision we have to make. We want to delete
3872 # short runs of matched lines, and keep long runs. It seems easier
3873 # for the eye to follow breaks in monotonic level changes than
3874 # non-monotonic level changes. For example, the following looks
3875 # best if we delete the lower level alignments:
3878 # [ ["foo"], ["bar"] ] ~~ [ qr/o/, qr/a/ ];
3879 # [ qr/o/, qr/a/ ] ~~ [ ["foo"], ["bar"] ];
3880 # [ "foo", "bar" ] ~~ [ qr/o/, qr/a/ ];
3881 # [ qr/o/, qr/a/ ] ~~ [ "foo", "bar" ];
3884 # So we will use two thresholds.
3885 my $nmin_mono = $depth + 2;
3886 my $nmin_non_mono = $depth + 6;
3887 if ( $nmin_mono > $nlines_p - 1 ) {
3888 $nmin_mono = $nlines_p - 1;
3890 if ( $nmin_non_mono > $nlines_p - 1 ) {
3891 $nmin_non_mono = $nlines_p - 1;
3894 # loop to keep or delete each child node
3895 foreach my $nc ( $nc_beg_p .. $nc_end_p ) {
3896 my ( $jbeg_c, $jend_c, $np_c, $lev_c, $pat_c, $nc_beg_c,
3898 = @{ $match_tree[ $depth + 1 ]->[$nc] };
3899 my $nlines_c = $jend_c - $jbeg_c + 1;
3900 my $is_monotonic = $rline_values->[$jbeg_c]->[5];
3901 my $nmin = $is_monotonic ? $nmin_mono : $nmin_non_mono;
3902 if ( $nlines_c < $nmin ) {
3903 ##print "deleting child, nlines=$nlines_c, nmin=$nmin\n";
3904 push @delete_list, [ $jbeg_c, $jend_c, $lev_p ];
3907 ##print "keeping child, nlines=$nlines_c, nmin=$nmin\n";
3908 push @todo_next, $nc;
3912 @todo_list = @todo_next;
3913 } ## end loop to mark nodes to delete
3915 #------------------------------------------------------------
3916 # Prune Tree Step 5. Loop to delete selected alignment tokens
3917 #------------------------------------------------------------
3918 foreach my $item (@delete_list) {
3919 my ( $jbeg, $jend, $level_keep ) = @{$item};
3920 foreach my $jj ( $jbeg .. $jend ) {
3921 my $line = $rlines->[$jj];
3923 my $rtokens = $line->{'rtokens'};
3924 my $imax = @{$rtokens} - 2;
3925 foreach my $i ( 0 .. $imax ) {
3926 my $tok = $rtokens->[$i];
3927 my ( $raw_tok, $lev, $tag, $tok_count ) =
3928 decode_alignment_token($tok);
3929 if ( $lev > $level_keep ) {
3934 delete_selected_tokens( $line, \@idel );
3937 } ## end loop to delete selected alignment tokens
3940 } ## end sub prune_alignment_tree
3942 sub Dump_tree_groups {
3943 my ( $rgroup, $msg ) = @_;
3947 local $LIST_SEPARATOR = ')(';
3948 foreach my $item ( @{$rgroup} ) {
3950 foreach my $val (@fix) { $val = "undef" unless defined $val; }
3955 } ## end sub Dump_tree_groups
3957 { ## closure for sub is_marginal_match
3961 my %is_good_alignment;
3963 # This test did not give sufficiently better results to use as an update,
3964 # but the flag is worth keeping as a starting point for future testing.
3965 use constant TEST_MARGINAL_EQ_ALIGNMENT => 0;
3972 @is_if_or{@q} = (1) x scalar(@q);
3975 = **= += *= &= <<= &&=
3976 -= /= |= >>= ||= //=
3980 @is_assignment{@q} = (1) x scalar(@q);
3982 # Vertically aligning on certain "good" tokens is usually okay
3983 # so we can be less restrictive in marginal cases.
3984 @q = qw( { ? => = );
3986 @is_good_alignment{@q} = (1) x scalar(@q);
3989 sub is_marginal_match {
3991 my ( $line_0, $line_1, $group_level, $imax_align, $imax_prev ) = @_;
3993 # Decide if we should undo some or all of the common alignments of a
3994 # group of just two lines.
3997 # $line_0 and $line_1 - the two lines
3998 # $group_level = the indentation level of the group being processed
3999 # $imax_align = the maximum index of the common alignment tokens
4001 # $imax_prev = the maximum index of the common alignment tokens
4002 # with the line before $line_0 (=-1 of does not exist)
4005 # $is_marginal = true if the two lines should NOT be fully aligned
4006 # = false if the two lines can remain fully aligned
4007 # $imax_align = the index of the highest alignment token shared by
4008 # these two lines to keep if the match is marginal.
4010 # When we have an alignment group of just two lines like this, we are
4011 # working in the twilight zone of what looks good and what looks bad.
4012 # This routine is a collection of rules which work have been found to
4013 # work fairly well, but it will need to be updated from time to time.
4015 my $is_marginal = 0;
4017 #---------------------------------------
4018 # Always align certain special cases ...
4019 #---------------------------------------
4022 # always keep alignments of a terminal else or ternary
4023 defined( $line_1->{'j_terminal_match'} )
4025 # always align lists
4026 || $line_0->{'list_type'}
4028 # always align hanging side comments
4029 || $line_1->{'is_hanging_side_comment'}
4033 return ( $is_marginal, $imax_align );
4036 my $jmax_0 = $line_0->{'jmax'};
4037 my $jmax_1 = $line_1->{'jmax'};
4038 my $rtokens_1 = $line_1->{'rtokens'};
4039 my $rtokens_0 = $line_0->{'rtokens'};
4040 my $rfield_lengths_0 = $line_0->{'rfield_lengths'};
4041 my $rfield_lengths_1 = $line_1->{'rfield_lengths'};
4042 my $rpatterns_0 = $line_0->{'rpatterns'};
4043 my $rpatterns_1 = $line_1->{'rpatterns'};
4044 my $imax_next = $line_1->{'imax_pair'};
4046 # We will scan the alignment tokens and set a flag '$is_marginal' if
4047 # it seems that the an alignment would look bad.
4049 my $saw_good_alignment = 0;
4050 my $saw_if_or; # if we saw an 'if' or 'or' at group level
4051 my $raw_tokb = EMPTY_STRING; # first token seen at group level
4053 my $line_ending_fat_comma; # is last token just a '=>' ?
4057 foreach my $j ( 0 .. $jmax_1 - 2 ) {
4058 my ( $raw_tok, $lev, $tag, $tok_count ) =
4059 decode_alignment_token( $rtokens_1->[$j] );
4060 if ( $raw_tok && $lev == $group_level ) {
4061 if ( !$raw_tokb ) { $raw_tokb = $raw_tok }
4062 $saw_if_or ||= $is_if_or{$raw_tok};
4065 # When the first of the two lines ends in a bare '=>' this will
4066 # probably be marginal match. (For a bare =>, the next field length
4067 # will be 2 or 3, depending on side comment)
4068 $line_ending_fat_comma =
4071 && $rfield_lengths_0->[ $j + 1 ] <= 3;
4073 my $pad = $rfield_lengths_1->[$j] - $rfield_lengths_0->[$j];
4075 $pad += $line_1->{'leading_space_count'} -
4076 $line_0->{'leading_space_count'};
4078 # Remember the pad at a leading equals
4079 if ( $raw_tok eq '=' && $lev == $group_level ) {
4082 0.5 * ( $rfield_lengths_1->[0] + $rfield_lengths_0->[0] );
4083 $j0_max_pad = 4 if ( $j0_max_pad < 4 );
4087 if ( $pad < 0 ) { $pad = -$pad }
4088 if ( $pad > $max_pad ) { $max_pad = $pad }
4089 if ( $is_good_alignment{$raw_tok} && !$line_ending_fat_comma ) {
4090 $saw_good_alignment = 1;
4093 $jfirst_bad = $j unless defined($jfirst_bad);
4095 if ( $rpatterns_0->[$j] ne $rpatterns_1->[$j] ) {
4097 # Flag this as a marginal match since patterns differ.
4098 # Normally, we will not allow just two lines to match if
4099 # marginal. But we can allow matching in some specific cases.
4101 $jfirst_bad = $j if ( !defined($jfirst_bad) );
4102 $is_marginal = 1 if ( $is_marginal == 0 );
4103 if ( $raw_tok eq '=' ) {
4105 # Here is an example of a marginal match:
4107 # $op = compile_bblock($op);
4108 # The left tokens are both identifiers, but
4109 # one accesses a hash and the other doesn't.
4110 # We'll let this be a tentative match and undo
4111 # it later if we don't find more than 2 lines
4118 $is_marginal = 1 if ( $is_marginal == 0 && $line_ending_fat_comma );
4120 # Turn off the "marginal match" flag in some cases...
4121 # A "marginal match" occurs when the alignment tokens agree
4122 # but there are differences in the other tokens (patterns).
4123 # If we leave the marginal match flag set, then the rule is that we
4124 # will align only if there are more than two lines in the group.
4125 # We will turn of the flag if we almost have a match
4126 # and either we have seen a good alignment token or we
4127 # just need a small pad (2 spaces) to fit. These rules are
4128 # the result of experimentation. Tokens which misaligned by just
4129 # one or two characters are annoying. On the other hand,
4130 # large gaps to less important alignment tokens are also annoying.
4131 if ( $is_marginal == 1
4132 && ( $saw_good_alignment || $max_pad < 3 ) )
4137 # We will use the line endings to help decide on alignments...
4138 # See if the lines end with semicolons...
4141 if ( $jmax_0 < 1 || $jmax_1 < 1 ) {
4146 my $pat0 = $rpatterns_0->[ $jmax_0 - 1 ];
4147 my $pat1 = $rpatterns_1->[ $jmax_1 - 1 ];
4148 $sc_term0 = $pat0 =~ /;b?$/;
4149 $sc_term1 = $pat1 =~ /;b?$/;
4152 if ( !$is_marginal && !$sc_term0 ) {
4154 # First line of assignment should be semicolon terminated.
4155 # For example, do not align here:
4156 # $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
4157 # $$href{-NUM_DIRS} = 0;
4158 if ( $is_assignment{$raw_tokb} ) {
4163 # Try to avoid some undesirable alignments of opening tokens
4164 # for example, the space between grep and { here:
4165 # return map { ( $_ => $_ ) }
4166 # grep { /$handles/ } $self->_get_delegate_method_list;
4168 ( $raw_tokb eq '(' || $raw_tokb eq '{' )
4170 && $sc_term0 ne $sc_term1;
4172 #---------------------------------------
4173 # return if this is not a marginal match
4174 #---------------------------------------
4175 if ( !$is_marginal ) {
4176 return ( $is_marginal, $imax_align );
4179 # Undo the marginal match flag in certain cases,
4181 # Two lines with a leading equals-like operator are allowed to
4182 # align if the patterns to the left of the equals are the same.
4183 # For example the following two lines are a marginal match but have
4184 # the same left side patterns, so we will align the equals.
4185 # my $orig = my $format = "^<<<<< ~~\n";
4187 # But these have a different left pattern so they will not be
4190 # $self->{'leftovers'} .= "<bx-seq:seq" . $';
4192 # First line semicolon terminated but second not, usually ok:
4193 # my $want = "'ab', 'a', 'b'";
4194 # my $got = join( ", ",
4195 # map { defined($_) ? "'$_'" : "undef" }
4197 # First line not semicolon terminated, Not OK to match:
4198 # $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
4199 # $$href{-NUM_DIRS} = 0;
4200 my $pat0 = $rpatterns_0->[0];
4201 my $pat1 = $rpatterns_1->[0];
4203 #---------------------------------------------------------
4204 # Turn off the marginal flag for some types of assignments
4205 #---------------------------------------------------------
4206 if ( $is_assignment{$raw_tokb} ) {
4208 # undo marginal flag if first line is semicolon terminated
4209 # and leading patters match
4210 if ($sc_term0) { # && $sc_term1) {
4211 $is_marginal = $pat0 ne $pat1;
4214 elsif ( $raw_tokb eq '=>' ) {
4216 # undo marginal flag if patterns match
4217 $is_marginal = $pat0 ne $pat1 || $line_ending_fat_comma;
4219 elsif ( $raw_tokb eq '=~' ) {
4221 # undo marginal flag if both lines are semicolon terminated
4222 # and leading patters match
4223 if ( $sc_term1 && $sc_term0 ) {
4224 $is_marginal = $pat0 ne $pat1;
4228 #-----------------------------------------------------
4229 # Turn off the marginal flag if we saw an 'if' or 'or'
4230 #-----------------------------------------------------
4232 # A trailing 'if' and 'or' often gives a good alignment
4233 # For example, we can align these:
4234 # return -1 if $_[0] =~ m/^CHAPT|APPENDIX/;
4235 # return $1 + 0 if $_[0] =~ m/^SECT(\d*)$/;
4238 # $d_in_m[2] = 29 if ( &Date_LeapYear($y) );
4239 # $d = $d_in_m[$m] if ( $d > $d_in_m[$m] );
4243 # undo marginal flag if both lines are semicolon terminated
4244 if ( $sc_term0 && $sc_term1 ) {
4249 # For a marginal match, only keep matches before the first 'bad' match
4251 && defined($jfirst_bad)
4252 && $imax_align > $jfirst_bad - 1 )
4254 $imax_align = $jfirst_bad - 1;
4257 #----------------------------------------------------------
4258 # Allow sweep to match lines with leading '=' in some cases
4259 #----------------------------------------------------------
4260 if ( $imax_align < 0 && defined($j0_eq_pad) ) {
4264 # If there is a following line with leading equals, or
4265 # preceding line with leading equals, then let the sweep align
4266 # them without restriction. For example, the first two lines
4267 # here are a marginal match, but they are followed by a line
4268 # with leading equals, so the sweep-lr logic can align all of
4271 # $date[1] = $month_to_num{ $date[1] }; # <--line_0
4272 # @xdate = split( /[:\/\s]/, $log->field('t') ); # <--line_1
4273 # $day = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
4274 # $time = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
4276 # Likewise, if we reverse the two pairs we want the same result
4278 # $day = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
4279 # $time = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
4280 # $date[1] = $month_to_num{ $date[1] }; # <--line_0
4281 # @xdate = split( /[:\/\s]/, $log->field('t') ); # <--line_1
4286 || TEST_MARGINAL_EQ_ALIGNMENT
4288 && $j0_eq_pad >= -$j0_max_pad
4289 && $j0_eq_pad <= $j0_max_pad
4293 # But do not do this if there is a comma before the '='.
4294 # For example, the first two lines below have commas and
4295 # therefore are not allowed to align with lines 3 & 4:
4297 # my ( $x, $y ) = $self->Size(); #<--line_0
4298 # my ( $left, $top, $right, $bottom ) = $self->Window(); #<--l_1
4299 # my $vx = $right - $left;
4300 # my $vy = $bottom - $top;
4302 if ( $rpatterns_0->[0] !~ /,/ && $rpatterns_1->[0] !~ /,/ ) {
4308 return ( $is_marginal, $imax_align );
4309 } ## end sub is_marginal_match
4310 } ## end closure for sub is_marginal_match
4312 sub get_extra_leading_spaces {
4314 my ( $rlines, $rgroups ) = @_;
4316 #----------------------------------------------------------
4317 # Define any extra indentation space (for the -lp option).
4319 # If a list has side comments, sub scan_list must dump the
4320 # list before it sees everything. When this happens, it sets
4321 # the indentation to the standard scheme, but notes how
4322 # many spaces it would have liked to use. We may be able
4323 # to recover that space here in the event that all of the
4324 # lines of a list are back together again.
4325 #----------------------------------------------------------
4327 return 0 unless ( @{$rlines} && @{$rgroups} );
4329 my $object = $rlines->[0]->{'indentation'};
4330 return 0 unless ( ref($object) );
4331 my $extra_leading_spaces = 0;
4332 my $extra_indentation_spaces_wanted = get_recoverable_spaces($object);
4333 return ($extra_leading_spaces) unless ($extra_indentation_spaces_wanted);
4335 my $min_spaces = $extra_indentation_spaces_wanted;
4336 if ( $min_spaces > 0 ) { $min_spaces = 0 }
4338 # loop over all groups
4340 my $ngroups = @{$rgroups};
4341 foreach my $item ( @{$rgroups} ) {
4343 my ( $jbeg, $jend ) = @{$item};
4344 foreach my $j ( $jbeg .. $jend ) {
4345 next if ( $j == 0 );
4347 # all indentation objects must be the same
4348 if ( $object != $rlines->[$j]->{'indentation'} ) {
4353 # find the maximum space without exceeding the line length for this group
4354 my $avail = $rlines->[$jbeg]->get_available_space_on_right();
4356 ( $avail > $extra_indentation_spaces_wanted )
4357 ? $extra_indentation_spaces_wanted
4360 #--------------------------------------------------------
4361 # Note: min spaces can be negative; for example with -gnu
4363 # do { 1; !!(my $x = bless []); }
4365 #--------------------------------------------------------
4366 # The following rule is needed to match older formatting:
4367 # For multiple groups, we will keep spaces non-negative.
4368 # For a single group, we will allow a negative space.
4369 if ( $ngroups > 1 && $spaces < 0 ) { $spaces = 0 }
4371 # update the minimum spacing
4372 if ( $ng == 0 || $spaces < $extra_leading_spaces ) {
4373 $extra_leading_spaces = $spaces;
4377 # update the indentation object because with -icp the terminal
4378 # ');' will use the same adjustment.
4379 $object->permanently_decrease_available_spaces( -$extra_leading_spaces );
4380 return $extra_leading_spaces;
4381 } ## end sub get_extra_leading_spaces
4383 sub forget_side_comment {
4385 $self->[_last_side_comment_column_] = 0;
4389 sub is_good_side_comment_column {
4390 my ( $self, $line, $line_number, $level, $num5 ) = @_;
4392 # Upon encountering the first side comment of a group, decide if
4393 # a previous side comment should be forgotten. This involves
4394 # checking several rules.
4396 # Return true to KEEP old comment location
4397 # Return false to FORGET old comment location
4401 my $rfields = $line->{'rfields'};
4402 my $is_hanging_side_comment = $line->{'is_hanging_side_comment'};
4404 # RULE1: Never forget comment before a hanging side comment
4405 return $KEEP if ($is_hanging_side_comment);
4407 # RULE2: Forget a side comment after a short line difference,
4408 # where 'short line difference' is computed from a formula.
4409 # Using a smooth formula helps minimize sudden large changes.
4410 my $line_diff = $line_number - $self->[_last_side_comment_line_number_];
4411 my $alev_diff = abs( $level - $self->[_last_side_comment_level_] );
4413 # '$num5' is the number of comments in the first 5 lines after the first
4414 # comment. It is needed to keep a compact group of side comments from
4415 # being influenced by a more distant side comment.
4416 $num5 = 1 unless ($num5);
4420 # $adiff $num5 $short_diff
4432 my $short_diff = SC_LONG_LINE_DIFF / ( 1 + $alev_diff * $num5 );
4435 if ( $line_diff > $short_diff
4436 || !$self->[_rOpts_valign_side_comments_] );
4438 # RULE3: Forget a side comment if this line is at lower level and
4440 my $last_sc_level = $self->[_last_side_comment_level_];
4442 if ( $level < $last_sc_level
4443 && $is_closing_block_type{ substr( $rfields->[0], 0, 1 ) } );
4445 # RULE 4: Forget the last side comment if this comment might join a cached
4447 if ( my $cached_line_type = get_cached_line_type() ) {
4449 # ... otherwise side comment alignment will get messed up.
4450 # For example, in the following test script
4451 # with using 'perltidy -sct -act=2', the last comment would try to
4452 # align with the previous and then be in the wrong column when
4453 # the lines are combined:
4456 # [0, 1, 2], [3, 4, 5], [6, 7, 8], # rows
4457 # [0, 3, 6], [1, 4, 7], [2, 5, 8], # columns
4458 # [0, 4, 8], [2, 4, 6]
4461 if ( $cached_line_type == 2 || $cached_line_type == 4 );
4464 # Otherwise, keep it alive
4466 } ## end sub is_good_side_comment_column
4468 sub align_side_comments {
4470 my ( $self, $rlines, $rgroups ) = @_;
4472 # Align any side comments in this batch of lines
4475 # $rlines - the lines
4476 # $rgroups - the partition of the lines into groups
4478 # We will be working group-by-group because all side comments
4479 # (real or fake) in each group are already aligned. So we just have
4480 # to make alignments between groups wherever possible.
4482 # An unusual aspect is that within each group we have aligned both real
4483 # and fake side comments. This has the consequence that the lengths of
4484 # long lines without real side comments can cause 'push' all side comments
4485 # to the right. This seems unusual, but testing with and without this
4486 # feature shows that it is usually better this way. Otherwise, side
4487 # comments can be hidden between long lines without side comments and
4488 # thus be harder to read.
4490 my $group_level = $self->[_group_level_];
4491 my $continuing_sc_flow = $self->[_last_side_comment_length_] > 0
4492 && $group_level == $self->[_last_level_written_];
4494 # Find groups with side comments, and remember the first nonblank comment
4498 foreach my $item ( @{$rgroups} ) {
4500 my ( $jbeg, $jend ) = @{$item};
4501 foreach my $j ( $jbeg .. $jend ) {
4502 my $line = $rlines->[$j];
4503 my $jmax = $line->{'jmax'};
4504 if ( $line->{'rfield_lengths'}->[$jmax] ) {
4506 # this group has a line with a side comment
4508 if ( !defined($j_sc_beg) ) {
4516 # done if no groups with side comments
4517 return unless @todo;
4519 # Count $num5 = number of comments in the 5 lines after the first comment
4520 # This is an important factor in a decision formula
4522 foreach my $jj ( $j_sc_beg + 1 .. @{$rlines} - 1 ) {
4523 my $ldiff = $jj - $j_sc_beg;
4524 last if ( $ldiff > 5 );
4525 my $line = $rlines->[$jj];
4526 my $jmax = $line->{'jmax'};
4527 my $sc_len = $line->{'rfield_lengths'}->[$jmax];
4528 next unless ($sc_len);
4532 # Forget the old side comment location if necessary
4533 my $line_0 = $rlines->[$j_sc_beg];
4535 $j_sc_beg + $self->[_file_writer_object_]->get_output_line_number();
4537 $self->is_good_side_comment_column( $line_0, $lnum, $group_level, $num5 );
4538 my $last_side_comment_column =
4539 $keep_it ? $self->[_last_side_comment_column_] : 0;
4541 # If there are multiple groups we will do two passes
4542 # so that we can find a common alignment for all groups.
4543 my $MAX_PASS = @todo > 1 ? 2 : 1;
4546 my $max_comment_column = $last_side_comment_column;
4547 foreach my $PASS ( 1 .. $MAX_PASS ) {
4549 # If there are two passes, then on the last pass make the old column
4550 # equal to the largest of the group. This will result in the comments
4551 # being aligned if possible.
4552 if ( $PASS == $MAX_PASS ) {
4553 $last_side_comment_column = $max_comment_column;
4556 # Loop over the groups with side comments
4558 foreach my $ng (@todo) {
4559 my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
4561 # Note that since all lines in a group have common alignments, we
4562 # just have to work on one of the lines (the first line).
4563 my $line = $rlines->[$jbeg];
4564 my $jmax = $line->{'jmax'};
4565 my $is_hanging_side_comment = $line->{'is_hanging_side_comment'};
4567 if ( $PASS < $MAX_PASS && $is_hanging_side_comment );
4569 # the maximum space without exceeding the line length:
4570 my $avail = $line->get_available_space_on_right();
4572 # try to use the previous comment column
4573 my $side_comment_column = $line->get_column( $jmax - 1 );
4574 my $move = $last_side_comment_column - $side_comment_column;
4576 # Remember the maximum possible column of the first line with
4578 if ( !defined($column_limit) ) {
4579 $column_limit = $side_comment_column + $avail;
4582 next if ( $jmax <= 0 );
4584 # but if this doesn't work, give up and use the minimum space
4585 my $min_move = $self->[_rOpts_minimum_space_to_comment_] - 1;
4586 if ( $move > $avail ) {
4590 # but we want some minimum space to the comment
4593 && $continuing_sc_flow )
4598 # remove constraints on hanging side comments
4599 if ($is_hanging_side_comment) { $min_move = 0 }
4601 if ( $move < $min_move ) {
4605 # don't exceed the available space
4606 if ( $move > $avail ) { $move = $avail }
4608 # We can only increase space, never decrease.
4609 if ( $move < 0 ) { $move = 0 }
4611 # Discover the largest column on the preliminary pass
4612 if ( $PASS < $MAX_PASS ) {
4613 my $col = $line->get_column( $jmax - 1 ) + $move;
4615 # but ignore columns too large for the starting line
4616 if ( $col > $max_comment_column && $col < $column_limit ) {
4617 $max_comment_column = $col;
4621 # Make the changes on the final pass
4623 $line->increase_field_width( $jmax - 1, $move );
4625 # remember this column for the next group
4626 $last_side_comment_column = $line->get_column( $jmax - 1 );
4628 } ## end loop over groups
4629 } ## end loop over passes
4631 # Find the last side comment
4633 my $ng_last = $todo[-1];
4634 my ( $jbeg, $jend ) = @{ $rgroups->[$ng_last] };
4635 foreach my $jj ( reverse( $jbeg .. $jend ) ) {
4636 my $line = $rlines->[$jj];
4637 my $jmax = $line->{'jmax'};
4638 if ( $line->{'rfield_lengths'}->[$jmax] ) {
4644 # Save final side comment info for possible use by the next batch
4645 if ( defined($j_sc_last) ) {
4647 $self->[_file_writer_object_]->get_output_line_number() + $j_sc_last;
4648 $self->[_last_side_comment_column_] = $last_side_comment_column;
4649 $self->[_last_side_comment_line_number_] = $line_number;
4650 $self->[_last_side_comment_level_] = $group_level;
4653 } ## end sub align_side_comments
4655 ###############################
4656 # CODE SECTION 6: Output Step A
4657 ###############################
4659 sub valign_output_step_A {
4661 #------------------------------------------------------------
4662 # This is Step A in writing vertically aligned lines.
4663 # The line is prepared according to the alignments which have
4664 # been found. Then it is shipped to the next step.
4665 #------------------------------------------------------------
4667 my ( $self, $rinput_hash ) = @_;
4669 my $line = $rinput_hash->{line};
4670 my $min_ci_gap = $rinput_hash->{min_ci_gap};
4671 my $do_not_align = $rinput_hash->{do_not_align};
4672 my $group_leader_length = $rinput_hash->{group_leader_length};
4673 my $extra_leading_spaces = $rinput_hash->{extra_leading_spaces};
4674 my $level = $rinput_hash->{level};
4675 my $maximum_line_length = $rinput_hash->{maximum_line_length};
4677 my $rfields = $line->{'rfields'};
4678 my $rfield_lengths = $line->{'rfield_lengths'};
4679 my $leading_space_count = $line->{'leading_space_count'};
4680 my $outdent_long_lines = $line->{'outdent_long_lines'};
4681 my $maximum_field_index = $line->{'jmax'};
4682 my $rvertical_tightness_flags = $line->{'rvertical_tightness_flags'};
4683 my $Kend = $line->{'Kend'};
4684 my $level_end = $line->{'level_end'};
4686 # Check for valid hash keys at end of lifetime of $line during development
4688 && check_keys( $line, \%valid_LINE_keys,
4689 "Checking line keys at valign_output_step_A", 1 );
4691 # add any extra spaces
4692 if ( $leading_space_count > $group_leader_length ) {
4693 $leading_space_count += $min_ci_gap;
4696 my $str = $rfields->[0];
4697 my $str_len = $rfield_lengths->[0];
4699 my @alignments = @{ $line->{'ralignments'} };
4700 if ( @alignments != $maximum_field_index + 1 ) {
4702 # Shouldn't happen: sub install_new_alignments makes jmax alignments
4703 my $jmax_alignments = @alignments - 1;
4706 "alignment jmax=$jmax_alignments should equal $maximum_field_index\n"
4712 # loop to concatenate all fields of this line and needed padding
4713 my $total_pad_count = 0;
4714 for my $j ( 1 .. $maximum_field_index ) {
4716 # skip zero-length side comments
4719 ( $j == $maximum_field_index )
4720 && ( !defined( $rfields->[$j] )
4721 || ( $rfield_lengths->[$j] == 0 ) )
4724 # compute spaces of padding before this field
4725 my $col = $alignments[ $j - 1 ]->{'column'};
4726 my $pad = $col - ( $str_len + $leading_space_count );
4728 if ($do_not_align) {
4730 ( $j < $maximum_field_index )
4732 : $self->[_rOpts_minimum_space_to_comment_] - 1;
4735 # if the -fpsc flag is set, move the side comment to the selected
4736 # column if and only if it is possible, ignoring constraints on
4737 # line length and minimum space to comment
4738 if ( $self->[_rOpts_fixed_position_side_comment_]
4739 && $j == $maximum_field_index )
4742 $pad + $self->[_rOpts_fixed_position_side_comment_] - $col - 1;
4743 if ( $newpad >= 0 ) { $pad = $newpad; }
4746 # accumulate the padding
4747 if ( $pad > 0 ) { $total_pad_count += $pad; }
4749 # only add padding when we have a finite field;
4750 # this avoids extra terminal spaces if we have empty fields
4751 if ( $rfield_lengths->[$j] > 0 ) {
4752 $str .= SPACE x $total_pad_count;
4753 $str_len += $total_pad_count;
4754 $total_pad_count = 0;
4755 $str .= $rfields->[$j];
4756 $str_len += $rfield_lengths->[$j];
4759 $total_pad_count = 0;
4763 my $side_comment_length = $rfield_lengths->[$maximum_field_index];
4765 # ship this line off
4766 $self->valign_output_step_B(
4768 leading_space_count => $leading_space_count + $extra_leading_spaces,
4770 line_length => $str_len,
4771 side_comment_length => $side_comment_length,
4772 outdent_long_lines => $outdent_long_lines,
4773 rvertical_tightness_flags => $rvertical_tightness_flags,
4775 level_end => $level_end,
4777 maximum_line_length => $maximum_line_length,
4781 } ## end sub valign_output_step_A
4783 sub combine_fields {
4785 # We have a group of two lines for which we do not want to align tokens
4786 # between index $imax_align and the side comment. So we will delete fields
4787 # between $imax_align and the side comment. Alignments have already
4788 # been set so we have to adjust them.
4790 my ( $line_0, $line_1, $imax_align ) = @_;
4792 if ( !defined($imax_align) ) { $imax_align = -1 }
4794 # First delete the unwanted tokens
4795 my $jmax_old = $line_0->{'jmax'};
4796 my @idel = ( $imax_align + 1 .. $jmax_old - 2 );
4797 return unless (@idel);
4799 # Get old alignments before any changes are made
4800 my @old_alignments = @{ $line_0->{'ralignments'} };
4802 foreach my $line ( $line_0, $line_1 ) {
4803 delete_selected_tokens( $line, \@idel );
4806 # Now adjust the alignments. Note that the side comment alignment
4807 # is always at jmax-1, and there is an ending alignment at jmax.
4809 if ( $imax_align >= 0 ) {
4810 @new_alignments[ 0 .. $imax_align ] =
4811 @old_alignments[ 0 .. $imax_align ];
4814 my $jmax_new = $line_0->{'jmax'};
4816 $new_alignments[ $jmax_new - 1 ] = $old_alignments[ $jmax_old - 1 ];
4817 $new_alignments[$jmax_new] = $old_alignments[$jmax_old];
4818 $line_0->{'ralignments'} = \@new_alignments;
4819 $line_1->{'ralignments'} = \@new_alignments;
4821 } ## end sub combine_fields
4823 sub get_output_line_number {
4825 # The output line number reported to a caller =
4826 # the number of items still in the buffer +
4827 # the number of items written.
4828 return $_[0]->group_line_count() +
4829 $_[0]->[_file_writer_object_]->get_output_line_number();
4830 } ## end sub get_output_line_number
4832 ###############################
4833 # CODE SECTION 7: Output Step B
4834 ###############################
4836 { ## closure for sub valign_output_step_B
4838 # These are values for a cache used by valign_output_step_B.
4839 my $cached_line_text;
4840 my $cached_line_text_length;
4841 my $cached_line_type;
4842 my $cached_line_opening_flag;
4843 my $cached_line_closing_flag;
4845 my $cached_line_valid;
4846 my $cached_line_leading_space_count;
4847 my $cached_seqno_string;
4848 my $cached_line_Kend;
4849 my $cached_line_maximum_length;
4851 # These are passed to step_C:
4853 my $last_nonblank_seqno_string;
4855 sub set_last_nonblank_seqno_string {
4857 $last_nonblank_seqno_string = $val;
4861 sub get_cached_line_opening_flag {
4862 return $cached_line_opening_flag;
4865 sub get_cached_line_type {
4866 return $cached_line_type;
4869 sub set_cached_line_valid {
4871 $cached_line_valid = $val;
4875 sub get_cached_seqno {
4876 return $cached_seqno;
4879 sub initialize_step_B_cache {
4881 # valign_output_step_B cache:
4882 $cached_line_text = EMPTY_STRING;
4883 $cached_line_text_length = 0;
4884 $cached_line_type = 0;
4885 $cached_line_opening_flag = 0;
4886 $cached_line_closing_flag = 0;
4888 $cached_line_valid = 0;
4889 $cached_line_leading_space_count = 0;
4890 $cached_seqno_string = EMPTY_STRING;
4891 $cached_line_Kend = undef;
4892 $cached_line_maximum_length = undef;
4894 # These vars hold a string of sequence numbers joined together used by
4896 $seqno_string = EMPTY_STRING;
4897 $last_nonblank_seqno_string = EMPTY_STRING;
4899 } ## end sub initialize_step_B_cache
4901 sub _flush_step_B_cache {
4904 # Send any text in the step_B cache on to step_C
4905 if ($cached_line_type) {
4906 $seqno_string = $cached_seqno_string;
4907 $self->valign_output_step_C(
4909 $last_nonblank_seqno_string,
4912 $cached_line_leading_space_count,
4913 $self->[_last_level_written_],
4916 $cached_line_type = 0;
4917 $cached_line_text = EMPTY_STRING;
4918 $cached_line_text_length = 0;
4919 $cached_seqno_string = EMPTY_STRING;
4920 $cached_line_Kend = undef;
4921 $cached_line_maximum_length = undef;
4924 } ## end sub _flush_step_B_cache
4926 sub handle_cached_line {
4928 my ( $self, $rinput, $leading_string, $leading_string_length ) = @_;
4930 # The cached line will either be:
4931 # - passed along to step_C, or
4932 # - or combined with the current line
4934 my $last_level_written = $self->[_last_level_written_];
4936 my $leading_space_count = $rinput->{leading_space_count};
4937 my $str = $rinput->{line};
4938 my $str_length = $rinput->{line_length};
4939 my $rvertical_tightness_flags = $rinput->{rvertical_tightness_flags};
4940 my $level = $rinput->{level};
4941 my $level_end = $rinput->{level_end};
4942 my $maximum_line_length = $rinput->{maximum_line_length};
4944 my ( $open_or_close, $opening_flag, $closing_flag, $seqno, $valid,
4945 $seqno_beg, $seqno_end );
4946 if ($rvertical_tightness_flags) {
4948 $open_or_close = $rvertical_tightness_flags->{_vt_type};
4949 $seqno_beg = $rvertical_tightness_flags->{_vt_seqno_beg};
4952 # Dump an invalid cached line
4953 if ( !$cached_line_valid ) {
4954 $self->valign_output_step_C(
4956 $last_nonblank_seqno_string,
4959 $cached_line_leading_space_count,
4960 $last_level_written,
4965 # Handle cached line ending in OPENING tokens
4966 elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
4968 my $gap = $leading_space_count - $cached_line_text_length;
4970 # handle option of just one tight opening per line:
4971 if ( $cached_line_opening_flag == 1 ) {
4972 if ( defined($open_or_close) && $open_or_close == 1 ) {
4977 # Do not join the lines if this might produce a one-line
4978 # container which exceeds the maximum line length. This is
4979 # necessary prevent blinking, particularly with the combination
4980 # -xci -pvt=2. In that case a one-line block alternately forms
4981 # and breaks, causing -xci to alternately turn on and off (case
4983 # Patched to fix cases b656 b862 b971 b972: always do the check
4984 # if the maximum line length changes (due to -vmll).
4987 && ( $maximum_line_length != $cached_line_maximum_length
4988 || ( defined($level_end) && $level > $level_end ) )
4991 my $test_line_length =
4992 $cached_line_text_length + $gap + $str_length;
4994 # Add a small tolerance in the length test (fixes case b862)
4995 if ( $test_line_length > $cached_line_maximum_length - 2 ) {
5000 if ( $gap >= 0 && defined($seqno_beg) ) {
5001 $maximum_line_length = $cached_line_maximum_length;
5002 $leading_string = $cached_line_text . SPACE x $gap;
5003 $leading_string_length = $cached_line_text_length + $gap;
5004 $leading_space_count = $cached_line_leading_space_count;
5005 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
5006 $level = $last_level_written;
5009 $self->valign_output_step_C(
5011 $last_nonblank_seqno_string,
5014 $cached_line_leading_space_count,
5015 $last_level_written,
5021 # Handle cached line ending in CLOSING tokens
5024 $cached_line_text . SPACE x $cached_line_closing_flag . $str;
5025 my $test_line_length =
5026 $cached_line_text_length +
5027 $cached_line_closing_flag +
5031 # The new line must start with container
5034 # The container combination must be okay..
5037 # okay to combine like types
5038 ( $open_or_close == $cached_line_type )
5040 # closing block brace may append to non-block
5041 || ( $cached_line_type == 2 && $open_or_close == 4 )
5043 # something like ');'
5044 || ( !$open_or_close && $cached_line_type == 2 )
5048 # The combined line must fit
5049 && ( $test_line_length <= $cached_line_maximum_length )
5053 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
5055 # Patch to outdent closing tokens ending # in ');' If we
5056 # are joining a line like ');' to a previous stacked set of
5057 # closing tokens, then decide if we may outdent the
5058 # combined stack to the indentation of the ');'. Since we
5059 # should not normally outdent any of the other tokens more
5060 # than the indentation of the lines that contained them, we
5061 # will only do this if all of the corresponding opening
5062 # tokens were on the same line. This can happen with -sot
5065 # For example, it is ok here:
5066 # __PACKAGE__->load_components( qw(
5071 # But, for example, we do not outdent in this example
5072 # because that would put the closing sub brace out farther
5073 # than the opening sub brace:
5075 # perltidy -sot -sct
5077 # '<Control-f>' => sub {
5079 # my $e = $c->XEvent;
5080 # itemsUnderArea $c;
5084 && $cached_line_text =~ /^[\)\}\]\s]*$/ )
5087 # The way to tell this is if the stacked sequence
5088 # numbers of this output line are the reverse of the
5089 # stacked sequence numbers of the previous non-blank
5090 # line of sequence numbers. So we can join if the
5091 # previous nonblank string of tokens is the mirror
5092 # image. For example if stack )}] is 13:8:6 then we
5093 # are looking for a leading stack like [{( which
5094 # is 6:8:13. We only need to check the two ends,
5095 # because the intermediate tokens must fall in order.
5096 # Note on speed: having to split on colons and
5097 # eliminate multiple colons might appear to be slow,
5098 # but it's not an issue because we almost never come
5099 # through here. In a typical file we don't.
5101 $seqno_string =~ s/^:+//;
5102 $last_nonblank_seqno_string =~ s/^:+//;
5103 $seqno_string =~ s/:+/:/g;
5104 $last_nonblank_seqno_string =~ s/:+/:/g;
5106 # how many spaces can we outdent?
5108 $cached_line_leading_space_count - $leading_space_count;
5110 && length($seqno_string)
5111 && length($last_nonblank_seqno_string) ==
5112 length($seqno_string) )
5115 ( split /:/, $last_nonblank_seqno_string );
5116 my @seqno_now = ( split /:/, $seqno_string );
5119 && $seqno_now[-1] == $seqno_last[0]
5120 && $seqno_now[0] == $seqno_last[-1] )
5124 # for absolute safety, be sure we only remove
5126 my $ws = substr( $test_line, 0, $diff );
5127 if ( ( length($ws) == $diff )
5131 $test_line = substr( $test_line, $diff );
5132 $cached_line_leading_space_count -= $diff;
5133 $last_level_written =
5134 $self->level_change(
5135 $cached_line_leading_space_count,
5136 $diff, $last_level_written );
5137 $self->reduce_valign_buffer_indentation($diff);
5140 # shouldn't happen, but not critical:
5142 ## ERROR transferring indentation here
5148 # Change the args to look like we received the combined line
5150 $str_length = $test_line_length;
5151 $leading_string = EMPTY_STRING;
5152 $leading_string_length = 0;
5153 $leading_space_count = $cached_line_leading_space_count;
5154 $level = $last_level_written;
5155 $maximum_line_length = $cached_line_maximum_length;
5158 $self->valign_output_step_C(
5160 $last_nonblank_seqno_string,
5163 $cached_line_leading_space_count,
5164 $last_level_written,
5169 return ( $str, $str_length, $leading_string, $leading_string_length,
5170 $leading_space_count, $level, $maximum_line_length, );
5172 } ## end sub handle_cached_line
5174 sub valign_output_step_B {
5176 #---------------------------------------------------------
5177 # This is Step B in writing vertically aligned lines.
5178 # Vertical tightness is applied according to preset flags.
5179 # In particular this routine handles stacking of opening
5180 # and closing tokens.
5181 #---------------------------------------------------------
5183 my ( $self, $rinput ) = @_;
5185 my $leading_space_count = $rinput->{leading_space_count};
5186 my $str = $rinput->{line};
5187 my $str_length = $rinput->{line_length};
5188 my $side_comment_length = $rinput->{side_comment_length};
5189 my $outdent_long_lines = $rinput->{outdent_long_lines};
5190 my $rvertical_tightness_flags = $rinput->{rvertical_tightness_flags};
5191 my $level = $rinput->{level};
5192 my $level_end = $rinput->{level_end};
5193 my $Kend = $rinput->{Kend};
5194 my $maximum_line_length = $rinput->{maximum_line_length};
5196 # Useful -gcs test cases for wide characters are
5197 # perl527/(method.t.2, reg_mesg.t, mime-header.t)
5199 # handle outdenting of long lines:
5200 my $is_outdented_line;
5201 if ($outdent_long_lines) {
5204 $side_comment_length +
5205 $leading_space_count -
5206 $maximum_line_length;
5207 if ( $excess > 0 ) {
5208 $leading_space_count = 0;
5209 my $file_writer_object = $self->[_file_writer_object_];
5210 my $last_outdented_line_at =
5211 $file_writer_object->get_output_line_number();
5212 $self->[_last_outdented_line_at_] = $last_outdented_line_at;
5214 my $outdented_line_count = $self->[_outdented_line_count_];
5215 unless ($outdented_line_count) {
5216 $self->[_first_outdented_line_at_] =
5217 $last_outdented_line_at;
5219 $outdented_line_count++;
5220 $self->[_outdented_line_count_] = $outdented_line_count;
5221 $is_outdented_line = 1;
5225 # Make preliminary leading whitespace. It could get changed
5226 # later by entabbing, so we have to keep track of any changes
5227 # to the leading_space_count from here on.
5228 my $leading_string =
5229 $leading_space_count > 0
5230 ? ( SPACE x $leading_space_count )
5232 my $leading_string_length = length($leading_string);
5234 # Unpack any recombination data; it was packed by
5235 # sub 'Formatter::set_vertical_tightness_flags'
5240 # 0 _vt_type: 1=opening non-block 2=closing non-block
5241 # 3=opening block brace 4=closing block brace
5243 # 1a _vt_opening_flag: 1=no multiple steps, 2=multiple steps ok
5244 # 1b _vt_closing_flag: spaces of padding to use if closing
5245 # 2 _vt_seqno: sequence number of container
5246 # 3 _vt_valid flag: do not append if this flag is false. Will be
5247 # true if appropriate -vt flag is set. Otherwise, Will be
5248 # made true only for 2 line container in parens with -lp
5249 # 4 _vt_seqno_beg: sequence number of first token of line
5250 # 5 _vt_seqno_end: sequence number of last token of line
5251 # 6 _vt_min_lines: min number of lines for joining opening cache,
5253 # 7 _vt_max_lines: max number of lines for joining opening cache,
5256 my ( $open_or_close, $opening_flag, $closing_flag, $seqno, $valid,
5257 $seqno_beg, $seqno_end );
5258 if ($rvertical_tightness_flags) {
5260 $open_or_close = $rvertical_tightness_flags->{_vt_type};
5261 $opening_flag = $rvertical_tightness_flags->{_vt_opening_flag};
5262 $closing_flag = $rvertical_tightness_flags->{_vt_closing_flag};
5263 $seqno = $rvertical_tightness_flags->{_vt_seqno};
5264 $valid = $rvertical_tightness_flags->{_vt_valid_flag};
5265 $seqno_beg = $rvertical_tightness_flags->{_vt_seqno_beg};
5266 $seqno_end = $rvertical_tightness_flags->{_vt_seqno_end};
5269 $seqno_string = $seqno_end;
5271 # handle any cached line ..
5272 # either append this line to it or write it out
5273 # Note: the function length() is used in this next test out of caution.
5274 # All testing has shown that the variable $cached_line_text_length is
5275 # correct, but its calculation is complex and a loss of cached text
5276 # would be a disaster.
5277 if ( length($cached_line_text) ) {
5283 $leading_string_length,
5284 $leading_space_count,
5286 $maximum_line_length
5288 ) = $self->handle_cached_line( $rinput, $leading_string,
5289 $leading_string_length );
5291 $cached_line_type = 0;
5292 $cached_line_text = EMPTY_STRING;
5293 $cached_line_text_length = 0;
5294 $cached_line_Kend = undef;
5295 $cached_line_maximum_length = undef;
5299 # make the line to be written
5300 my $line = $leading_string . $str;
5301 my $line_length = $leading_string_length + $str_length;
5303 # Safety check: be sure that a line to be cached as a stacked block
5304 # brace line ends in the appropriate opening or closing block brace.
5305 # This should always be the case if the caller set flags correctly.
5306 # Code '3' is for -sobb, code '4' is for -scbb.
5307 if ($open_or_close) {
5308 if ( $open_or_close == 3 && $line !~ /\{\s*$/
5309 || $open_or_close == 4 && $line !~ /\}\s*$/ )
5315 # write or cache this line ...
5316 # fix for case b999: do not cache an outdented line
5317 # fix for b1378: do not cache an empty line
5318 if ( !$open_or_close
5319 || $side_comment_length > 0
5320 || $is_outdented_line
5323 $self->valign_output_step_C(
5325 $last_nonblank_seqno_string,
5328 $leading_space_count,
5334 $cached_line_text = $line;
5335 $cached_line_text_length = $line_length;
5336 $cached_line_type = $open_or_close;
5337 $cached_line_opening_flag = $opening_flag;
5338 $cached_line_closing_flag = $closing_flag;
5339 $cached_seqno = $seqno;
5340 $cached_line_valid = $valid;
5341 $cached_line_leading_space_count = $leading_space_count;
5342 $cached_seqno_string = $seqno_string;
5343 $cached_line_Kend = $Kend;
5344 $cached_line_maximum_length = $maximum_line_length;
5347 $self->[_last_level_written_] = $level;
5348 $self->[_last_side_comment_length_] = $side_comment_length;
5350 } ## end sub valign_output_step_B
5353 ###############################
5354 # CODE SECTION 8: Output Step C
5355 ###############################
5357 { ## closure for sub valign_output_step_C
5359 # Vertical alignment buffer used by valign_output_step_C
5360 my $valign_buffer_filling;
5363 sub initialize_valign_buffer {
5364 @valign_buffer = ();
5365 $valign_buffer_filling = EMPTY_STRING;
5369 sub dump_valign_buffer {
5372 # Send all lines in the current buffer on to step_D
5373 if (@valign_buffer) {
5374 foreach (@valign_buffer) {
5375 $self->valign_output_step_D( @{$_} );
5377 @valign_buffer = ();
5379 $valign_buffer_filling = EMPTY_STRING;
5381 } ## end sub dump_valign_buffer
5383 sub reduce_valign_buffer_indentation {
5385 my ( $self, $diff ) = @_;
5387 # Reduce the leading indentation of lines in the current
5388 # buffer by $diff spaces
5389 if ( $valign_buffer_filling && $diff ) {
5390 my $max_valign_buffer = @valign_buffer;
5391 foreach my $i ( 0 .. $max_valign_buffer - 1 ) {
5392 my ( $line, $leading_space_count, $level, $Kend ) =
5393 @{ $valign_buffer[$i] };
5394 my $ws = substr( $line, 0, $diff );
5395 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
5396 $line = substr( $line, $diff );
5398 if ( $leading_space_count >= $diff ) {
5399 $leading_space_count -= $diff;
5401 $self->level_change( $leading_space_count, $diff,
5404 $valign_buffer[$i] =
5405 [ $line, $leading_space_count, $level, $Kend ];
5409 } ## end sub reduce_valign_buffer_indentation
5411 sub valign_output_step_C {
5413 #-----------------------------------------------------------------------
5414 # This is Step C in writing vertically aligned lines.
5415 # Lines are either stored in a buffer or passed along to the next step.
5416 # The reason for storing lines is that we may later want to reduce their
5417 # indentation when -sot and -sct are both used.
5418 #-----------------------------------------------------------------------
5422 $last_nonblank_seqno_string,
5427 # Dump any saved lines if we see a line with an unbalanced opening or
5429 $self->dump_valign_buffer()
5430 if ( $seqno_string && $valign_buffer_filling );
5432 # Either store or write this line
5433 if ($valign_buffer_filling) {
5434 push @valign_buffer, [@args_to_D];
5437 $self->valign_output_step_D(@args_to_D);
5440 # For lines starting or ending with opening or closing tokens..
5441 if ($seqno_string) {
5442 $last_nonblank_seqno_string = $seqno_string;
5443 set_last_nonblank_seqno_string($seqno_string);
5445 # Start storing lines when we see a line with multiple stacked
5447 # patch for RT #94354, requested by Colin Williams
5448 if ( index( $seqno_string, ':' ) >= 0
5449 && $seqno_string =~ /^\d+(\:+\d+)+$/
5450 && $args_to_D[0] !~ /^[\}\)\]\:\?]/ )
5453 # This test is efficient but a little subtle: The first test
5454 # says that we have multiple sequence numbers and hence
5455 # multiple opening or closing tokens in this line. The second
5456 # part of the test rejects stacked closing and ternary tokens.
5457 # So if we get here then we should have stacked unbalanced
5460 # Here is a complex example:
5462 # Foo($Bar[0], { # (side comment)
5466 # The first line has sequence 6::4. It does not begin with
5467 # a closing token or ternary, so it passes the test and must be
5468 # stacked opening tokens.
5470 # The last line has sequence 4:6 but is a stack of closing
5471 # tokens, so it gets rejected.
5473 # Note that the sequence number of an opening token for a qw
5474 # quote is a negative number and will be rejected. For
5475 # example, for the following line: skip_symbols([qw(
5476 # $seqno_string='10:5:-1'. It would be okay to accept it but I
5477 # decided not to do this after testing.
5479 $valign_buffer_filling = $seqno_string;
5484 } ## end sub valign_output_step_C
5487 ###############################
5488 # CODE SECTION 9: Output Step D
5489 ###############################
5491 sub valign_output_step_D {
5493 #----------------------------------------------------------------
5494 # This is Step D in writing vertically aligned lines.
5495 # It is the end of the vertical alignment pipeline.
5496 # Write one vertically aligned line of code to the output object.
5497 #----------------------------------------------------------------
5499 my ( $self, $line, $leading_space_count, $level, $Kend ) = @_;
5501 # The line is currently correct if there is no tabbing (recommended!)
5502 # We may have to lop off some leading spaces and replace with tabs.
5503 if ( $leading_space_count > 0 ) {
5505 my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
5506 my $rOpts_tabs = $self->[_rOpts_tabs_];
5507 my $rOpts_entab_leading_whitespace =
5508 $self->[_rOpts_entab_leading_whitespace_];
5510 # Nothing to do if no tabs
5511 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
5512 || $rOpts_indent_columns <= 0 )
5518 # Handle entab option
5519 elsif ($rOpts_entab_leading_whitespace) {
5521 # Patch 12-nov-2018 based on report from Glenn. Extra padding was
5522 # not correctly entabbed, nor were side comments: Increase leading
5523 # space count for a padded line to get correct tabbing
5524 if ( $line =~ /^(\s+)(.*)$/ ) {
5525 my $spaces = length($1);
5526 if ( $spaces > $leading_space_count ) {
5527 $leading_space_count = $spaces;
5532 $leading_space_count % $rOpts_entab_leading_whitespace;
5534 int( $leading_space_count / $rOpts_entab_leading_whitespace );
5535 my $leading_string = "\t" x $tab_count . SPACE x $space_count;
5536 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
5537 substr( $line, 0, $leading_space_count ) = $leading_string;
5541 # shouldn't happen - program error counting whitespace
5545 "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
5550 # Handle option of one tab per level
5552 my $leading_string = ( "\t" x $level );
5554 $leading_space_count - $level * $rOpts_indent_columns;
5557 if ( $space_count < 0 ) {
5559 # But it could be an outdented comment
5560 if ( $line !~ /^\s*#/ ) {
5563 "Error entabbing in valign_output_step_D: for level=$level count=$leading_space_count\n"
5566 $leading_string = ( SPACE x $leading_space_count );
5569 $leading_string .= ( SPACE x $space_count );
5571 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
5572 substr( $line, 0, $leading_space_count ) = $leading_string;
5576 # shouldn't happen - program error counting whitespace
5577 # we'll skip entabbing
5580 "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
5585 my $file_writer_object = $self->[_file_writer_object_];
5586 $file_writer_object->write_code_line( $line . "\n", $Kend );
5589 } ## end sub valign_output_step_D
5591 { ## closure for sub get_leading_string
5593 my @leading_string_cache;
5595 sub initialize_leading_string_cache {
5596 @leading_string_cache = ();
5600 sub get_leading_string {
5602 # define the leading whitespace string for this line..
5603 my ( $self, $leading_whitespace_count, $group_level ) = @_;
5605 # Handle case of zero whitespace, which includes multi-line quotes
5606 # (which may have a finite level; this prevents tab problems)
5607 if ( $leading_whitespace_count <= 0 ) {
5608 return EMPTY_STRING;
5611 # look for previous result
5612 elsif ( $leading_string_cache[$leading_whitespace_count] ) {
5613 return $leading_string_cache[$leading_whitespace_count];
5616 # must compute a string for this number of spaces
5619 # Handle simple case of no tabs
5620 my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
5621 my $rOpts_tabs = $self->[_rOpts_tabs_];
5622 my $rOpts_entab_leading_whitespace =
5623 $self->[_rOpts_entab_leading_whitespace_];
5625 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
5626 || $rOpts_indent_columns <= 0 )
5628 $leading_string = ( SPACE x $leading_whitespace_count );
5631 # Handle entab option
5632 elsif ($rOpts_entab_leading_whitespace) {
5634 $leading_whitespace_count % $rOpts_entab_leading_whitespace;
5635 my $tab_count = int(
5636 $leading_whitespace_count / $rOpts_entab_leading_whitespace );
5637 $leading_string = "\t" x $tab_count . SPACE x $space_count;
5640 # Handle option of one tab per level
5642 $leading_string = ( "\t" x $group_level );
5644 $leading_whitespace_count - $group_level * $rOpts_indent_columns;
5647 if ( $space_count < 0 ) {
5650 "Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n"
5654 $leading_string = ( SPACE x $leading_whitespace_count );
5657 $leading_string .= ( SPACE x $space_count );
5660 $leading_string_cache[$leading_whitespace_count] = $leading_string;
5661 return $leading_string;
5662 } ## end sub get_leading_string
5663 } ## end get_leading_string
5665 ##########################
5666 # CODE SECTION 10: Summary
5667 ##########################
5669 sub report_anything_unusual {
5672 my $outdented_line_count = $self->[_outdented_line_count_];
5673 if ( $outdented_line_count > 0 ) {
5674 write_logfile_entry(
5675 "$outdented_line_count long lines were outdented:\n");
5676 my $first_outdented_line_at = $self->[_first_outdented_line_at_];
5677 write_logfile_entry(
5678 " First at output line $first_outdented_line_at\n");
5680 if ( $outdented_line_count > 1 ) {
5681 my $last_outdented_line_at = $self->[_last_outdented_line_at_];
5682 write_logfile_entry(
5683 " Last at output line $last_outdented_line_at\n");
5685 write_logfile_entry(
5686 " use -noll to prevent outdenting, -l=n to increase line length\n"
5688 write_logfile_entry("\n");
5691 } ## end sub report_anything_unusual